summaryrefslogtreecommitdiffstats
path: root/src/corelib/text
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@kdab.com>2020-05-20 15:20:07 +0200
committerLars Knoll <lars.knoll@qt.io>2020-08-31 22:57:49 +0200
commit2c9529e158fc589c48e6b1fb61dca2133e33ac4d (patch)
treee9429715ab339d95c2ba9456bf39a894e8b41539 /src/corelib/text
parentf0ae973244026ca5382f05630bd799b44154d224 (diff)
Long live Q{Any,Utf8}StringView!
We need to add these two classes at the same time, because QAnyStringView makes all QUtf8StringView relational operators moot. We might want to add some later, esp. for UTF-8/UTF-8 comparisons, to avoid the pessimization that we can't early-out on size() mismatch in QAnyStringView equality operators, but that's an optimization, not a correctness issue, and can be fixed in a source-compatible way even after Qt 6 is released. To deal with the char8_t problem in C++20, make QUtf8StringView a class template out of which two UTF-8 views can be instantiated: the Qt 7 version, which depends on C++20 char8_t as value_type, and the Qt 6 version where value_type is a char. Use inline namespaces to map the QUtf8StringView identifier to one or the other, depending on the C++ version used to compile the user code. The inline namespace names must needs be a bit ugly, as their inline'ness depends on __cpp_char8_t. If we simply used q_v1/q_v2 we'd be blocking these names for Qt inline namespaces forever, because it's likely that inline'ness of other users of inline namespaces in Qt depends on things other than __cpp_char8_t. While inline'ness of namespaces is, theoretically speaking, a compile-time-only property, at least Clang warns about mixed use of inline on a given namespace, so we need to bite the bullet here. This is also the reason for the QT_BEGIN_..._NAMESPACE macros: GCC is ok with the first declaration making a namespace inline, while Clang warns upon re-opening an inline namespace as a non-inline one. [ChangeLog][QtCore][QUtf8StringView] New class. [ChangeLog][QtCore][QAnyStringView] New class. Change-Id: Ia7179760fca0e0b67d52f5accb0a62e389b17913 Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src/corelib/text')
-rw-r--r--src/corelib/text/qanystringview.h292
-rw-r--r--src/corelib/text/qanystringview.qdoc406
-rw-r--r--src/corelib/text/qstring.cpp127
-rw-r--r--src/corelib/text/qstring.h72
-rw-r--r--src/corelib/text/qstringalgorithms.h27
-rw-r--r--src/corelib/text/qstringbuilder.h8
-rw-r--r--src/corelib/text/qutf8stringview.h329
-rw-r--r--src/corelib/text/qutf8stringview.qdoc720
-rw-r--r--src/corelib/text/text.pri5
9 files changed, 1957 insertions, 29 deletions
diff --git a/src/corelib/text/qanystringview.h b/src/corelib/text/qanystringview.h
new file mode 100644
index 0000000000..61761c6a82
--- /dev/null
+++ b/src/corelib/text/qanystringview.h
@@ -0,0 +1,292 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef QANYSTRINGVIEW_H
+#define QANYSTRINGVIEW_H
+
+#include <QtCore/qstringview.h>
+#include <QtCore/qutf8stringview.h>
+
+QT_BEGIN_NAMESPACE
+
+template <typename, typename> class QStringBuilder;
+
+class QAnyStringView // non-owning string view; the encoding tag shares m_size with the length
+{
+public:
+ typedef qptrdiff difference_type;
+ typedef qsizetype size_type;
+private:
+ template <typename Char> // enabled when at least one of the two character-type traits holds for Char
+ using if_compatible_char = std::enable_if_t<std::disjunction_v<
+ QtPrivate::IsCompatibleCharType<Char>,
+ QtPrivate::IsCompatibleChar8Type<Char>
+ >, bool>;
+
+ template <typename Pointer> // same kind of gate, for pointer arguments
+ using if_compatible_pointer = std::enable_if_t<std::disjunction_v<
+ QtPrivate::IsCompatiblePointer<Pointer>,
+ QtPrivate::IsCompatiblePointer8<Pointer>
+ >, bool>;
+
+
+ template <typename T> // same kind of gate, for container arguments
+ using if_compatible_container = std::enable_if_t<std::disjunction_v<
+ QtPrivate::IsContainerCompatibleWithQStringView<T>,
+ QtPrivate::IsContainerCompatibleWithQUtf8StringView<T>
+ >, bool>;
+
+ // confirm we don't make an accidental copy constructor:
+ static_assert(QtPrivate::IsContainerCompatibleWithQStringView<QAnyStringView>::value == false);
+ static_assert(QtPrivate::IsContainerCompatibleWithQUtf8StringView<QAnyStringView>::value == false);
+
+ template <typename Char>
+ static constexpr std::size_t encodeType(qsizetype sz) noexcept
+ {
+ // only deals with Utf8 and Utf16 - there's only one way to create
+ // a Latin1 string, and that ctor deals with the tag itself
+ Q_ASSERT(sz >= 0);
+ Q_ASSERT(sz <= qsizetype(SizeMask));
+ return std::size_t(sz) | uint(sizeof(Char) == sizeof(char16_t)) * Tag::Utf16; // Utf16 bit iff Char is 2 bytes wide, else the tag stays Utf8 (0)
+ }
+
+ template <typename Char>
+ static qsizetype lengthHelperPointer(const Char *str) noexcept
+ {
+#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
+ if (__builtin_constant_p(*str)) { // presumably lets GCC fold the length of constant strings at compile time - TODO confirm
+ qsizetype result = 0;
+ while (*str++ != u'\0')
+ ++result;
+ return result;
+ }
+#endif
+ if constexpr (sizeof(Char) == sizeof(char16_t))
+ return QtPrivate::qustrlen(reinterpret_cast<const char16_t*>(str)); // 2-byte Char: delegate to qustrlen()
+ else
+ return qsizetype(strlen(reinterpret_cast<const char*>(str))); // any other Char: plain strlen()
+ }
+
+ template <typename Container>
+ static constexpr qsizetype lengthHelperContainer(const Container &c) noexcept
+ {
+ return qsizetype(std::size(c));
+ }
+
+ template <typename Char, size_t N>
+ static constexpr qsizetype lengthHelperContainer(const Char (&)[N]) noexcept
+ {
+ return qsizetype(N - 1); // arrays: the last element (a literal's terminator) is not counted
+ }
+
+ static QChar toQChar(char ch) noexcept { return toQChar(QLatin1Char{ch}); } // we don't handle UTF-8 multibytes
+ static QChar toQChar(QChar ch) noexcept { return ch; }
+ static QChar toQChar(QLatin1Char ch) noexcept { return ch; }
+
+ explicit constexpr QAnyStringView(const void *d, qsizetype n, std::size_t sizeAndType) noexcept
+ : m_data{d}, m_size{std::size_t(n) | (sizeAndType & TypeMask)} {} // length comes from n; only the tag bits of sizeAndType are kept
+public:
+ constexpr QAnyStringView() noexcept
+ : m_data{nullptr}, m_size{0} {}
+ constexpr QAnyStringView(std::nullptr_t) noexcept
+ : QAnyStringView() {}
+
+ template <typename Char, if_compatible_char<Char> = true>
+ constexpr QAnyStringView(const Char *str, qsizetype len)
+ : m_data{str},
+ m_size{encodeType<Char>((Q_ASSERT(len >= 0), Q_ASSERT(str || !len), len))} {} // comma operator: run both asserts, then pass len on
+
+ template <typename Char, if_compatible_char<Char> = true>
+ constexpr QAnyStringView(const Char *f, const Char *l)
+ : QAnyStringView(f, l - f) {}
+
+#ifdef Q_CLANG_QDOC
+ template <typename Char, size_t N>
+ constexpr QAnyStringView(const Char (&array)[N]) noexcept;
+
+ template <typename Char>
+ constexpr QAnyStringView(const Char *str) noexcept;
+#else
+
+ template <typename Pointer, if_compatible_pointer<Pointer> = true>
+ constexpr QAnyStringView(const Pointer &str) noexcept
+ : QAnyStringView{str, str ? lengthHelperPointer(str) : 0} {} // a null pointer yields a null view of size 0
+#endif
+
+ // defined in qstring.h
+ inline QAnyStringView(const QByteArray &str) noexcept; // TODO: Should we have this at all? Remove?
+ inline QAnyStringView(const QString &str) noexcept;
+ inline constexpr QAnyStringView(QLatin1String str) noexcept;
+
+ // defined in qstringbuilder.h
+ template <typename A, typename B>
+ inline QAnyStringView(const QStringBuilder<A, B> &expr,
+ typename QStringBuilder<A, B>::ConvertTo &&capacity = {});
+
+ template <typename Container, if_compatible_container<Container> = true>
+ constexpr QAnyStringView(const Container &c) noexcept
+ : QAnyStringView(std::data(c), lengthHelperContainer(c)) {}
+
+ template <typename Char, if_compatible_char<Char> = true>
+ constexpr QAnyStringView(const Char &c) noexcept
+ : QAnyStringView{&c, 1} {} // view of length 1 over the referenced character
+ constexpr QAnyStringView(const QChar &c) noexcept
+ : QAnyStringView{&c, 1} {}
+
+ template <typename Char, typename Container = decltype(QChar::fromUcs4(U'x')),
+ std::enable_if_t<std::is_same_v<Char, char32_t>, bool> = true>
+ constexpr QAnyStringView(Char c, Container &&capacity = {})
+ : QAnyStringView(capacity = QChar::fromUcs4(c)) {} // the conversion result is stored in 'capacity' and the view refers to that object
+
+ constexpr QAnyStringView(QStringView v) noexcept
+ : QAnyStringView(std::data(v), lengthHelperContainer(v)) {}
+
+ template <bool UseChar8T>
+ constexpr QAnyStringView(QBasicUtf8StringView<UseChar8T> v) noexcept
+ : QAnyStringView(std::data(v), lengthHelperContainer(v)) {}
+
+ // defined in qstring.h:
+ template <typename Visitor>
+ inline constexpr decltype(auto) visit(Visitor &&v) const;
+
+ [[nodiscard]] inline QString toString() const; // defined in qstring.h
+
+ [[nodiscard]] constexpr qsizetype size() const noexcept { return qsizetype(m_size & SizeMask); } // tag bits masked off
+ [[nodiscard]] constexpr const void *data() const noexcept { return m_data; }
+
+ [[nodiscard]] Q_CORE_EXPORT static int compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+
+ //
+ // STL compatibility API:
+ //
+ [[nodiscard]] constexpr QChar front() const; // NOT noexcept!
+ [[nodiscard]] constexpr QChar back() const; // NOT noexcept!
+ [[nodiscard]] constexpr bool empty() const noexcept { return size() == 0; }
+ [[nodiscard]] constexpr qsizetype size_bytes() const noexcept
+ { return size() * charSize(); }
+
+ //
+ // Qt compatibility API:
+ //
+ [[nodiscard]] constexpr bool isNull() const noexcept { return !m_data; }
+ [[nodiscard]] constexpr bool isEmpty() const noexcept { return empty(); }
+#if QT_DEPRECATED_SINCE(6, 0)
+ [[nodiscard]]
+ Q_DECL_DEPRECATED_X("Use size() and port callers to qsizetype.")
+ constexpr int length() const /* not nothrow! */
+ { return Q_ASSERT(int(size()) == size()), int(size()); }
+#endif
+private:
+ // TODO: Optimize by inverting and storing the flags in the low bits and
+ // the size in the high.
+ static_assert(std::is_same_v<std::size_t, size_t>);
+ static_assert(sizeof(size_t) == sizeof(qsizetype));
+ static constexpr size_t SizeMask = (std::numeric_limits<size_t>::max)() / 4; // every bit except the top two
+ static constexpr size_t Latin1Flag = SizeMask + 1; // second-highest bit
+ static constexpr size_t TwoByteCodePointFlag = Latin1Flag << 1; // highest bit
+ static constexpr size_t TypeMask = (std::numeric_limits<size_t>::max)() & ~SizeMask; // the top two bits
+ static_assert(TypeMask == (Latin1Flag|TwoByteCodePointFlag));
+ // HI HI LO LO ...
+ // 0 0 SZ SZ Utf8
+ // 0 1 SZ SZ Latin1
+ // 1 0 SZ SZ Utf16
+ // 1 1 SZ SZ Unused
+ // ^ ^ latin1
+ // | sizeof code unit == 2
+ enum Tag : size_t {
+ Utf8 = 0,
+ Latin1 = Latin1Flag,
+ Utf16 = TwoByteCodePointFlag,
+ Unused = TypeMask,
+ };
+ [[nodiscard]] constexpr Tag tag() const noexcept { return Tag{m_size & TypeMask}; }
+ [[nodiscard]] constexpr bool isUtf16() const noexcept { return tag() == Tag::Utf16; }
+ [[nodiscard]] constexpr bool isUtf8() const noexcept { return tag() == Tag::Utf8; }
+ [[nodiscard]] constexpr bool isLatin1() const noexcept { return tag() == Tag::Latin1; }
+ [[nodiscard]] constexpr QStringView asStringView() const
+ { return Q_ASSERT(isUtf16()), QStringView{m_data_utf16, size()}; }
+ [[nodiscard]] constexpr q_no_char8_t::QUtf8StringView asUtf8StringView() const
+ { return Q_ASSERT(isUtf8()), q_no_char8_t::QUtf8StringView{m_data_utf8, size()}; }
+ [[nodiscard]] inline constexpr QLatin1String asLatin1StringView() const;
+ [[nodiscard]] constexpr size_t charSize() const noexcept { return isUtf16() ? 2 : 1; }
+ Q_ALWAYS_INLINE constexpr void verify(qsizetype pos, qsizetype n = 0) const // asserts that pos and n describe a range inside the view
+ {
+ Q_ASSERT(pos >= 0);
+ Q_ASSERT(pos <= size());
+ Q_ASSERT(n >= 0);
+ Q_ASSERT(n <= size() - pos);
+ }
+ union { // one pointer under three typed names; the as*StringView() accessors pick the one matching tag()
+ const void *m_data;
+ const char *m_data_utf8;
+ const char16_t *m_data_utf16;
+ };
+ size_t m_size; // length in the SizeMask bits, Tag in the TypeMask bits
+};
+Q_DECLARE_TYPEINFO(QAnyStringView, Q_PRIMITIVE_TYPE);
+
+template <typename QStringLike, std::enable_if_t<std::disjunction_v<
+ std::is_same<QStringLike, QString>,
+ std::is_same<QStringLike, QByteArray>
+ >, bool> = true> // only QString and QByteArray are accepted
+[[nodiscard]] inline QAnyStringView qToAnyStringViewIgnoringNull(const QStringLike &s) noexcept
+{ return QAnyStringView(s.data(), s.size()); } // built from (pointer, size); the qdoc states the result is never null
+
+
+#define Q_ANY_SV_MAKE_RELOP(op) \
+ [[nodiscard]] Q_ALWAYS_INLINE auto operator op (QAnyStringView lhs, QAnyStringView rhs) noexcept \
+ { return QAnyStringView::compare(lhs, rhs) op 0; } \
+ /* end: each generated operator applies op to compare(lhs, rhs) and 0 */
+
+Q_ANY_SV_MAKE_RELOP(==) // size() shortcut doesn't apply for UTF-8 vs. {L1, UTF-16}
+Q_ANY_SV_MAKE_RELOP(!=)
+
+#ifdef __cpp_impl_three_way_comparison
+Q_ANY_SV_MAKE_RELOP(<=>) // a single operator<=> stands in for the four ordering operators below
+#else // no three-way comparison: declare the four ordering operators one by one
+Q_ANY_SV_MAKE_RELOP(<=)
+Q_ANY_SV_MAKE_RELOP(>=)
+Q_ANY_SV_MAKE_RELOP(<)
+Q_ANY_SV_MAKE_RELOP(>)
+#endif // __cpp_impl_three_way_comparison
+
+#undef Q_ANY_SV_MAKE_RELOP
+
+QT_END_NAMESPACE
+
+#endif /* QANYSTRINGVIEW_H */
diff --git a/src/corelib/text/qanystringview.qdoc b/src/corelib/text/qanystringview.qdoc
new file mode 100644
index 0000000000..b3e8baa3f9
--- /dev/null
+++ b/src/corelib/text/qanystringview.qdoc
@@ -0,0 +1,406 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the documentation of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:FDL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Free Documentation License Usage
+** Alternatively, this file may be used under the terms of the GNU Free
+** Documentation License version 1.3 as published by the Free Software
+** Foundation and appearing in the file included in the packaging of
+** this file. Please review the following information to ensure
+** the GNU Free Documentation License version 1.3 requirements
+** will be met: https://www.gnu.org/licenses/fdl-1.3.html.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+/*!
+ \class QAnyStringView
+ \inmodule QtCore
+ \since 6.0
+ \brief The QAnyStringView class provides a unified view on Latin-1, UTF-8,
+ or UTF-16 strings with a read-only subset of the QString API.
+ \reentrant
+ \ingroup tools
+ \ingroup string-processing
+
+ A QAnyStringView references a contiguous portion of a string it does
+ not own. It acts as an interface type to all kinds of strings,
+ without the need to construct a QString first.
+
+ Unlike QStringView and QUtf8StringView, QAnyStringView can hold
+ strings of any of the following encodings: UTF-8, UTF-16, and
+ Latin-1. The latter is supported to keep old source working
+ efficiently. It is expected that by Qt 7, the Latin-1 support will
+ be removed.
+
+ The string may be represented as an array (or an array-compatible
+ data-structure such as QString, std::basic_string, etc.) of \c
+ char, \c char8_t, QChar, \c ushort, \c char16_t or (on platforms,
+ such as Windows, where it is a 16-bit type) \c wchar_t.
+
+ QAnyStringView is designed as an interface type; its main use-case
+ is as a function parameter type. When QAnyStringViews are used as
+ automatic variables or data members, care must be taken to ensure
+ that the referenced string data (for example, owned by a QString)
+ outlives the QAnyStringView on all code paths, lest the string
+ view ends up referencing deleted data.
+
+ When used as an interface type, QAnyStringView allows a single
+ function to accept a wide variety of string data sources. One
+ function accepting QAnyStringView thus replaces five function
+ overloads (taking QString, \c{(const QChar*, int)},
+ QUtf8StringView, QLatin1String (but see above), and QChar), while
+ at the same time enabling even more string data sources to be
+ passed to the function, such as \c{u8"Hello World"}, a \c char8_t
+ string literal.
+
+ Like elsewhere in Qt, QAnyStringView assumes \c char data is encoded
+ in UTF-8, unless it is presented as a QLatin1String.
+
+ QAnyStringViews should be passed by value, not by reference-to-const:
+ \snippet code/src_corelib_text_qanystringview.cpp 0
+
+ QAnyStringView can also be used as the return value of a function,
+ but this is not recommended. QUtf8StringView or QStringView are
+ better suited as function return values. If you call a function
+ returning QAnyStringView, take extra care to not keep the
+ QAnyStringView around longer than the function promises to keep
+ the referenced string data alive. If in doubt, obtain a strong
+ reference to the data by calling toString() to convert the
+ QAnyStringView into a QString.
+
+ QAnyStringView is a \e{Literal Type}.
+
+ \section1 Compatible Character Types
+
+ QAnyStringView accepts strings over a variety of character types:
+
+ \list
+ \li \c char (both signed and unsigned)
+ \li \c char8_t (C++20 only)
+ \li \c char16_t
+ \li \c wchar_t (where it's a 16-bit type, e.g. Windows)
+ \li \c ushort
+ \li \c QChar
+ \endlist
+
+ The 8-bit character types are interpreted as UTF-8 data (except when
+ presented as a QLatin1String) while the 16-bit character types are
+ interpreted as UTF-16 data in host byte order (the same as QString).
+
+ \section1 Sizes and Sub-Strings
+
+ All sizes and positions in QAnyStringView functions are in the
+ encoding's code units (that is, UTF-16 surrogate pairs count as
+ two for the purposes of these functions, the same as in QString,
+ and UTF-8 multibyte sequences count as two, three or four,
+ depending on their length).
+
+ \sa QUtf8StringView, QStringView
+*/
+
+/*!
+ \typedef QAnyStringView::difference_type
+
+ Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL.
+*/
+
+/*!
+ \typedef QAnyStringView::size_type
+
+ Alias for qsizetype. Provided for compatibility with the STL.
+*/
+
+/*!
+ \fn QAnyStringView::QAnyStringView()
+
+ Constructs a null string view.
+
+ \sa isNull()
+*/
+
+/*!
+ \fn QAnyStringView::QAnyStringView(std::nullptr_t)
+
+ Constructs a null string view.
+
+ \sa isNull()
+*/
+
+/*!
+ \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *str, qsizetype len)
+
+ Constructs a string view on \a str with length \a len.
+
+ The range \c{[str, str + len)} must remain valid for the lifetime of this string view object.
+
+ Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view.
+
+ The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr.
+
+ This constructor only participates in overload resolution if \c Char is a compatible
+ character type.
+
+ \sa isNull(), {Compatible Character Types}
+*/
+
+/*!
+ \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *first, const Char *last)
+
+ Constructs a string view on \a first with length (\a last - \a first).
+
+ The range \c{[first,last)} must remain valid for the lifetime of
+ this string view object.
+
+ Passing \nullptr as \a first is safe if \a last is \nullptr, too,
+ and results in a null string view.
+
+ The behavior is undefined if \a last precedes \a first, or \a first
+ is \nullptr and \a last is not.
+
+ This constructor only participates in overload resolution if \c Char
+ is a compatible character type.
+
+ \sa isNull(), {Compatible Character Types}
+*/
+
+/*!
+ \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *str)
+
+ Constructs a string view on \a str. The length is determined
+ by scanning for the first \c{Char(0)}.
+
+ \a str must remain valid for the lifetime of this string view object.
+
+ Passing \nullptr as \a str is safe and results in a null string view.
+
+ This constructor only participates in overload resolution if \a
+ str is not an array and if \c Char is a compatible character
+ type.
+
+ \sa isNull(), {Compatible Character Types}
+*/
+
+/*!
+ \fn template <typename Char, size_t N> QAnyStringView::QAnyStringView(const Char (&string)[N])
+
+ Constructs a string view on the character string literal \a string.
+ The length is set to \c{N-1}, excluding the trailing \c{Char(0)}.
+ If you need the full array, use the constructor from pointer and
+ size instead:
+
+ \snippet code/src_corelib_text_qanystringview.cpp 2
+
+ \a string must remain valid for the lifetime of this string view
+ object.
+
+ This constructor only participates in overload resolution if \a
+ string is an actual array and \c Char is a compatible character
+ type.
+
+ \sa {Compatible Character Types}
+*/
+
+/*!
+ \fn QAnyStringView::QAnyStringView(const QString &str)
+
+ Constructs a string view on \a str.
+
+ \c{str.data()} must remain valid for the lifetime of this string view object.
+
+ The string view will be null if and only if \c{str.isNull()}.
+*/
+
+/*!
+ \fn QAnyStringView::QAnyStringView(const QByteArray &str)
+
+ Constructs a string view on \a str. The data in \a str is interpreted as UTF-8.
+
+ \c{str.data()} must remain valid for the lifetime of this string view object.
+
+ The string view will be null if and only if \c{str.isNull()}.
+*/
+
+/*!
+ \fn template <typename StdBasicString> QAnyStringView::QAnyStringView(const StdBasicString &str)
+
+ Constructs a string view on \a str. The length is taken from \c{str.size()}.
+
+ \c{str.data()} must remain valid for the lifetime of this string view object.
+
+ This constructor only participates in overload resolution if \c StdBasicString is an
+ instantiation of \c std::basic_string with a compatible character type.
+
+ The string view will be empty if and only if \c{str.empty()}. It is unspecified
+ whether this constructor can result in a null string view (\c{str.data()} would
+ have to return \nullptr for this).
+
+ \sa isNull(), isEmpty(), {Compatible Character Types}
+*/
+
+/*!
+ \fn QString QAnyStringView::toString() const
+
+ Returns a deep copy of this string view's data as a QString.
+
+ The return value will be a null QString if and only if this string view is null.
+*/
+
+/*!
+ \fn const void *QAnyStringView::data() const
+
+ Returns a const pointer to the first character in the string.
+
+ \note The character array represented by the return value is \e not null-terminated.
+
+ \sa size_bytes()
+*/
+
+/*!
+ \fn bool QAnyStringView::empty() const
+
+ Returns whether this string view is empty - that is, whether \c{size() == 0}.
+
+ This function is provided for STL compatibility.
+
+ \sa isEmpty(), isNull(), size()
+*/
+
+/*!
+ \fn bool QAnyStringView::isEmpty() const
+
+ Returns whether this string view is empty - that is, whether \c{size() == 0}.
+
+ This function is provided for compatibility with other Qt containers.
+
+ \sa empty(), isNull(), size()
+*/
+
+/*!
+ \fn bool QAnyStringView::isNull() const
+
+ Returns whether this string view is null - that is, whether \c{data() == nullptr}.
+
+ This function is provided for compatibility with other Qt containers.
+
+ \sa empty(), isEmpty(), size()
+*/
+
+/*!
+ \fn qsizetype QAnyStringView::size() const
+
+ Returns the size of this string view, in the encoding's code units.
+
+ \sa empty(), isEmpty(), isNull(), size_bytes(), {Sizes and Sub-Strings}
+*/
+
+/*!
+ \fn qsizetype QAnyStringView::size_bytes() const
+
+ Returns the size of this string view, but in bytes, not code units.
+
+ You can use this function together with data() for hashing or serialization.
+
+ This function is provided for STL compatibility.
+
+ \sa size(), data()
+*/
+
+/*!
+ \fn int QAnyStringView::length() const
+ \obsolete
+ Use size() instead, and port callers to qsizetype.
+
+ Same as size(), except that it returns the result as an \c int.
+
+ This function is provided for compatibility with other Qt containers.
+
+ \warning QAnyStringView can represent strings with more than 2\sup{31} characters.
+ Calling this function on a string view for which size() returns a value greater
+ than \c{INT_MAX} constitutes undefined behavior.
+
+ \sa size()
+*/
+
+/*!
+ \fn QChar QAnyStringView::front() const
+
+ Returns the first character in the string.
+
+ This function is provided for STL compatibility.
+
+ \warning Calling this function on an empty string view constitutes
+ undefined behavior.
+
+ \sa back(), {Sizes and Sub-Strings}
+*/
+
+/*!
+ \fn QChar QAnyStringView::back() const
+
+ Returns the last character in the string.
+
+ This function is provided for STL compatibility.
+
+ \warning Calling this function on an empty string view constitutes
+ undefined behavior.
+
+ \sa front(), {Sizes and Sub-Strings}
+*/
+
+/*!
+ \fn int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs)
+
+ Returns an integer that compares to zero as \a lhs compares to \a rhs.
+
+ If \a cs is Qt::CaseSensitive (the default), the comparison is case sensitive;
+ otherwise the comparison is case-insensitive.
+
+ \sa operator==(), operator<(), operator>()
+*/
+
+/*!
+ \fn template <typename QStringLike> QAnyStringView qToAnyStringViewIgnoringNull(const QStringLike &s);
+ \since 6.0
+ \internal
+
+ Convert \a s to a QAnyStringView ignoring \c{s.isNull()}.
+
+ Returns a string-view that references \a{s}'s data, but is never null.
+
+ This is a faster way to convert a QString or QByteArray to a QAnyStringView,
+ if null QStrings or QByteArrays can legitimately be treated as empty ones.
+
+ \sa QString::isNull(), QAnyStringView
+*/
+
+/*!
+ \fn QAnyStringView::toWCharArray(wchar_t *array) const
+
+ Transcribes this string into the given \a array.
+
+ The caller is responsible for ensuring \a array is large enough to hold the
+ \c wchar_t encoding of this string (allocating the array with space for size()
+ elements is always sufficient). The array is encoded in UTF-16 on
+ platforms where \c wchar_t is 2 bytes wide (e.g. Windows); otherwise (Unix
+ systems), \c wchar_t is assumed to be 4 bytes wide and the data is written
+ in UCS-4.
+
+ \note This function writes no null terminator to the end of \a array.
+
+ Returns the number of \c wchar_t entries written to \a array.
+
+ \sa QString::toWCharArray()
+*/
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 068247adb4..bf37f2e675 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -1186,8 +1186,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, size_t l)
return 0;
}
-template <typename Number>
-constexpr int lencmp(Number lhs, Number rhs) noexcept
+constexpr int lencmp(qsizetype lhs, qsizetype rhs) noexcept
{
return lhs == rhs ? 0 :
lhs > rhs ? 1 :
@@ -1283,6 +1282,49 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
return r ? r : lencmp(lhs.size(), rhs.size());
}
+static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept // UTF-8 vs. UTF-16
+{
+ if (cs == Qt::CaseSensitive)
+ return QUtf8::compareUtf8(lhs.data(), lhs.size(), rhs.data(), rhs.size()); // case-sensitive path
+ else
+ return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); // case-insensitive path
+}
+
+static int qt_compare_strings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept // UTF-16 vs. UTF-8
+{
+ return -qt_compare_strings(rhs, lhs, cs); // swap the operands and negate the result
+}
+
+static int qt_compare_strings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept // Latin-1 vs. UTF-8
+{
+ return qt_compare_strings(lhs, rhs.toString(), cs); // ### optimize! NOTE(review): toString() presumably allocates inside a noexcept function - confirm
+}
+
+static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept // UTF-8 vs. Latin-1
+{
+ return -qt_compare_strings(rhs, lhs, cs); // swap the operands and negate the result
+}
+
+static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept // UTF-8 vs. UTF-8
+{
+ if (lhs.isEmpty())
+ return lencmp(0, rhs.size()); // empty lhs: the result depends only on rhs's size
+ if (cs == Qt::CaseInsensitive)
+ return qt_compare_strings(lhs.toString(), rhs.toString(), cs); // ### optimize! NOTE(review): toString() presumably allocates inside a noexcept function - confirm
+ const auto l = std::min(lhs.size(), rhs.size());
+ int r = qstrncmp(lhs.data(), rhs.data(), l); // NOTE(review): if qstrncmp() stops at a NUL byte, data after an embedded NUL is never compared - confirm
+ return r ? r : lencmp(lhs.size(), rhs.size()); // equal common prefix: fall back to comparing the lengths
+}
+
+int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
+{
+ return lhs.visit([rhs, cs](auto lhs) { // outer visit: the lambda parameter shadows lhs with its concrete view
+ return rhs.visit([lhs, cs](auto rhs) { // inner visit: same for rhs
+ return qt_compare_strings(lhs, rhs, cs); // overload resolution picks the function for this pair of view types
+ });
+ });
+}
+
/*!
\relates QStringView
\internal
@@ -1329,19 +1371,19 @@ int QtPrivate::compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensit
/*!
\relates QStringView
\internal
- \since 5.10
+ \since 6.0
\overload
+*/
+int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept // UTF-16 vs. UTF-8
+{
+ return qt_compare_strings(lhs, rhs, cs); // forwards to the file-local overload for this pair of types
+}
- Returns an integer that compares to 0 as \a lhs compares to \a rhs.
-
- If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
- otherwise the comparison is case-insensitive.
-
- Case-sensitive comparison is based exclusively on the numeric Unicode values
- of the characters and is very fast, but is not what a human would expect.
- Consider sorting user-visible strings with QString::localeAwareCompare().
-
- \sa {Comparing Strings}
+/*!
+ \relates QStringView
+ \internal
+ \since 5.10
+ \overload
*/
int QtPrivate::compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
{
@@ -1375,23 +1417,43 @@ int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
\internal
\since 6.0
\overload
+*/
+int QtPrivate::compareStrings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept // Latin-1 vs. UTF-8
+{
+ return qt_compare_strings(lhs, rhs, cs); // forwards to the file-local overload for this pair of types
+}
- Returns an integer that compares to 0 as \a lhs compares to \a rhs.
+/*!
+ \relates QStringView
+ \internal
+ \since 6.0
+ \overload
+*/
+int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept // UTF-8 vs. UTF-16
+{
+ return qt_compare_strings(lhs, rhs, cs); // forwards to the file-local overload for this pair of types
+}
- If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
- otherwise the comparison is case-insensitive.
+/*!
+ \relates QStringView
+ \internal
+ \since 6.0
+ \overload
+*/
+int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept // UTF-8 vs. Latin-1
+{
+ return qt_compare_strings(lhs, rhs, cs); // forwards to the file-local overload for this pair of types
+}
- Case-sensitive comparison is based exclusively on the numeric values of the
- decoded Unicode code points and is very fast, but is not what a human would
- expect. Consider sorting user-visible strings with
- QString::localeAwareCompare().
+/*!
+ \relates QStringView
+ \internal
+ \since 6.0
+ \overload
*/
-int QtPrivate::compareStringsUtf8(const char *u8str, qsizetype u8len, QStringView rhs, Qt::CaseSensitivity cs) noexcept
+int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept // UTF-8 vs. UTF-8
{
- if (cs == Qt::CaseSensitive)
- return QUtf8::compareUtf8(u8str, u8len, rhs.data(), rhs.size());
- else
- return ucstricmp8(u8str, u8str + u8len, rhs.begin(), rhs.end());
+ return qt_compare_strings(lhs, rhs, cs); // forwards to the file-local overload for this pair of types
}
#define REHASH(a) \
@@ -4797,6 +4859,21 @@ QByteArray QString::toLatin1_helper(const QString &string)
}
/*!
+ \since 6.0
+ \internal
+ \relates QAnyStringView
+
+ Returns a UTF-16 representation of \a string as a QString.
+
+ \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
+ QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
+*/
+QString QtPrivate::convertToQString(QAnyStringView string)
+{
+ return string.visit([] (auto string) { return string.toString(); });
+}
+
+/*!
\since 5.10
\internal
\relates QStringView
diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h
index 7fd5f86e25..7daf86a366 100644
--- a/src/corelib/text/qstring.h
+++ b/src/corelib/text/qstring.h
@@ -53,7 +53,7 @@
#include <QtCore/qnamespace.h>
#include <QtCore/qstringliteral.h>
#include <QtCore/qstringalgorithms.h>
-#include <QtCore/qstringview.h>
+#include <QtCore/qanystringview.h>
#include <QtCore/qstringtokenizer.h>
#include <string>
@@ -81,6 +81,12 @@ namespace QtPrivate {
template <bool...B> class BoolList;
}
+// QStringAlgorithms inline functions:
+
+int QtPrivate::compareStringsUtf8(const char *lhs, qsizetype lhss, QStringView rhs, Qt::CaseSensitivity cs) noexcept
+{ return compareStrings(QUtf8StringView(lhs, lhss), rhs, cs); }
+
+
class QLatin1String
{
public:
@@ -282,6 +288,44 @@ bool QStringView::contains(QLatin1String s, Qt::CaseSensitivity cs) const noexce
qsizetype QStringView::lastIndexOf(QLatin1String s, qsizetype from, Qt::CaseSensitivity cs) const noexcept
{ return QtPrivate::lastIndexOf(*this, from, s, cs); }
+//
+// QAnyStringView members that require QLatin1String
+//
+
+constexpr QAnyStringView::QAnyStringView(QLatin1String str) noexcept
+ : m_data{str.data()}, m_size{size_t(str.size()) | Tag::Latin1} {}
+
+constexpr QLatin1String QAnyStringView::asLatin1StringView() const
+{
+ Q_ASSERT(isLatin1());
+ return QLatin1String{m_data_utf8, int(size())};
+}
+
+template <typename Visitor>
+constexpr decltype(auto) QAnyStringView::visit(Visitor &&v) const
+{
+ if (isUtf16())
+ return std::forward<Visitor>(v)(asStringView());
+ else if (isLatin1())
+ return std::forward<Visitor>(v)(asLatin1StringView());
+ else
+ return std::forward<Visitor>(v)(asUtf8StringView());
+}
+
+//
+// QAnyStringView members that require QAnyStringView::visit()
+//
+
+constexpr QChar QAnyStringView::front() const
+{
+ return visit([] (auto that) { return QAnyStringView::toQChar(that.front()); });
+}
+constexpr QChar QAnyStringView::back() const
+{
+ return visit([] (auto that) { return QAnyStringView::toQChar(that.back()); });
+}
+
+
class Q_CORE_EXPORT QString
{
typedef QTypedArrayData<char16_t> Data;
@@ -976,6 +1020,7 @@ QString QLatin1String::toString() const { return *this; }
//
// QStringView inline members that require QString:
//
+
QString QStringView::toString() const
{ return Q_ASSERT(size() == length()), QString(data(), length()); }
@@ -997,6 +1042,29 @@ ushort QStringView::toUShort(bool *ok, int base) const
{ return QString::toIntegral_helper<ushort>(*this, ok, base); }
//
+// QUtf8StringView inline members that require QString:
+//
+
+template <bool UseChar8T>
+QString QBasicUtf8StringView<UseChar8T>::toString() const
+{
+ Q_ASSERT(size() == int(size()));
+ return QString::fromUtf8(data(), int(size()));
+}
+
+//
+// QAnyStringView inline members that require QString:
+//
+
+QAnyStringView::QAnyStringView(const QByteArray &str) noexcept
+ : QAnyStringView{str.isNull() ? nullptr : str.data(), str.size()} {}
+QAnyStringView::QAnyStringView(const QString &str) noexcept
+ : QAnyStringView{str.isNull() ? nullptr : str.data(), str.size()} {}
+
+QString QAnyStringView::toString() const
+{ return QtPrivate::convertToQString(*this); }
+
+//
// QString inline members
//
inline QString::QString(QLatin1String aLatin1) : d(fromLatin1_helper(aLatin1.latin1(), aLatin1.size()))
@@ -1383,6 +1451,7 @@ inline bool operator> (QLatin1String lhs, QChar rhs) noexcept { return rhs <
inline bool operator<=(QLatin1String lhs, QChar rhs) noexcept { return !(rhs < lhs); }
inline bool operator>=(QLatin1String lhs, QChar rhs) noexcept { return !(rhs > lhs); }
+#if 0
// QStringView <> QStringView
inline bool operator==(QStringView lhs, QStringView rhs) noexcept { return lhs.size() == rhs.size() && QtPrivate::compareStrings(lhs, rhs) == 0; }
inline bool operator!=(QStringView lhs, QStringView rhs) noexcept { return !(lhs == rhs); }
@@ -1390,6 +1459,7 @@ inline bool operator< (QStringView lhs, QStringView rhs) noexcept { return QtPri
inline bool operator<=(QStringView lhs, QStringView rhs) noexcept { return QtPrivate::compareStrings(lhs, rhs) <= 0; }
inline bool operator> (QStringView lhs, QStringView rhs) noexcept { return QtPrivate::compareStrings(lhs, rhs) > 0; }
inline bool operator>=(QStringView lhs, QStringView rhs) noexcept { return QtPrivate::compareStrings(lhs, rhs) >= 0; }
+#endif
// QStringView <> QChar
inline bool operator==(QStringView lhs, QChar rhs) noexcept { return lhs == QStringView(&rhs, 1); }
diff --git a/src/corelib/text/qstringalgorithms.h b/src/corelib/text/qstringalgorithms.h
index 28c0760b61..c0619ca8e5 100644
--- a/src/corelib/text/qstringalgorithms.h
+++ b/src/corelib/text/qstringalgorithms.h
@@ -48,9 +48,27 @@
QT_BEGIN_NAMESPACE
+#ifdef __cpp_char8_t
+# define QT_BEGIN_HAS_CHAR8_T_NAMESPACE inline namespace q_has_char8_t {
+# define QT_BEGIN_NO_CHAR8_T_NAMESPACE namespace q_no_char8_t {
+#else
+# define QT_BEGIN_HAS_CHAR8_T_NAMESPACE namespace q_has_char8_t {
+# define QT_BEGIN_NO_CHAR8_T_NAMESPACE inline namespace q_no_char8_t {
+#endif
+#define QT_END_HAS_CHAR8_T_NAMESPACE }
+#define QT_END_NO_CHAR8_T_NAMESPACE }
+
+// declare namespaces:
+QT_BEGIN_HAS_CHAR8_T_NAMESPACE
+QT_END_HAS_CHAR8_T_NAMESPACE
+QT_BEGIN_NO_CHAR8_T_NAMESPACE
+QT_END_NO_CHAR8_T_NAMESPACE
+
class QByteArray;
class QLatin1String;
class QStringView;
+template <bool> class QBasicUtf8StringView;
+class QAnyStringView;
class QChar;
namespace QtPrivate {
@@ -60,10 +78,15 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t *qustrchr(QS
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
-Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStringsUtf8(const char *, qsizetype, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_DECL_DEPRECATED inline int compareStringsUtf8(const char *lhs, qsizetype lhss, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
@@ -91,6 +114,8 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QLatin1String trimmed(QLati
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION qsizetype count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION qsizetype count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT QString convertToQString(QAnyStringView s);
+
Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLatin1(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToUtf8(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLocal8Bit(QStringView str);
diff --git a/src/corelib/text/qstringbuilder.h b/src/corelib/text/qstringbuilder.h
index 940eb9f4f7..d488b962f0 100644
--- a/src/corelib/text/qstringbuilder.h
+++ b/src/corelib/text/qstringbuilder.h
@@ -466,6 +466,14 @@ QString &operator+=(QString &a, const QStringBuilder<A, B> &b)
return a;
}
+//
+// inline QAnyStringView members requiring QStringBuilder:
+//
+
+template <typename A, typename B>
+QAnyStringView::QAnyStringView(const QStringBuilder<A, B> &expr,
+ typename QStringBuilder<A, B>::ConvertTo &&capacity)
+ : QAnyStringView(capacity = expr) {}
QT_END_NAMESPACE
diff --git a/src/corelib/text/qutf8stringview.h b/src/corelib/text/qutf8stringview.h
new file mode 100644
index 0000000000..a197d14a63
--- /dev/null
+++ b/src/corelib/text/qutf8stringview.h
@@ -0,0 +1,329 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef QUTF8STRINGVIEW_H
+#define QUTF8STRINGVIEW_H
+
+#include <QtCore/qstringalgorithms.h>
+#include <QtCore/qarraydata.h> // for QContainerImplHelper
+
+#include <string>
+
+QT_BEGIN_NAMESPACE
+
+template <bool> class QBasicUtf8StringView;
+class QByteArray;
+class QLatin1String;
+
+namespace QtPrivate {
+template <typename Char>
+using IsCompatibleChar8TypeHelper = std::disjunction<
+#ifdef __cpp_char8_t
+ std::is_same<Char, char8_t>,
+#endif
+ std::is_same<Char, char>,
+ std::is_same<Char, uchar>,
+ std::is_same<Char, signed char>
+ >;
+template <typename Char>
+using IsCompatibleChar8Type
+ = IsCompatibleChar8TypeHelper<std::remove_cv_t<std::remove_reference_t<Char>>>;
+
+template <typename Pointer>
+struct IsCompatiblePointer8Helper : std::false_type {};
+template <typename Char>
+struct IsCompatiblePointer8Helper<Char*>
+ : IsCompatibleChar8Type<Char> {};
+template <typename Pointer>
+using IsCompatiblePointer8
+ = IsCompatiblePointer8Helper<std::remove_cv_t<std::remove_reference_t<Pointer>>>;
+
+template <typename T, typename Enable = void>
+struct IsContainerCompatibleWithQUtf8StringView : std::false_type {};
+
+template <typename T>
+struct IsContainerCompatibleWithQUtf8StringView<T, std::enable_if_t<std::conjunction_v<
+ // lacking concepts and ranges, we accept any T whose std::data yields a suitable pointer ...
+ IsCompatiblePointer8<decltype(std::data(std::declval<const T &>()))>,
+ // ... and that has a suitable size ...
+ std::is_convertible<
+ decltype(std::size(std::declval<const T &>())),
+ qsizetype
+ >,
+ // ... and it's a range as it defines an iterator-like API
+ IsCompatibleChar8Type<typename std::iterator_traits<
+ decltype(std::begin(std::declval<const T &>()))>::value_type
+ >,
+ std::is_convertible<
+ decltype( std::begin(std::declval<const T &>()) != std::end(std::declval<const T &>()) ),
+ bool
+ >,
+
+ // This needs to be treated specially due to the empty vs null distinction
+ std::negation<std::is_same<std::decay_t<T>, QByteArray>>,
+
+ // This has a compatible value_type, but explicitly a different encoding
+ std::negation<std::is_same<std::decay_t<T>, QLatin1String>>,
+
+ // Don't make an accidental copy constructor
+ std::negation<std::disjunction<
+ std::is_same<std::decay_t<T>, QBasicUtf8StringView<true>>,
+ std::is_same<std::decay_t<T>, QBasicUtf8StringView<false>>
+ >>
+ >>> : std::true_type {};
+
+struct hide_char8_t {
+#ifdef __cpp_char8_t
+ using type = char8_t;
+#endif
+};
+
+struct wrap_char { using type = char; };
+
+} // namespace QtPrivate
+
+template <bool UseChar8T>
+class QBasicUtf8StringView
+{
+public:
+ using storage_type = typename std::conditional<UseChar8T,
+ QtPrivate::hide_char8_t,
+ QtPrivate::wrap_char
+ >::type::type;
+ typedef const storage_type value_type;
+ typedef qptrdiff difference_type;
+ typedef qsizetype size_type;
+ typedef value_type &reference;
+ typedef value_type &const_reference;
+ typedef value_type *pointer;
+ typedef value_type *const_pointer;
+
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+private:
+ template <typename Char>
+ using if_compatible_char = std::enable_if_t<QtPrivate::IsCompatibleChar8Type<Char>::value, bool>;
+
+ template <typename Pointer>
+ using if_compatible_pointer = std::enable_if_t<QtPrivate::IsCompatiblePointer8<Pointer>::value, bool>;
+
+ template <typename T>
+ using if_compatible_qstring_like = std::enable_if_t<std::is_same_v<T, QByteArray>, bool>;
+
+ template <typename T>
+ using if_compatible_container = std::enable_if_t<QtPrivate::IsContainerCompatibleWithQUtf8StringView<T>::value, bool>;
+
+ template <typename Container>
+ static constexpr qsizetype lengthHelperContainer(const Container &c) noexcept
+ {
+ return qsizetype(std::size(c));
+ }
+
+ // Note: Do not replace with std::size(const Char (&)[N]), because the result
+ // would be off by one (N counts the terminating null of a string literal).
+ template <typename Char, size_t N>
+ static constexpr qsizetype lengthHelperContainer(const Char (&)[N]) noexcept
+ {
+ return qsizetype(N - 1);
+ }
+
+ template <typename Char>
+ static const storage_type *castHelper(const Char *str) noexcept
+ { return reinterpret_cast<const storage_type*>(str); }
+ static constexpr const storage_type *castHelper(const storage_type *str) noexcept
+ { return str; }
+
+public:
+ constexpr QBasicUtf8StringView() noexcept
+ : m_data(nullptr), m_size(0) {}
+ constexpr QBasicUtf8StringView(std::nullptr_t) noexcept
+ : QBasicUtf8StringView() {}
+
+ template <typename Char, if_compatible_char<Char> = true>
+ constexpr QBasicUtf8StringView(const Char *str, qsizetype len)
+ : m_data(castHelper(str)),
+ m_size((Q_ASSERT(len >= 0), Q_ASSERT(str || !len), len)) {}
+
+ template <typename Char, if_compatible_char<Char> = true>
+ constexpr QBasicUtf8StringView(const Char *f, const Char *l)
+ : QBasicUtf8StringView(f, l - f) {}
+
+#ifdef Q_CLANG_QDOC
+ template <typename Char, size_t N>
+ constexpr QBasicUtf8StringView(const Char (&array)[N]) noexcept;
+
+ template <typename Char>
+ constexpr QBasicUtf8StringView(const Char *str) noexcept;
+#else
+ template <typename Pointer, if_compatible_pointer<Pointer> = true>
+ constexpr QBasicUtf8StringView(const Pointer &str) noexcept
+ : QBasicUtf8StringView(str,
+ str ? std::char_traits<std::remove_cv_t<std::remove_pointer_t<Pointer>>>::length(str) : 0) {}
+#endif
+
+#ifdef Q_CLANG_QDOC
+ QBasicUtf8StringView(const QByteArray &str) noexcept;
+#else
+ template <typename String, if_compatible_qstring_like<String> = true>
+ QBasicUtf8StringView(const String &str) noexcept
+ : QBasicUtf8StringView(str.isNull() ? nullptr : str.data(), qsizetype(str.size())) {}
+#endif
+
+ template <typename Container, if_compatible_container<Container> = true>
+ constexpr QBasicUtf8StringView(const Container &c) noexcept
+ : QBasicUtf8StringView(std::data(c), lengthHelperContainer(c)) {}
+
+#ifdef __cpp_char8_t
+ constexpr QBasicUtf8StringView(QBasicUtf8StringView<!UseChar8T> other)
+ : QBasicUtf8StringView(other.data(), other.size()) {}
+#endif
+
+ [[nodiscard]] inline QString toString() const; // defined in qstring.h
+
+ [[nodiscard]] constexpr qsizetype size() const noexcept { return m_size; }
+ [[nodiscard]] const_pointer data() const noexcept { return reinterpret_cast<const_pointer>(m_data); }
+#ifdef __cpp_char8_t
+ [[nodiscard]] const char8_t *utf8() const noexcept { return reinterpret_cast<const char8_t*>(m_data); }
+#endif
+
+ [[nodiscard]] constexpr storage_type operator[](qsizetype n) const
+ { return Q_ASSERT(n >= 0), Q_ASSERT(n < size()), m_data[n]; }
+
+ //
+ // QString API
+ //
+
+ [[nodiscard]] constexpr storage_type at(qsizetype n) const { return (*this)[n]; }
+
+ [[nodiscard]]
+ constexpr QBasicUtf8StringView mid(qsizetype pos, qsizetype n = -1) const
+ {
+ using namespace QtPrivate;
+ auto result = QContainerImplHelper::mid(size(), &pos, &n);
+ return result == QContainerImplHelper::Null ? QBasicUtf8StringView() : QBasicUtf8StringView(m_data + pos, n);
+ }
+ [[nodiscard]]
+ constexpr QBasicUtf8StringView left(qsizetype n) const
+ {
+ if (size_t(n) >= size_t(size()))
+ n = size();
+ return QBasicUtf8StringView(m_data, n);
+ }
+ [[nodiscard]]
+ constexpr QBasicUtf8StringView right(qsizetype n) const
+ {
+ if (size_t(n) >= size_t(size()))
+ n = size();
+ return QBasicUtf8StringView(m_data + m_size - n, n);
+ }
+
+ [[nodiscard]] constexpr QBasicUtf8StringView sliced(qsizetype pos) const
+ { verify(pos); return QBasicUtf8StringView{m_data + pos, m_size - pos}; }
+ [[nodiscard]] constexpr QBasicUtf8StringView sliced(qsizetype pos, qsizetype n) const
+ { verify(pos, n); return QBasicUtf8StringView(m_data + pos, n); }
+ [[nodiscard]] constexpr QBasicUtf8StringView first(qsizetype n) const
+ { verify(n); return QBasicUtf8StringView(m_data, n); }
+ [[nodiscard]] constexpr QBasicUtf8StringView last(qsizetype n) const
+ { verify(n); return QBasicUtf8StringView(m_data + m_size - n, n); }
+ [[nodiscard]] constexpr QBasicUtf8StringView chopped(qsizetype n) const
+ { verify(n); return QBasicUtf8StringView(m_data, m_size - n); }
+
+ constexpr void truncate(qsizetype n)
+ { verify(n); m_size = n; }
+ constexpr void chop(qsizetype n)
+ { verify(n); m_size -= n; }
+
+ //
+ // STL compatibility API:
+ //
+ [[nodiscard]] const_iterator begin() const noexcept { return data(); }
+ [[nodiscard]] const_iterator end() const noexcept { return data() + size(); }
+ [[nodiscard]] const_iterator cbegin() const noexcept { return begin(); }
+ [[nodiscard]] const_iterator cend() const noexcept { return end(); }
+ [[nodiscard]] const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
+ [[nodiscard]] const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
+ [[nodiscard]] const_reverse_iterator crbegin() const noexcept { return rbegin(); }
+ [[nodiscard]] const_reverse_iterator crend() const noexcept { return rend(); }
+
+ [[nodiscard]] constexpr bool empty() const noexcept { return size() == 0; }
+ [[nodiscard]] constexpr storage_type front() const { return Q_ASSERT(!empty()), m_data[0]; }
+ [[nodiscard]] constexpr storage_type back() const { return Q_ASSERT(!empty()), m_data[m_size - 1]; }
+
+ //
+ // Qt compatibility API:
+ //
+ [[nodiscard]] constexpr bool isNull() const noexcept { return !m_data; }
+ [[nodiscard]] constexpr bool isEmpty() const noexcept { return empty(); }
+#if QT_DEPRECATED_SINCE(6, 0)
+ [[nodiscard]]
+ Q_DECL_DEPRECATED_X("Use size() and port callers to qsizetype.")
+ constexpr int length() const /* not nothrow! */
+ { return Q_ASSERT(int(size()) == size()), int(size()); }
+#endif
+private:
+ Q_ALWAYS_INLINE constexpr void verify(qsizetype pos, qsizetype n = 0) const
+ {
+ Q_ASSERT(pos >= 0);
+ Q_ASSERT(pos <= size());
+ Q_ASSERT(n >= 0);
+ Q_ASSERT(n <= size() - pos);
+ }
+ const storage_type *m_data;
+ qsizetype m_size;
+};
+template <bool UseChar8T>
+Q_DECLARE_TYPEINFO_BODY(QBasicUtf8StringView<UseChar8T>, Q_PRIMITIVE_TYPE);
+
+QT_BEGIN_NO_CHAR8_T_NAMESPACE
+using QUtf8StringView = QBasicUtf8StringView<false>;
+QT_END_NO_CHAR8_T_NAMESPACE
+
+QT_BEGIN_HAS_CHAR8_T_NAMESPACE
+using QUtf8StringView = QBasicUtf8StringView<true>;
+QT_END_HAS_CHAR8_T_NAMESPACE
+
+template <typename QStringLike, std::enable_if_t<std::is_same_v<QStringLike, QByteArray>, bool> = true>
+[[nodiscard]] inline q_no_char8_t::QUtf8StringView qToUtf8StringViewIgnoringNull(const QStringLike &s) noexcept
+{ return q_no_char8_t::QUtf8StringView(s.data(), s.size()); }
+
+QT_END_NAMESPACE
+
+#endif /* QUTF8STRINGVIEW_H */
diff --git a/src/corelib/text/qutf8stringview.qdoc b/src/corelib/text/qutf8stringview.qdoc
new file mode 100644
index 0000000000..9c0c620d88
--- /dev/null
+++ b/src/corelib/text/qutf8stringview.qdoc
@@ -0,0 +1,720 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the documentation of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:FDL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Free Documentation License Usage
+** Alternatively, this file may be used under the terms of the GNU Free
+** Documentation License version 1.3 as published by the Free Software
+** Foundation and appearing in the file included in the packaging of
+** this file. Please review the following information to ensure
+** the GNU Free Documentation License version 1.3 requirements
+** will be met: https://www.gnu.org/licenses/fdl-1.3.html.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+/*!
+ \class QUtf8StringView
+ \inmodule QtCore
+ \since 6.0
+ \brief The QUtf8StringView class provides a unified view on UTF-8 strings
+ with a read-only subset of the QString API.
+ \reentrant
+ \ingroup tools
+ \ingroup string-processing
+
+ A QUtf8StringView references a contiguous portion of a UTF-8
+ string it does not own. It acts as an interface type to all kinds
+ of UTF-8 string, without the need to construct a QString or
+ QByteArray first.
+
+ The UTF-8 string may be represented as an array (or an
+ array-compatible data-structure such as std::basic_string, etc.)
+ of \c char8_t, \c char, \c{signed char} or \c{unsigned char}.
+
+ QUtf8StringView is designed as an interface type; its main
+ use-case is as a function parameter type. When QUtf8StringViews
+ are used as automatic variables or data members, care must be
+ taken to ensure that the referenced string data (for example,
+ owned by a std::u8string) outlives the QUtf8StringView on all code
+ paths, lest the string view ends up referencing deleted data.
+
+ When used as an interface type, QUtf8StringView allows a single
+ function to accept a wide variety of UTF-8 string data
+ sources. One function accepting QUtf8StringView thus replaces
+ several function overloads (taking e.g. QByteArray), while at the
+ same time enabling even more string data sources to be passed to
+ the function, such as \c{u8"Hello World"}, a \c char8_t (C++20) or
+ \c char (C++17) string literal. The \c char8_t incompatibility
+ between C++17 and C++20 goes away when using QUtf8StringView.
+
+ Like all views, QUtf8StringViews should be passed by value, not by
+ reference-to-const:
+ \snippet code/src_corelib_text_qutf8stringview.cpp 0
+
+ If you want to give your users maximum freedom in what strings
+ they can pass to your function, consider using QAnyStringView
+ instead.
+
+ QUtf8StringView can also be used as the return value of a
+ function. If you call a function returning QUtf8StringView, take
+ extra care to not keep the QUtf8StringView around longer than the
+ function promises to keep the referenced string data alive. If in
+ doubt, obtain a strong reference to the data by calling toString()
+ to convert the QUtf8StringView into a QString.
+
+ QUtf8StringView is a \e{Literal Type}.
+
+ \section1 Compatible Character Types
+
+ QUtf8StringView accepts strings over a variety of character types:
+
+ \list
+ \li \c char, \c{signed char} and \c{unsigned char}
+ \li \c char8_t (C++20 only)
+ \endlist
+
+ \section1 Sizes and Sub-Strings
+
+ All sizes and positions in QUtf8StringView functions are in
+ UTF-8 code units (that is, UTF-8 multibyte sequences count as
+ two, three or four, depending on their length). QUtf8StringView
+ does not attempt to detect or prevent slicing right through
+ UTF-8 multibyte sequences. This is similar to the situation with
+ QStringView and surrogate pairs.
+
+ \section1 C++20, char8_t, and QUtf8StringView
+
+ In C++20, \c{u8""} string literals changed their type from
+ \c{const char[]} to \c{const char8_t[]}. If Qt 6 could have depended
+ on C++20, QUtf8StringView would store \c char8_t natively, and the
+ following functions and aliases would use (pointers to) \c char8_t:
+
+ \list
+ \li storage_type, value_type, etc
+ \li begin(), end(), data(), etc
+ \li front(), back(), at(), operator[]()
+ \endlist
+
+ This is what QUtf8StringView is expected to look like in Qt 7, but for
+ Qt 6, this was not possible. Instead of locking users into a C++17-era
+ interface for the next decade, Qt provides two QUtf8StringView classes,
+ in different (inline) namespaces. The first, in namespace \c{q_no_char8_t},
+ has a value_type of \c{const char} and is universally available.
+ The second, in namespace \c{q_has_char8_t}, has a value_type of
+ \c{const char8_t} and is only available when compiling in C++20 mode.
+
+ In C++17 mode, \c{q_no_char8_t} is an inline namespace, in C++20 it's
+ \c{q_has_char8_t}. This means that the name "QUtf8StringView" (without
+ explicit namespace) will denote different types in C++17 and C++20 modes.
+
+ Internally, both are instantiations of the same template class,
+ QBasicUtf8StringView. Please do not use the template class's name in your
+ source code.
+
+ All Qt APIs use \c{q_no_char8_t::QUtf8StringView} due to binary compatibility,
+ but these APIs accept \c{q_has_char8_t::QUtf8StringView} as well, since the
+ latter implicitly converts into the former, and vice versa.
+
+ In your own code, please use only \c QUtf8StringView and/or
+ \c{q_no_char8_t::QUtf8StringView}:
+
+ \list
+ \li If you only target C++20, then use "QUtf8StringView". It will be an alias
+ for \c{q_has_char8_t::QUtf8StringView} and you'll never look back.
+ \li If you only target C++17, then use "QUtf8StringView". It will be an alias
+ for \c{q_no_char8_t::QUtf8StringView} and for the time being, you're ok.
+ \li If you target both C++17 and C++20, then you have a choice to make:
+ \list
+ \li If you don't mind the source-incompatibility of return values of
+ QUtf8StringView::data() etc changing when compiling under C++17 or C++20,
+ use "QUtf8StringView". You will need to write your code in such a way that
+ it adapts to the differences in the QUtf8StringView API in different C++
+ versions.
+ \li If you don't want to deal with the above source-incompatibilities, or if
+ you need to maintain binary compatibility between C++20 and C++17 builds,
+ use "q_no_char8_t::QUtf8StringView" explicitly. Be aware that the
+ \c{q_no_char8_t} version will disappear in Qt 7.
+ \endlist
+ \endlist
+
+ Taken together: Just use QUtf8StringView unless you know what you're doing.
+
+ \sa QAnyStringView, QStringView, QString
+*/
+
+/*!
+ \typedef QUtf8StringView::storage_type
+
+ Alias for \c{char}.
+*/
+
+/*!
+ \typedef QUtf8StringView::value_type
+
+ Alias for \c{const char}. Provided for compatibility with the STL.
+*/
+
+/*!
+ \typedef QUtf8StringView::difference_type
+
+ Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL.
+*/
+
+/*!
+ \typedef QUtf8StringView::size_type
+
+ Alias for qsizetype. Provided for compatibility with the STL.
+*/
+
+/*!
+ \typedef QUtf8StringView::reference
+
+ Alias for \c{value_type &}. Provided for compatibility with the STL.
+
+ QUtf8StringView does not support mutable references, so this is the same
+ as const_reference.
+*/
+
+/*!
+ \typedef QUtf8StringView::const_reference
+
+ Alias for \c{value_type &}. Provided for compatibility with the STL.
+*/
+
+/*!
+ \typedef QUtf8StringView::pointer
+
+ Alias for \c{value_type *}. Provided for compatibility with the STL.
+
+ QUtf8StringView does not support mutable pointers, so this is the same
+ as const_pointer.
+*/
+
+/*!
+ \typedef QUtf8StringView::const_pointer
+
+ Alias for \c{value_type *}. Provided for compatibility with the STL.
+*/
+
+/*!
+ \typedef QUtf8StringView::iterator
+
+ This typedef provides an STL-style const iterator for QUtf8StringView.
+
+ QUtf8StringView does not support mutable iterators, so this is the same
+ as const_iterator.
+
+ \sa const_iterator, reverse_iterator
+*/
+
+/*!
+ \typedef QUtf8StringView::const_iterator
+
+ This typedef provides an STL-style const iterator for QUtf8StringView.
+
+ \sa iterator, const_reverse_iterator
+*/
+
+/*!
+ \typedef QUtf8StringView::reverse_iterator
+
+ This typedef provides an STL-style const reverse iterator for QUtf8StringView.
+
+ QUtf8StringView does not support mutable reverse iterators, so this is the
+ same as const_reverse_iterator.
+
+ \sa const_reverse_iterator, iterator
+*/
+
+/*!
+ \typedef QUtf8StringView::const_reverse_iterator
+
+ This typedef provides an STL-style const reverse iterator for QUtf8StringView.
+
+ \sa reverse_iterator, const_iterator
+*/
+
+/*!
+ \fn QUtf8StringView::QUtf8StringView()
+
+ Constructs a null string view.
+
+ \sa isNull()
+*/
+
+/*!
+ \fn QUtf8StringView::QUtf8StringView(std::nullptr_t)
+
+ Constructs a null string view.
+
+ \sa isNull()
+*/
+
+/*!
+ \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *str, qsizetype len)
+
+ Constructs a string view on \a str with length \a len.
+
+ The range \c{[str, str + len)} must remain valid for the lifetime of this string view object.
+
+ Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view.
+
+ The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr.
+
+ This constructor only participates in overload resolution if \c Char is a compatible
+ character type. The compatible character types are: \c char8_t, \c char, \c{signed char} and
+ \c{unsigned char}.
+*/
+
+/*!
+ \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *first, const Char *last)
+
+ Constructs a string view on \a first with length (\a last - \a first).
+
+ The range \c{[first,last)} must remain valid for the lifetime of
+ this string view object.
+
+ Passing \nullptr as \a first is safe if \a last is \nullptr, too,
+ and results in a null string view.
+
+ The behavior is undefined if \a last precedes \a first, or \a first
+ is \nullptr and \a last is not.
+
+ This constructor only participates in overload resolution if \c Char is a compatible
+ character type. The compatible character types are: \c char8_t, \c char, \c{signed char} and
+ \c{unsigned char}.
+*/
+
+/*!
+ \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *str)
+
+ Constructs a string view on \a str. The length is determined
+ by scanning for the first \c{Char(0)}.
+
+ \a str must remain valid for the lifetime of this string view object.
+
+ Passing \nullptr as \a str is safe and results in a null string view.
+
+ This constructor only participates in overload resolution if \a str
+ is not an array and if \c Char is a compatible character type. The
+ compatible character types are: \c char8_t, \c char, \c{signed char} and
+ \c{unsigned char}.
+*/
+
+/*!
+ \fn template <typename Char, size_t N> QUtf8StringView::QUtf8StringView(const Char (&string)[N])
+
+ Constructs a string view on the character string literal \a string.
+ The length is set to \c{N-1}, excluding the trailing \c{Char(0)}.
+ If you need the full array, use the constructor from pointer and
+ size instead:
+
+ \snippet code/src_corelib_text_qutf8stringview.cpp 2
+
+ \a string must remain valid for the lifetime of this string view
+ object.
+
+ This constructor only participates in overload resolution if \a string
+ is an actual array and if \c Char is a compatible character type. The
+ compatible character types are: \c char8_t, \c char, \c{signed char} and
+ \c{unsigned char}.
+*/
+
+/*!
+ \fn template <typename StdBasicString> QUtf8StringView::QUtf8StringView(const StdBasicString &str)
+
+ Constructs a string view on \a str. The length is taken from \c{str.size()}.
+
+ \c{str.data()} must remain valid for the lifetime of this string view object.
+
+ This constructor only participates in overload resolution if \c StdBasicString is an
+ instantiation of \c std::basic_string with a compatible character type. The
+ compatible character types are: \c char8_t, \c char, \c{signed char} and
+ \c{unsigned char}.
+
+ The string view will be empty if and only if \c{str.empty()}. It is unspecified
+ whether this constructor can result in a null string view (\c{str.data()} would
+ have to return \nullptr for this).
+
+ \sa isNull(), isEmpty()
+*/
+
+/*!
+ \fn QString QUtf8StringView::toString() const
+
+ Returns a deep copy of this string view's data as a QString.
+
+ The return value will be a null QString if and only if this string view is null.
+*/
+
+/*!
+ \fn QUtf8StringView::data() const
+
+ Returns a const pointer to the first code point in the string.
+
+ \note The character array represented by the return value is \e not null-terminated.
+
+ \sa begin(), end(), utf8()
+*/
+
+/*!
+ \fn QUtf8StringView::utf8() const
+
+ Returns a const pointer to the first code point in the string.
+
+ The result is returned as a \c{const char8_t*}, so this function is only available when
+ compiling in C++20 mode.
+
+ \note The character array represented by the return value is \e not null-terminated.
+
+ \sa begin(), end(), data()
+*/
+
+/*!
+ \fn QUtf8StringView::const_iterator QUtf8StringView::begin() const
+
+ Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the first code point in
+ the string.
+
+ This function is provided for STL compatibility.
+
+ \sa end(), cbegin(), rbegin(), data()
+*/
+
+/*!
+ \fn QUtf8StringView::const_iterator QUtf8StringView::cbegin() const
+
+ Same as begin().
+
+ This function is provided for STL compatibility.
+
+ \sa cend(), begin(), crbegin(), data()
+*/
+
+/*!
+ \fn QUtf8StringView::const_iterator QUtf8StringView::end() const
+
+ Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the imaginary
+ code point after the last code point in the string.
+
+ This function is provided for STL compatibility.
+
+ \sa begin(), cend(), rend()
+*/
+
+/*! \fn QUtf8StringView::const_iterator QUtf8StringView::cend() const
+
+ Same as end().
+
+ This function is provided for STL compatibility.
+
+ \sa cbegin(), end(), crend()
+*/
+
+/*!
+ \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::rbegin() const
+
+ Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to the first
+ code point in the string, in reverse order.
+
+ This function is provided for STL compatibility.
+
+ \sa rend(), crbegin(), begin()
+*/
+
+/*!
+ \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::crbegin() const
+
+ Same as rbegin().
+
+ This function is provided for STL compatibility.
+
+ \sa crend(), rbegin(), cbegin()
+*/
+
+/*!
+ \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::rend() const
+
+ Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to one past
+ the last code point in the string, in reverse order.
+
+ This function is provided for STL compatibility.
+
+ \sa rbegin(), crend(), end()
+*/
+
+/*!
+ \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::crend() const
+
+ Same as rend().
+
+ This function is provided for STL compatibility.
+
+ \sa crbegin(), rend(), cend()
+*/
+
+/*!
+ \fn bool QUtf8StringView::empty() const
+
+ Returns whether this string view is empty - that is, whether \c{size() == 0}.
+
+ This function is provided for STL compatibility.
+
+ \sa isEmpty(), isNull(), size(), length()
+*/
+
+/*!
+ \fn bool QUtf8StringView::isEmpty() const
+
+ Returns whether this string view is empty - that is, whether \c{size() == 0}.
+
+ This function is provided for compatibility with other Qt containers.
+
+ \sa empty(), isNull(), size(), length()
+*/
+
+/*!
+ \fn bool QUtf8StringView::isNull() const
+
+ Returns whether this string view is null - that is, whether \c{data() == nullptr}.
+
+ This function is provided for compatibility with other Qt containers.
+
+ \sa empty(), isEmpty(), size(), length()
+*/
+
+/*!
+ \fn qsizetype QUtf8StringView::size() const
+
+ Returns the size of this string view, in UTF-8 code units (that is,
+ multi-byte sequences count as more than one for the purposes of this function, the same
+ as surrogate pairs in QString and QStringView).
+
+ \sa empty(), isEmpty(), isNull(), length()
+*/
+
+/*!
+ \fn int QUtf8StringView::length() const
+ \obsolete
+ Use size() and port callers to qsizetype.
+
+ Same as size(), except returns the result as an \c int.
+
+ This function is provided for compatibility with other Qt containers.
+
+ \warning QUtf8StringView can represent strings with more than 2\sup{31} code points.
+ Calling this function on a string view for which size() returns a value greater
+ than \c{INT_MAX} constitutes undefined behavior.
+
+ \sa empty(), isEmpty(), isNull(), size()
+*/
+
+/*!
+ \fn QUtf8StringView::operator[](qsizetype n) const
+
+ Returns the code point at position \a n in this string view.
+
+ The behavior is undefined if \a n is negative or not less than size().
+
+ \sa at(), front(), back()
+*/
+
+/*!
+ \fn QUtf8StringView::at(qsizetype n) const
+
+ Returns the code point at position \a n in this string view.
+
+ The behavior is undefined if \a n is negative or not less than size().
+
+ \sa operator[](), front(), back()
+*/
+
+/*!
+ \fn QUtf8StringView::front() const
+
+ Returns the first code point in the string. Same as first().
+
+ This function is provided for STL compatibility.
+
+ \warning Calling this function on an empty string view constitutes
+ undefined behavior.
+
+ \sa back()
+*/
+
+/*!
+ \fn QUtf8StringView::back() const
+
+ Returns the last code point in the string. Same as last().
+
+ This function is provided for STL compatibility.
+
+ \warning Calling this function on an empty string view constitutes
+ undefined behavior.
+
+ \sa front()
+*/
+
+/*!
+ \fn QUtf8StringView::mid(qsizetype start, qsizetype length) const
+
+ Returns the substring of length \a length starting at position
+ \a start in this object.
+
+ \obsolete Use sliced() instead in new code.
+
+ Returns an empty string view if \a start exceeds the
+ length of the string. If there are fewer than \a length code points
+ available in the string starting at \a start, or if
+ \a length is negative (default), the function returns all code points that
+ are available from \a start.
+
+ \sa first(), last(), sliced(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::left(qsizetype length) const
+
+ \obsolete Use first() instead in new code.
+
+ Returns the substring of length \a length starting at position
+ 0 in this object.
+
+ The entire string is returned if \a length is greater than or equal
+ to size(), or less than zero.
+
+ \sa first(), last(), sliced(), startsWith(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::right(qsizetype length) const
+
+ \obsolete Use last() instead in new code.
+
+ Returns the substring of length \a length starting at position
+ size() - \a length in this object.
+
+ The entire string is returned if \a length is greater than or equal
+ to size(), or less than zero.
+
+ \sa first(), last(), sliced(), endsWith(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::first(qsizetype n) const
+
+ Returns a string view that contains the first \a n code points
+ of this string.
+
+ \note The behavior is undefined when \a n < 0 or \a n > size().
+
+ \sa last(), sliced(), startsWith(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::last(qsizetype n) const
+
+ Returns a string view that contains the last \a n code points of this string.
+
+ \note The behavior is undefined when \a n < 0 or \a n > size().
+
+ \sa first(), sliced(), endsWith(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::sliced(qsizetype pos, qsizetype n) const
+
+ Returns a string view containing \a n code points of this string view,
+ starting at position \a pos.
+
+ \note The behavior is undefined when \a pos < 0, \a n < 0,
+ or \a pos + \a n > size().
+
+ \sa first(), last(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::sliced(qsizetype pos) const
+
+ Returns a string view starting at position \a pos in this object,
+ and extending to its end.
+
+ \note The behavior is undefined when \a pos < 0 or \a pos > size().
+
+ \sa first(), last(), chopped(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::chopped(qsizetype length) const
+
+ Returns the substring of length size() - \a length starting at the
+ beginning of this object.
+
+ Same as \c{first(size() - length)}.
+
+ \note The behavior is undefined when \a length < 0 or \a length > size().
+
+ \sa sliced(), first(), last(), chop(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::truncate(qsizetype length)
+
+ Truncates this string view to \a length code points.
+
+ Same as \c{*this = first(length)}.
+
+ \note The behavior is undefined when \a length < 0 or \a length > size().
+
+ \sa sliced(), first(), last(), chopped(), chop()
+*/
+
+/*!
+ \fn QUtf8StringView::chop(qsizetype length)
+
+ Truncates this string view by \a length code points.
+
+ Same as \c{*this = first(size() - length)}.
+
+ \note The behavior is undefined when \a length < 0 or \a length > size().
+
+ \sa sliced(), first(), last(), chopped(), truncate()
+*/
+
+/*!
+ \fn QUtf8StringView::trimmed() const
+
+ Strips leading and trailing whitespace and returns the result.
+
+ Whitespace means any code point for which QChar::isSpace() returns
+ \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
+ '\\f', '\\r', and ' '.
+*/
+
+/*!
+ \fn template <typename QStringLike> qToUtf8StringViewIgnoringNull(const QStringLike &s);
+ \relates QUtf8StringView
+ \internal
+
+ Convert \a s to a QUtf8StringView ignoring \c{s.isNull()}.
+
+ Returns a string-view that references \a{s}'s data, but is never null.
+
+ This is a faster way to convert a QByteArray to a QUtf8StringView,
+ if null QByteArrays can legitimately be treated as empty ones.
+
+ \sa QByteArray::isNull(), QUtf8StringView
+*/
diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri
index 16d585cac7..0704426f09 100644
--- a/src/corelib/text/text.pri
+++ b/src/corelib/text/text.pri
@@ -1,6 +1,7 @@
# Qt text / string / character / unicode / byte array module
HEADERS += \
+ text/qanystringview.h \
text/qbytearray.h \
text/qbytearray_p.h \
text/qbytearrayalgorithms.h \
@@ -30,8 +31,8 @@ HEADERS += \
text/qstringtokenizer.h \
text/qtextboundaryfinder.h \
text/qunicodetables_p.h \
- text/qunicodetools_p.h
-
+ text/qunicodetools_p.h \
+ text/qutf8stringview.h
SOURCES += \
text/qbytearray.cpp \