diff options
author | Marc Mutz <marc.mutz@kdab.com> | 2020-05-20 15:20:07 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-08-31 22:57:49 +0200 |
commit | 2c9529e158fc589c48e6b1fb61dca2133e33ac4d (patch) | |
tree | e9429715ab339d95c2ba9456bf39a894e8b41539 /src/corelib/text | |
parent | f0ae973244026ca5382f05630bd799b44154d224 (diff) |
Long live Q{Any,Utf8}StringView!
We need to add these two classes at the same time, because
QAnyStringView makes all QUtf8StringView relational operators moot. We
might want to add some later, esp. for UTF-8/UTF-8 comparisons, to
avoid the pessimization that we can't early-out on size() mismatch in
QAnyStringView equality operators, but that's an optimization, not a
correctness issue, and can be fixed in a source-compatible way even
after Qt 6 is released.
To deal with the char8_t problem in C++20, make QUtf8StringView a
class template out of which two UTF-8 views can be instantiated: the
Qt 7 version, which depends on C++20 char8_t as value_type, and the Qt
6 version where value_type is a char. Use inline namespaces to map the
QUtf8StringView identifier to one or the other, depending on the C++
version used to compile the user code. The inline namespace names must
necessarily be a bit ugly, as their inline'ness depends on __cpp_char8_t. If
we simply used q_v1/q_v2 we'd be blocking these names for Qt inline
namespaces forever, because it's likely that inline'ness of other
users of inline namespaces in Qt depends on things other than
__cpp_char8_t. While inline'ness of namespaces is, theoretically
speaking, a compile-time-only property, at least Clang warns about
mixed use of inline on a given namespace, so we need to bite the
bullet here. This is also the reason for the QT_BEGIN_..._NAMESPACE
macros: GCC is ok with the first declaration making a namespace
inline, while Clang warns upon re-opening an inline namespace as a
non-inline one.
[ChangeLog][QtCore][QUtf8StringView] New class.
[ChangeLog][QtCore][QAnyStringView] New class.
Change-Id: Ia7179760fca0e0b67d52f5accb0a62e389b17913
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src/corelib/text')
-rw-r--r-- | src/corelib/text/qanystringview.h | 292 | ||||
-rw-r--r-- | src/corelib/text/qanystringview.qdoc | 406 | ||||
-rw-r--r-- | src/corelib/text/qstring.cpp | 127 | ||||
-rw-r--r-- | src/corelib/text/qstring.h | 72 | ||||
-rw-r--r-- | src/corelib/text/qstringalgorithms.h | 27 | ||||
-rw-r--r-- | src/corelib/text/qstringbuilder.h | 8 | ||||
-rw-r--r-- | src/corelib/text/qutf8stringview.h | 329 | ||||
-rw-r--r-- | src/corelib/text/qutf8stringview.qdoc | 720 | ||||
-rw-r--r-- | src/corelib/text/text.pri | 5 |
9 files changed, 1957 insertions, 29 deletions
diff --git a/src/corelib/text/qanystringview.h b/src/corelib/text/qanystringview.h new file mode 100644 index 0000000000..61761c6a82 --- /dev/null +++ b/src/corelib/text/qanystringview.h @@ -0,0 +1,292 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. 
Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef QANYSTRINGVIEW_H +#define QANYSTRINGVIEW_H + +#include <QtCore/qstringview.h> +#include <QtCore/qutf8stringview.h> + +QT_BEGIN_NAMESPACE + +template <typename, typename> class QStringBuilder; + +class QAnyStringView +{ +public: + typedef qptrdiff difference_type; + typedef qsizetype size_type; +private: + template <typename Char> + using if_compatible_char = std::enable_if_t<std::disjunction_v< + QtPrivate::IsCompatibleCharType<Char>, + QtPrivate::IsCompatibleChar8Type<Char> + >, bool>; + + template <typename Pointer> + using if_compatible_pointer = std::enable_if_t<std::disjunction_v< + QtPrivate::IsCompatiblePointer<Pointer>, + QtPrivate::IsCompatiblePointer8<Pointer> + >, bool>; + + + template <typename T> + using if_compatible_container = std::enable_if_t<std::disjunction_v< + QtPrivate::IsContainerCompatibleWithQStringView<T>, + QtPrivate::IsContainerCompatibleWithQUtf8StringView<T> + >, bool>; + + // confirm we don't make an accidental copy constructor: + static_assert(QtPrivate::IsContainerCompatibleWithQStringView<QAnyStringView>::value == false); + static_assert(QtPrivate::IsContainerCompatibleWithQUtf8StringView<QAnyStringView>::value == false); + + template <typename Char> + static constexpr std::size_t encodeType(qsizetype sz) noexcept + { + // only deals with Utf8 and Utf16 - there's only one way to create + // a Latin1 string, and that ctor deals with the tag itself + Q_ASSERT(sz >= 0); + Q_ASSERT(sz <= qsizetype(SizeMask)); + return std::size_t(sz) | uint(sizeof(Char) == sizeof(char16_t)) * Tag::Utf16; + } + + template <typename Char> + static qsizetype lengthHelperPointer(const Char *str) noexcept + { +#if defined(Q_CC_GNU) && 
!defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) + if (__builtin_constant_p(*str)) { + qsizetype result = 0; + while (*str++ != u'\0') + ++result; + return result; + } +#endif + if constexpr (sizeof(Char) == sizeof(char16_t)) + return QtPrivate::qustrlen(reinterpret_cast<const char16_t*>(str)); + else + return qsizetype(strlen(reinterpret_cast<const char*>(str))); + } + + template <typename Container> + static constexpr qsizetype lengthHelperContainer(const Container &c) noexcept + { + return qsizetype(std::size(c)); + } + + template <typename Char, size_t N> + static constexpr qsizetype lengthHelperContainer(const Char (&)[N]) noexcept + { + return qsizetype(N - 1); + } + + static QChar toQChar(char ch) noexcept { return toQChar(QLatin1Char{ch}); } // we don't handle UTF-8 multibytes + static QChar toQChar(QChar ch) noexcept { return ch; } + static QChar toQChar(QLatin1Char ch) noexcept { return ch; } + + explicit constexpr QAnyStringView(const void *d, qsizetype n, std::size_t sizeAndType) noexcept + : m_data{d}, m_size{std::size_t(n) | (sizeAndType & TypeMask)} {} +public: + constexpr QAnyStringView() noexcept + : m_data{nullptr}, m_size{0} {} + constexpr QAnyStringView(std::nullptr_t) noexcept + : QAnyStringView() {} + + template <typename Char, if_compatible_char<Char> = true> + constexpr QAnyStringView(const Char *str, qsizetype len) + : m_data{str}, + m_size{encodeType<Char>((Q_ASSERT(len >= 0), Q_ASSERT(str || !len), len))} {} + + template <typename Char, if_compatible_char<Char> = true> + constexpr QAnyStringView(const Char *f, const Char *l) + : QAnyStringView(f, l - f) {} + +#ifdef Q_CLANG_QDOC + template <typename Char, size_t N> + constexpr QAnyStringView(const Char (&array)[N]) noexcept; + + template <typename Char> + constexpr QAnyStringView(const Char *str) noexcept; +#else + + template <typename Pointer, if_compatible_pointer<Pointer> = true> + constexpr QAnyStringView(const Pointer &str) noexcept + : QAnyStringView{str, str ? 
lengthHelperPointer(str) : 0} {} +#endif + + // defined in qstring.h + inline QAnyStringView(const QByteArray &str) noexcept; // TODO: Should we have this at all? Remove? + inline QAnyStringView(const QString &str) noexcept; + inline constexpr QAnyStringView(QLatin1String str) noexcept; + + // defined in qstringbuilder.h + template <typename A, typename B> + inline QAnyStringView(const QStringBuilder<A, B> &expr, + typename QStringBuilder<A, B>::ConvertTo &&capacity = {}); + + template <typename Container, if_compatible_container<Container> = true> + constexpr QAnyStringView(const Container &c) noexcept + : QAnyStringView(std::data(c), lengthHelperContainer(c)) {} + + template <typename Char, if_compatible_char<Char> = true> + constexpr QAnyStringView(const Char &c) noexcept + : QAnyStringView{&c, 1} {} + constexpr QAnyStringView(const QChar &c) noexcept + : QAnyStringView{&c, 1} {} + + template <typename Char, typename Container = decltype(QChar::fromUcs4(U'x')), + std::enable_if_t<std::is_same_v<Char, char32_t>, bool> = true> + constexpr QAnyStringView(Char c, Container &&capacity = {}) + : QAnyStringView(capacity = QChar::fromUcs4(c)) {} + + constexpr QAnyStringView(QStringView v) noexcept + : QAnyStringView(std::data(v), lengthHelperContainer(v)) {} + + template <bool UseChar8T> + constexpr QAnyStringView(QBasicUtf8StringView<UseChar8T> v) noexcept + : QAnyStringView(std::data(v), lengthHelperContainer(v)) {} + + // defined in qstring.h: + template <typename Visitor> + inline constexpr decltype(auto) visit(Visitor &&v) const; + + [[nodiscard]] inline QString toString() const; // defined in qstring.h + + [[nodiscard]] constexpr qsizetype size() const noexcept { return qsizetype(m_size & SizeMask); } + [[nodiscard]] constexpr const void *data() const noexcept { return m_data; } + + [[nodiscard]] Q_CORE_EXPORT static int compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; + + // + // STL compatibility API: + // + 
[[nodiscard]] constexpr QChar front() const; // NOT noexcept! + [[nodiscard]] constexpr QChar back() const; // NOT noexcept! + [[nodiscard]] constexpr bool empty() const noexcept { return size() == 0; } + [[nodiscard]] constexpr qsizetype size_bytes() const noexcept + { return size() * charSize(); } + + // + // Qt compatibility API: + // + [[nodiscard]] constexpr bool isNull() const noexcept { return !m_data; } + [[nodiscard]] constexpr bool isEmpty() const noexcept { return empty(); } +#if QT_DEPRECATED_SINCE(6, 0) + [[nodiscard]] + Q_DECL_DEPRECATED_X("Use size() and port callers to qsizetype.") + constexpr int length() const /* not nothrow! */ + { return Q_ASSERT(int(size()) == size()), int(size()); } +#endif +private: + // TODO: Optimize by inverting and storing the flags in the low bits and + // the size in the high. + static_assert(std::is_same_v<std::size_t, size_t>); + static_assert(sizeof(size_t) == sizeof(qsizetype)); + static constexpr size_t SizeMask = (std::numeric_limits<size_t>::max)() / 4; + static constexpr size_t Latin1Flag = SizeMask + 1; + static constexpr size_t TwoByteCodePointFlag = Latin1Flag << 1; + static constexpr size_t TypeMask = (std::numeric_limits<size_t>::max)() & ~SizeMask; + static_assert(TypeMask == (Latin1Flag|TwoByteCodePointFlag)); + // HI HI LO LO ... 
+ // 0 0 SZ SZ Utf8 + // 0 1 SZ SZ Latin1 + // 1 0 SZ SZ Utf16 + // 1 1 SZ SZ Unused + // ^ ^ latin1 + // | sizeof code-point == 2 + enum Tag : size_t { + Utf8 = 0, + Latin1 = Latin1Flag, + Utf16 = TwoByteCodePointFlag, + Unused = TypeMask, + }; + [[nodiscard]] constexpr Tag tag() const noexcept { return Tag{m_size & TypeMask}; } + [[nodiscard]] constexpr bool isUtf16() const noexcept { return tag() == Tag::Utf16; } + [[nodiscard]] constexpr bool isUtf8() const noexcept { return tag() == Tag::Utf8; } + [[nodiscard]] constexpr bool isLatin1() const noexcept { return tag() == Tag::Latin1; } + [[nodiscard]] constexpr QStringView asStringView() const + { return Q_ASSERT(isUtf16()), QStringView{m_data_utf16, size()}; } + [[nodiscard]] constexpr q_no_char8_t::QUtf8StringView asUtf8StringView() const + { return Q_ASSERT(isUtf8()), q_no_char8_t::QUtf8StringView{m_data_utf8, size()}; } + [[nodiscard]] inline constexpr QLatin1String asLatin1StringView() const; + [[nodiscard]] constexpr size_t charSize() const noexcept { return isUtf16() ? 
2 : 1; } + Q_ALWAYS_INLINE constexpr void verify(qsizetype pos, qsizetype n = 0) const + { + Q_ASSERT(pos >= 0); + Q_ASSERT(pos <= size()); + Q_ASSERT(n >= 0); + Q_ASSERT(n <= size() - pos); + } + union { + const void *m_data; + const char *m_data_utf8; + const char16_t *m_data_utf16; + }; + size_t m_size; +}; +Q_DECLARE_TYPEINFO(QAnyStringView, Q_PRIMITIVE_TYPE); + +template <typename QStringLike, std::enable_if_t<std::disjunction_v< + std::is_same<QStringLike, QString>, + std::is_same<QStringLike, QByteArray> + >, bool> = true> +[[nodiscard]] inline QAnyStringView qToAnyStringViewIgnoringNull(const QStringLike &s) noexcept +{ return QAnyStringView(s.data(), s.size()); } + + +#define Q_ANY_SV_MAKE_RELOP(op) \ + [[nodiscard]] Q_ALWAYS_INLINE auto operator op (QAnyStringView lhs, QAnyStringView rhs) noexcept \ + { return QAnyStringView::compare(lhs, rhs) op 0; } \ + /* end */ + +Q_ANY_SV_MAKE_RELOP(==) // size() shortcut doesn't apply for UTF-8 vs. {L1, UTF-16} +Q_ANY_SV_MAKE_RELOP(!=) + +#ifdef __cpp_impl_three_way_comparison +Q_ANY_SV_MAKE_RELOP(<=>) +#else +Q_ANY_SV_MAKE_RELOP(<=) +Q_ANY_SV_MAKE_RELOP(>=) +Q_ANY_SV_MAKE_RELOP(<) +Q_ANY_SV_MAKE_RELOP(>) +#endif + +#undef Q_ANY_SV_MAKE_RELOP + +QT_END_NAMESPACE + +#endif /* QANYSTRINGVIEW_H */ diff --git a/src/corelib/text/qanystringview.qdoc b/src/corelib/text/qanystringview.qdoc new file mode 100644 index 0000000000..b3e8baa3f9 --- /dev/null +++ b/src/corelib/text/qanystringview.qdoc @@ -0,0 +1,406 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the documentation of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:FDL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Free Documentation License Usage +** Alternatively, this file may be used under the terms of the GNU Free +** Documentation License version 1.3 as published by the Free Software +** Foundation and appearing in the file included in the packaging of +** this file. Please review the following information to ensure +** the GNU Free Documentation License version 1.3 requirements +** will be met: https://www.gnu.org/licenses/fdl-1.3.html. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +/*! + \class QAnyStringView + \inmodule QtCore + \since 6.0 + \brief The QAnyStringView class provides a unified view on Latin-1, UTF-8, + or UTF-16 strings with a read-only subset of the QString API. + \reentrant + \ingroup tools + \ingroup string-processing + + A QAnyStringView references a contiguous portion of a string it does + not own. It acts as an interface type to all kinds of strings, + without the need to construct a QString first. + + Unlike QStringView and QUtf8StringView, QAnyStringView can hold + strings of any of the following encodings: UTF-8, UTF-16, and + Latin-1. The latter is supported to keep old source working + efficiently. It is expected that by Qt 7, the Latin-1 support will + be removed. + + The string may be represented as an array (or an array-compatible + data-structure such as QString, std::basic_string, etc.) 
of \c + char, \c char8_t, QChar, \c ushort, \c char16_t or (on platforms, + such as Windows, where it is a 16-bit type) \c wchar_t. + + QAnyStringView is designed as an interface type; its main use-case + is as a function parameter type. When QAnyStringViews are used as + automatic variables or data members, care must be taken to ensure + that the referenced string data (for example, owned by a QString) + outlives the QAnyStringView on all code paths, lest the string + view ends up referencing deleted data. + + When used as an interface type, QAnyStringView allows a single + function to accept a wide variety of string data sources. One + function accepting QAnyStringView thus replaces five function + overloads (taking QString, \c{(const QChar*, int)}, + QUtf8StringView, QLatin1String (but see above), and QChar), while + at the same time enabling even more string data sources to be + passed to the function, such as \c{u8"Hello World"}, a \c char8_t + string literal. + + Like elsewhere in Qt, QAnyStringView assumes \c char data is encoded + in UTF-8, unless it is presented as a QLatin1String. + + QAnyStringViews should be passed by value, not by reference-to-const: + \snippet code/src_corelib_text_qanystringview.cpp 0 + + QAnyStringView can also be used as the return value of a function, + but this is not recommended. QUtf8StringView or QStringView are + better suited as function return values. If you call a function + returning QAnyStringView, take extra care to not keep the + QAnyStringView around longer than the function promises to keep + the referenced string data alive. If in doubt, obtain a strong + reference to the data by calling toString() to convert the + QAnyStringView into a QString. + + QAnyStringView is a \e{Literal Type}. 
+ + \section Compatible Character Types + + QAnyStringView accepts strings over a variety of character types: + + \list + \li \c char (both signed and unsigned) + \li \c char8_t (C++20 only) + \li \c char16_t + \li \c wchar_t (where it's a 16-bit type, e.g. Windows) + \li \c ushort + \li \c QChar + \endlist + + The 8-bit character types are interpreted as UTF-8 data (except when + presented as a QLatin1String) while the 16-bit character types are + interpreted as UTF-16 data in host byte order (the same as QString). + + \section Sizes and Sub-Strings + + All sizes and positions in QAnyStringView functions are in the + encoding's code points (that is, UTF-16 surrogate pairs count as + two for the purposes of these functions, the same as in QString, + and UTF-8 multibyte sequences count as two, three or four, + depending on their length). + + \sa QUtf8StringView, QStringView +*/ + +/*! + \typedef QAnyStringView::difference_type + + Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL. +*/ + +/*! + \typedef QAnyStringView::size_type + + Alias for qsizetype. Provided for compatibility with the STL. +*/ + +/*! + \fn QAnyStringView::QAnyStringView() + + Constructs a null string view. + + \sa isNull() +*/ + +/*! + \fn QAnyStringView::QAnyStringView(std::nullptr_t) + + Constructs a null string view. + + \sa isNull() +*/ + +/*! + \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *str, qsizetype len) + + Constructs a string view on \a str with length \a len. + + The range \c{[str,str+len)} must remain valid for the lifetime of this string view object. + + Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view. + + The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr. + + This constructor only participates in overload resolution if \c Char is a compatible + character type. + + \sa isNull(), {Compatible Character Types} +*/ + +/*! 
+ \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *first, const Char *last) + + Constructs a string view on \a first with length (\a last - \a first). + + The range \c{[first,last)} must remain valid for the lifetime of + this string view object. + + Passing \c \nullptr as \a first is safe if \a last is \nullptr, too, + and results in a null string view. + + The behavior is undefined if \a last precedes \a first, or \a first + is \nullptr and \a last is not. + + This constructor only participates in overload resolution if \c Char + is a compatible character type. + + \sa isNull(), {Compatible Character Types} +*/ + +/*! + \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *str) + + Constructs a string view on \a str. The length is determined + by scanning for the first \c{Char(0)}. + + \a str must remain valid for the lifetime of this string view object. + + Passing \nullptr as \a str is safe and results in a null string view. + + This constructor only participates in overload resolution if \a + str is not an array and if \c Char is a compatible character + type. + + \sa isNull(), {Compatible Character Types} +*/ + +/*! + \fn template <typename Char, size_t N> QAnyStringView::QAnyStringView(const Char (&string)[N]) + + Constructs a string view on the character string literal \a string. + The length is set to \c{N-1}, excluding the trailing \c{Char(0)}. + If you need the full array, use the constructor from pointer and + size instead: + + \snippet code/src_corelib_text_qanystringview.cpp 2 + + \a string must remain valid for the lifetime of this string view + object. + + This constructor only participates in overload resolution if \a + string is an actual array and \c Char is a compatible character + type. + + \sa {Compatible Character Types} +*/ + +/*! + \fn QAnyStringView::QAnyStringView(const QString &str) + + Constructs a string view on \a str. 
+ + \c{str.data()} must remain valid for the lifetime of this string view object. + + The string view will be null if and only if \c{str.isNull()}. +*/ + +/*! + \fn QAnyStringView::QAnyStringView(const QByteArray &str) + + Constructs a string view on \a str. The data in \a str is interpreted as UTF-8. + + \c{str.data()} must remain valid for the lifetime of this string view object. + + The string view will be null if and only if \c{str.isNull()}. +*/ + +/*! + \fn template <typename StdBasicString> QAnyStringView::QAnyStringView(const StdBasicString &str) + + Constructs a string view on \a str. The length is taken from \c{str.size()}. + + \c{str.data()} must remain valid for the lifetime of this string view object. + + This constructor only participates in overload resolution if \c StdBasicString is an + instantiation of \c std::basic_string with a compatible character type. + + The string view will be empty if and only if \c{str.empty()}. It is unspecified + whether this constructor can result in a null string view (\c{str.data()} would + have to return \nullptr for this). + + \sa isNull(), isEmpty(), {Compatible Character Types} +*/ + +/*! + \fn QString QAnyStringView::toString() const + + Returns a deep copy of this string view's data as a QString. + + The return value will be a null QString if and only if this string view is null. +*/ + +/*! + \fn const void *QAnyStringView::data() const + + Returns a const pointer to the first character in the string. + + \note The character array represented by the return value is \e not null-terminated. + + \sa size_bytes() +*/ + +/*! + \fn bool QAnyStringView::empty() const + + Returns whether this string view is empty - that is, whether \c{size() == 0}. + + This function is provided for STL compatibility. + + \sa isEmpty(), isNull(), size() +*/ + +/*! + \fn bool QAnyStringView::isEmpty() const + + Returns whether this string view is empty - that is, whether \c{size() == 0}. 
+ + This function is provided for compatibility with other Qt containers. + + \sa empty(), isNull(), size() +*/ + +/*! + \fn bool QAnyStringView::isNull() const + + Returns whether this string view is null - that is, whether \c{data() == nullptr}. + + This function is provided for compatibility with other Qt containers. + + \sa empty(), isEmpty(), size() +*/ + +/*! + \fn qsizetype QAnyStringView::size() const + + Returns the size of this string view, in the encoding's code points. + + \sa empty(), isEmpty(), isNull(), size_bytes(), {Sizes and Sub-Strings} +*/ + +/*! + \fn qsizetype QAnyStringView::size_bytes() const + + Returns the size of this string view, but in bytes, not code-points. + + You can use this function together with data() for hashing or serialization. + + This function is provided for STL compatibility. + + \sa size(), data() +*/ + +/*! + \fn int QAnyStringView::length() const + \obsolete + Use size() instead, and port callers to qsizetype. + + Same as size(), except that it returns the result as an \c int. + + This function is provided for compatibility with other Qt containers. + + \warning QAnyStringView can represent strings with more than 2\sup{31} characters. + Calling this function on a string view for which size() returns a value greater + than \c{INT_MAX} constitutes undefined behavior. + + \sa size() +*/ + +/*! + \fn QChar QAnyStringView::front() const + + Returns the first character in the string. + + This function is provided for STL compatibility. + + \warning Calling this function on an empty string view constitutes + undefined behavior. + + \sa back(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QChar QAnyStringView::back() const + + Returns the last character in the string. + + This function is provided for STL compatibility. + + \warning Calling this function on an empty string view constitutes + undefined behavior. + + \sa front(), {Sizes and Sub-Strings} +*/ + +/*! 
+ \fn QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) + + Returns an integer that compares to zero as \a lhs compares to \a rhs. + + If \a cs is Qt::CaseSensitive (the default), the comparison is case sensitive; + otherwise the comparison is case-insensitive. + + \sa operator==(), operator<(), operator>() +*/ + +/*! + \fn template <typename QStringLike> qToAnyStringViewIgnoringNull(const QStringLike &s); + \since 6.0 + \internal + + Convert \a s to a QAnyStringView ignoring \c{s.isNull()}. + + Returns a string-view that references \a{s}'s data, but is never null. + + This is a faster way to convert a QString or QByteArray to a QAnyStringView, + if null QStrings or QByteArrays can legitimately be treated as empty ones. + + \sa QString::isNull(), QAnyStringView +*/ + +/*! + \fn QAnyStringView::toWCharArray(wchar_t *array) const + + Transcribes this string into the given \a array. + + The caller is responsible for ensuring \a array is large enough to hold the + \c wchar_t encoding of this string (allocating the array with space for size() + elements is always sufficient). The array is encoded in UTF-16 on + platforms where \c wchar_t is 2 bytes wide (e.g. Windows); otherwise (Unix + systems), \c wchar_t is assumed to be 4 bytes wide and the data is written + in UCS-4. + + \note This function writes no null terminator to the end of \a array. + + Returns the number of \c wchar_t entries written to \a array. + + \sa QString::toWCharArray() +*/ diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 068247adb4..bf37f2e675 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -1186,8 +1186,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, size_t l) return 0; } -template <typename Number> -constexpr int lencmp(Number lhs, Number rhs) noexcept +constexpr int lencmp(qsizetype lhs, qsizetype rhs) noexcept { return lhs == rhs ? 0 : lhs > rhs ? 
1 : @@ -1283,6 +1282,49 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens return r ? r : lencmp(lhs.size(), rhs.size()); } +static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept +{ + if (cs == Qt::CaseSensitive) + return QUtf8::compareUtf8(lhs.data(), lhs.size(), rhs.data(), rhs.size()); + else + return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +static int qt_compare_strings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept +{ + return -qt_compare_strings(rhs, lhs, cs); +} + +static int qt_compare_strings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept +{ + return qt_compare_strings(lhs, rhs.toString(), cs); // ### optimize! +} + +static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept +{ + return -qt_compare_strings(rhs, lhs, cs); +} + +static int qt_compare_strings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept +{ + if (lhs.isEmpty()) + return lencmp(0, rhs.size()); + if (cs == Qt::CaseInsensitive) + return qt_compare_strings(lhs.toString(), rhs.toString(), cs); // ### optimize! + const auto l = std::min(lhs.size(), rhs.size()); + int r = qstrncmp(lhs.data(), rhs.data(), l); + return r ? r : lencmp(lhs.size(), rhs.size()); +} + +int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept +{ + return lhs.visit([rhs, cs](auto lhs) { + return rhs.visit([lhs, cs](auto rhs) { + return qt_compare_strings(lhs, rhs, cs); + }); + }); +} + /*! \relates QStringView \internal @@ -1329,19 +1371,19 @@ int QtPrivate::compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensit /*! 
\relates QStringView \internal - \since 5.10 + \since 6.0 \overload +*/ +int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept +{ + return qt_compare_strings(lhs, rhs, cs); +} - Returns an integer that compares to 0 as \a lhs compares to \a rhs. - - If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive; - otherwise the comparison is case-insensitive. - - Case-sensitive comparison is based exclusively on the numeric Unicode values - of the characters and is very fast, but is not what a human would expect. - Consider sorting user-visible strings with QString::localeAwareCompare(). - - \sa {Comparing Strings} +/*! + \relates QStringView + \internal + \since 5.10 + \overload */ int QtPrivate::compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept { @@ -1375,23 +1417,43 @@ int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens \internal \since 6.0 \overload +*/ +int QtPrivate::compareStrings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept +{ + return qt_compare_strings(lhs, rhs, cs); +} - Returns an integer that compares to 0 as \a lhs compares to \a rhs. +/*! + \relates QStringView + \internal + \since 6.0 + \overload +*/ +int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept +{ + return qt_compare_strings(lhs, rhs, cs); +} - If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive; - otherwise the comparison is case-insensitive. +/*! 
+ \relates QStringView + \internal + \since 6.0 + \overload +*/ +int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept +{ + return qt_compare_strings(lhs, rhs, cs); +} - Case-sensitive comparison is based exclusively on the numeric values of the - decoded Unicode code points and is very fast, but is not what a human would - expect. Consider sorting user-visible strings with - QString::localeAwareCompare(). +/*! + \relates QStringView + \internal + \since 6.0 + \overload */ -int QtPrivate::compareStringsUtf8(const char *u8str, qsizetype u8len, QStringView rhs, Qt::CaseSensitivity cs) noexcept +int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept { - if (cs == Qt::CaseSensitive) - return QUtf8::compareUtf8(u8str, u8len, rhs.data(), rhs.size()); - else - return ucstricmp8(u8str, u8str + u8len, rhs.begin(), rhs.end()); + return qt_compare_strings(lhs, rhs, cs); } #define REHASH(a) \ @@ -4797,6 +4859,21 @@ QByteArray QString::toLatin1_helper(const QString &string) } /*! + \since 6.0 + \internal + \relates QAnyStringView + + Returns a UTF-16 representation of \a string as a QString. + + \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(), + QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4() +*/ +QString QtPrivate::convertToQString(QAnyStringView string) +{ + return string.visit([] (auto string) { return string.toString(); }); +} + +/*! 
\since 5.10 \internal \relates QStringView diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h index 7fd5f86e25..7daf86a366 100644 --- a/src/corelib/text/qstring.h +++ b/src/corelib/text/qstring.h @@ -53,7 +53,7 @@ #include <QtCore/qnamespace.h> #include <QtCore/qstringliteral.h> #include <QtCore/qstringalgorithms.h> -#include <QtCore/qstringview.h> +#include <QtCore/qanystringview.h> #include <QtCore/qstringtokenizer.h> #include <string> @@ -81,6 +81,12 @@ namespace QtPrivate { template <bool...B> class BoolList; } +// QStringAlgorithms inline functions: + +int QtPrivate::compareStringsUtf8(const char *lhs, qsizetype lhss, QStringView rhs, Qt::CaseSensitivity cs) noexcept +{ return compareStrings(QUtf8StringView(lhs, lhss), rhs, cs); } + + class QLatin1String { public: @@ -282,6 +288,44 @@ bool QStringView::contains(QLatin1String s, Qt::CaseSensitivity cs) const noexce qsizetype QStringView::lastIndexOf(QLatin1String s, qsizetype from, Qt::CaseSensitivity cs) const noexcept { return QtPrivate::lastIndexOf(*this, from, s, cs); } +// +// QAnyStringView members that require QLatin1String +// + +constexpr QAnyStringView::QAnyStringView(QLatin1String str) noexcept + : m_data{str.data()}, m_size{size_t(str.size()) | Tag::Latin1} {} + +constexpr QLatin1String QAnyStringView::asLatin1StringView() const +{ + Q_ASSERT(isLatin1()); + return QLatin1String{m_data_utf8, int(size())}; +} + +template <typename Visitor> +constexpr decltype(auto) QAnyStringView::visit(Visitor &&v) const +{ + if (isUtf16()) + return std::forward<Visitor>(v)(asStringView()); + else if (isLatin1()) + return std::forward<Visitor>(v)(asLatin1StringView()); + else + return std::forward<Visitor>(v)(asUtf8StringView()); +} + +// +// QAnyStringView members that require QAnyStringView::visit() +// + +constexpr QChar QAnyStringView::front() const +{ + return visit([] (auto that) { return QAnyStringView::toQChar(that.front()); }); +} +constexpr QChar QAnyStringView::back() const +{ + return 
visit([] (auto that) { return QAnyStringView::toQChar(that.back()); }); +} + + class Q_CORE_EXPORT QString { typedef QTypedArrayData<char16_t> Data; @@ -976,6 +1020,7 @@ QString QLatin1String::toString() const { return *this; } // // QStringView inline members that require QString: // + QString QStringView::toString() const { return Q_ASSERT(size() == length()), QString(data(), length()); } @@ -997,6 +1042,29 @@ ushort QStringView::toUShort(bool *ok, int base) const { return QString::toIntegral_helper<ushort>(*this, ok, base); } // +// QUtf8StringView inline members that require QString: +// + +template <bool UseChar8T> +QString QBasicUtf8StringView<UseChar8T>::toString() const +{ + Q_ASSERT(size() == int(size())); + return QString::fromUtf8(data(), int(size())); +} + +// +// QAnyStringView inline members that require QString: +// + +QAnyStringView::QAnyStringView(const QByteArray &str) noexcept + : QAnyStringView{str.isNull() ? nullptr : str.data(), str.size()} {} +QAnyStringView::QAnyStringView(const QString &str) noexcept + : QAnyStringView{str.isNull() ? 
nullptr : str.data(), str.size()} {} + +QString QAnyStringView::toString() const +{ return QtPrivate::convertToQString(*this); } + +// // QString inline members // inline QString::QString(QLatin1String aLatin1) : d(fromLatin1_helper(aLatin1.latin1(), aLatin1.size())) @@ -1383,6 +1451,7 @@ inline bool operator> (QLatin1String lhs, QChar rhs) noexcept { return rhs < inline bool operator<=(QLatin1String lhs, QChar rhs) noexcept { return !(rhs < lhs); } inline bool operator>=(QLatin1String lhs, QChar rhs) noexcept { return !(rhs > lhs); } +#if 0 // QStringView <> QStringView inline bool operator==(QStringView lhs, QStringView rhs) noexcept { return lhs.size() == rhs.size() && QtPrivate::compareStrings(lhs, rhs) == 0; } inline bool operator!=(QStringView lhs, QStringView rhs) noexcept { return !(lhs == rhs); } @@ -1390,6 +1459,7 @@ inline bool operator< (QStringView lhs, QStringView rhs) noexcept { return QtPri inline bool operator<=(QStringView lhs, QStringView rhs) noexcept { return QtPrivate::compareStrings(lhs, rhs) <= 0; } inline bool operator> (QStringView lhs, QStringView rhs) noexcept { return QtPrivate::compareStrings(lhs, rhs) > 0; } inline bool operator>=(QStringView lhs, QStringView rhs) noexcept { return QtPrivate::compareStrings(lhs, rhs) >= 0; } +#endif // QStringView <> QChar inline bool operator==(QStringView lhs, QChar rhs) noexcept { return lhs == QStringView(&rhs, 1); } diff --git a/src/corelib/text/qstringalgorithms.h b/src/corelib/text/qstringalgorithms.h index 28c0760b61..c0619ca8e5 100644 --- a/src/corelib/text/qstringalgorithms.h +++ b/src/corelib/text/qstringalgorithms.h @@ -48,9 +48,27 @@ QT_BEGIN_NAMESPACE +#ifdef __cpp_char8_t +# define QT_BEGIN_HAS_CHAR8_T_NAMESPACE inline namespace q_has_char8_t { +# define QT_BEGIN_NO_CHAR8_T_NAMESPACE namespace q_no_char8_t { +#else +# define QT_BEGIN_HAS_CHAR8_T_NAMESPACE namespace q_has_char8_t { +# define QT_BEGIN_NO_CHAR8_T_NAMESPACE inline namespace q_no_char8_t { +#endif +#define 
QT_END_HAS_CHAR8_T_NAMESPACE } +#define QT_END_NO_CHAR8_T_NAMESPACE } + +// declare namespaces: +QT_BEGIN_HAS_CHAR8_T_NAMESPACE +QT_END_HAS_CHAR8_T_NAMESPACE +QT_BEGIN_NO_CHAR8_T_NAMESPACE +QT_END_NO_CHAR8_T_NAMESPACE + class QByteArray; class QLatin1String; class QStringView; +template <bool> class QBasicUtf8StringView; +class QAnyStringView; class QChar; namespace QtPrivate { @@ -60,10 +78,15 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t *qustrchr(QS Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; -Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStringsUtf8(const char *, qsizetype, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs = 
Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_DECL_DEPRECATED inline int compareStringsUtf8(const char *lhs, qsizetype lhss, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; @@ -91,6 +114,8 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QLatin1String trimmed(QLati Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION qsizetype count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION qsizetype count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT QString convertToQString(QAnyStringView s); + Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLatin1(QStringView str); Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToUtf8(QStringView str); Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLocal8Bit(QStringView str); diff --git a/src/corelib/text/qstringbuilder.h b/src/corelib/text/qstringbuilder.h index 940eb9f4f7..d488b962f0 100644 --- a/src/corelib/text/qstringbuilder.h +++ b/src/corelib/text/qstringbuilder.h @@ -466,6 +466,14 @@ QString &operator+=(QString &a, const QStringBuilder<A, B> &b) return a; } +// +// inline QAnyStringView members requiring QStringBuilder: +// + +template <typename A, typename B> +QAnyStringView::QAnyStringView(const QStringBuilder<A, B> &expr, + typename QStringBuilder<A, B>::ConvertTo &&capacity) + : QAnyStringView(capacity = expr) 
{} QT_END_NAMESPACE diff --git a/src/corelib/text/qutf8stringview.h b/src/corelib/text/qutf8stringview.h new file mode 100644 index 0000000000..a197d14a63 --- /dev/null +++ b/src/corelib/text/qutf8stringview.h @@ -0,0 +1,329 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. 
Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef QUTF8STRINGVIEW_H +#define QUTF8STRINGVIEW_H + +#include <QtCore/qstringalgorithms.h> +#include <QtCore/qarraydata.h> // for QContainerImplHelper + +#include <string> + +QT_BEGIN_NAMESPACE + +template <bool> class QBasicUtf8StringView; +class QByteArray; +class QLatin1String; + +namespace QtPrivate { +template <typename Char> +using IsCompatibleChar8TypeHelper = std::disjunction< +#ifdef __cpp_char8_t + std::is_same<Char, char8_t>, +#endif + std::is_same<Char, char>, + std::is_same<Char, uchar>, + std::is_same<Char, signed char> + >; +template <typename Char> +using IsCompatibleChar8Type + = IsCompatibleChar8TypeHelper<std::remove_cv_t<std::remove_reference_t<Char>>>; + +template <typename Pointer> +struct IsCompatiblePointer8Helper : std::false_type {}; +template <typename Char> +struct IsCompatiblePointer8Helper<Char*> + : IsCompatibleChar8Type<Char> {}; +template <typename Pointer> +using IsCompatiblePointer8 + = IsCompatiblePointer8Helper<std::remove_cv_t<std::remove_reference_t<Pointer>>>; + +template <typename T, typename Enable = void> +struct IsContainerCompatibleWithQUtf8StringView : std::false_type {}; + +template <typename T> +struct IsContainerCompatibleWithQUtf8StringView<T, std::enable_if_t<std::conjunction_v< + // lacking concepts and ranges, we accept any T whose std::data yields a suitable pointer ... + IsCompatiblePointer8<decltype(std::data(std::declval<const T &>()))>, + // ... and that has a suitable size ... + std::is_convertible< + decltype(std::size(std::declval<const T &>())), + qsizetype + >, + // ... 
and it's a range as it defines an iterator-like API + IsCompatibleChar8Type<typename std::iterator_traits< + decltype(std::begin(std::declval<const T &>()))>::value_type + >, + std::is_convertible< + decltype( std::begin(std::declval<const T &>()) != std::end(std::declval<const T &>()) ), + bool + >, + + // This needs to be treated specially due to the empty vs null distinction + std::negation<std::is_same<std::decay_t<T>, QByteArray>>, + + // This has a compatible value_type, but explicitly a different encoding + std::negation<std::is_same<std::decay_t<T>, QLatin1String>>, + + // Don't make an accidental copy constructor + std::negation<std::disjunction< + std::is_same<std::decay_t<T>, QBasicUtf8StringView<true>>, + std::is_same<std::decay_t<T>, QBasicUtf8StringView<false>> + >> + >>> : std::true_type {}; + +struct hide_char8_t { +#ifdef __cpp_char8_t + using type = char8_t; +#endif +}; + +struct wrap_char { using type = char; }; + +} // namespace QtPrivate + +template <bool UseChar8T> +class QBasicUtf8StringView +{ +public: + using storage_type = typename std::conditional<UseChar8T, + QtPrivate::hide_char8_t, + QtPrivate::wrap_char + >::type::type; + typedef const storage_type value_type; + typedef qptrdiff difference_type; + typedef qsizetype size_type; + typedef value_type &reference; + typedef value_type &const_reference; + typedef value_type *pointer; + typedef value_type *const_pointer; + + typedef pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator<iterator> reverse_iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + +private: + template <typename Char> + using if_compatible_char = std::enable_if_t<QtPrivate::IsCompatibleChar8Type<Char>::value, bool>; + + template <typename Pointer> + using if_compatible_pointer = std::enable_if_t<QtPrivate::IsCompatiblePointer8<Pointer>::value, bool>; + + template <typename T> + using if_compatible_qstring_like = std::enable_if_t<std::is_same_v<T, 
QByteArray>, bool>; + + template <typename T> + using if_compatible_container = std::enable_if_t<QtPrivate::IsContainerCompatibleWithQUtf8StringView<T>::value, bool>; + + template <typename Container> + static constexpr qsizetype lengthHelperContainer(const Container &c) noexcept + { + return qsizetype(std::size(c)); + } + + // Note: Do not replace with std::size(const Char (&)[N]), because the result + // will be off by one. + template <typename Char, size_t N> + static constexpr qsizetype lengthHelperContainer(const Char (&)[N]) noexcept + { + return qsizetype(N - 1); + } + + template <typename Char> + static const storage_type *castHelper(const Char *str) noexcept + { return reinterpret_cast<const storage_type*>(str); } + static constexpr const storage_type *castHelper(const storage_type *str) noexcept + { return str; } + +public: + constexpr QBasicUtf8StringView() noexcept + : m_data(nullptr), m_size(0) {} + constexpr QBasicUtf8StringView(std::nullptr_t) noexcept + : QBasicUtf8StringView() {} + + template <typename Char, if_compatible_char<Char> = true> + constexpr QBasicUtf8StringView(const Char *str, qsizetype len) + : m_data(castHelper(str)), + m_size((Q_ASSERT(len >= 0), Q_ASSERT(str || !len), len)) {} + + template <typename Char, if_compatible_char<Char> = true> + constexpr QBasicUtf8StringView(const Char *f, const Char *l) + : QBasicUtf8StringView(f, l - f) {} + +#ifdef Q_CLANG_QDOC + template <typename Char, size_t N> + constexpr QBasicUtf8StringView(const Char (&array)[N]) noexcept; + + template <typename Char> + constexpr QBasicUtf8StringView(const Char *str) noexcept; +#else + template <typename Pointer, if_compatible_pointer<Pointer> = true> + constexpr QBasicUtf8StringView(const Pointer &str) noexcept + : QBasicUtf8StringView(str, + str ? 
std::char_traits<std::remove_cv_t<std::remove_pointer_t<Pointer>>>::length(str) : 0) {} +#endif + +#ifdef Q_CLANG_QDOC + QBasicUtf8StringView(const QByteArray &str) noexcept; +#else + template <typename String, if_compatible_qstring_like<String> = true> + QBasicUtf8StringView(const String &str) noexcept + : QBasicUtf8StringView(str.isNull() ? nullptr : str.data(), qsizetype(str.size())) {} +#endif + + template <typename Container, if_compatible_container<Container> = true> + constexpr QBasicUtf8StringView(const Container &c) noexcept + : QBasicUtf8StringView(std::data(c), lengthHelperContainer(c)) {} + +#ifdef __cpp_char8_t + constexpr QBasicUtf8StringView(QBasicUtf8StringView<!UseChar8T> other) + : QBasicUtf8StringView(other.data(), other.size()) {} +#endif + + [[nodiscard]] inline QString toString() const; // defined in qstring.h + + [[nodiscard]] constexpr qsizetype size() const noexcept { return m_size; } + [[nodiscard]] const_pointer data() const noexcept { return reinterpret_cast<const_pointer>(m_data); } +#ifdef __cpp_char8_t + [[nodiscard]] const char8_t *utf8() const noexcept { return reinterpret_cast<const char8_t*>(m_data); } +#endif + + [[nodiscard]] constexpr storage_type operator[](qsizetype n) const + { return Q_ASSERT(n >= 0), Q_ASSERT(n < size()), m_data[n]; } + + // + // QString API + // + + [[nodiscard]] constexpr storage_type at(qsizetype n) const { return (*this)[n]; } + + [[nodiscard]] + constexpr QBasicUtf8StringView mid(qsizetype pos, qsizetype n = -1) const + { + using namespace QtPrivate; + auto result = QContainerImplHelper::mid(size(), &pos, &n); + return result == QContainerImplHelper::Null ? 
QBasicUtf8StringView() : QBasicUtf8StringView(m_data + pos, n); + } + [[nodiscard]] + constexpr QBasicUtf8StringView left(qsizetype n) const + { + if (size_t(n) >= size_t(size())) + n = size(); + return QBasicUtf8StringView(m_data, n); + } + [[nodiscard]] + constexpr QBasicUtf8StringView right(qsizetype n) const + { + if (size_t(n) >= size_t(size())) + n = size(); + return QBasicUtf8StringView(m_data + m_size - n, n); + } + + [[nodiscard]] constexpr QBasicUtf8StringView sliced(qsizetype pos) const + { verify(pos); return QBasicUtf8StringView{m_data + pos, m_size - pos}; } + [[nodiscard]] constexpr QBasicUtf8StringView sliced(qsizetype pos, qsizetype n) const + { verify(pos, n); return QBasicUtf8StringView(m_data + pos, n); } + [[nodiscard]] constexpr QBasicUtf8StringView first(qsizetype n) const + { verify(n); return QBasicUtf8StringView(m_data, n); } + [[nodiscard]] constexpr QBasicUtf8StringView last(qsizetype n) const + { verify(n); return QBasicUtf8StringView(m_data + m_size - n, n); } + [[nodiscard]] constexpr QBasicUtf8StringView chopped(qsizetype n) const + { verify(n); return QBasicUtf8StringView(m_data, m_size - n); } + + constexpr void truncate(qsizetype n) + { verify(n); m_size = n; } + constexpr void chop(qsizetype n) + { verify(n); m_size -= n; } + + // + // STL compatibility API: + // + [[nodiscard]] const_iterator begin() const noexcept { return data(); } + [[nodiscard]] const_iterator end() const noexcept { return data() + size(); } + [[nodiscard]] const_iterator cbegin() const noexcept { return begin(); } + [[nodiscard]] const_iterator cend() const noexcept { return end(); } + [[nodiscard]] const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + [[nodiscard]] const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + [[nodiscard]] const_reverse_iterator crbegin() const noexcept { return rbegin(); } + [[nodiscard]] const_reverse_iterator crend() const noexcept { return rend(); } + + 
[[nodiscard]] constexpr bool empty() const noexcept { return size() == 0; } + [[nodiscard]] constexpr storage_type front() const { return Q_ASSERT(!empty()), m_data[0]; } + [[nodiscard]] constexpr storage_type back() const { return Q_ASSERT(!empty()), m_data[m_size - 1]; } + + // + // Qt compatibility API: + // + [[nodiscard]] constexpr bool isNull() const noexcept { return !m_data; } + [[nodiscard]] constexpr bool isEmpty() const noexcept { return empty(); } +#if QT_DEPRECATED_SINCE(6, 0) + [[nodiscard]] + Q_DECL_DEPRECATED_X("Use size() and port callers to qsizetype.") + constexpr int length() const /* not nothrow! */ + { return Q_ASSERT(int(size()) == size()), int(size()); } +#endif +private: + Q_ALWAYS_INLINE constexpr void verify(qsizetype pos, qsizetype n = 0) const + { + Q_ASSERT(pos >= 0); + Q_ASSERT(pos <= size()); + Q_ASSERT(n >= 0); + Q_ASSERT(n <= size() - pos); + } + const storage_type *m_data; + qsizetype m_size; +}; +template <bool UseChar8T> +Q_DECLARE_TYPEINFO_BODY(QBasicUtf8StringView<UseChar8T>, Q_PRIMITIVE_TYPE); + +QT_BEGIN_NO_CHAR8_T_NAMESPACE +using QUtf8StringView = QBasicUtf8StringView<false>; +QT_END_NO_CHAR8_T_NAMESPACE + +QT_BEGIN_HAS_CHAR8_T_NAMESPACE +using QUtf8StringView = QBasicUtf8StringView<true>; +QT_END_HAS_CHAR8_T_NAMESPACE + +template <typename QStringLike, std::enable_if_t<std::is_same_v<QStringLike, QByteArray>, bool> = true> +[[nodiscard]] inline q_no_char8_t::QUtf8StringView qToUtf8StringViewIgnoringNull(const QStringLike &s) noexcept +{ return q_no_char8_t::QUtf8StringView(s.data(), s.size()); } + +QT_END_NAMESPACE + +#endif /* QUTF8STRINGVIEW_H */ diff --git a/src/corelib/text/qutf8stringview.qdoc b/src/corelib/text/qutf8stringview.qdoc new file mode 100644 index 0000000000..9c0c620d88 --- /dev/null +++ b/src/corelib/text/qutf8stringview.qdoc @@ -0,0 +1,720 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, 
info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the documentation of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:FDL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Free Documentation License Usage +** Alternatively, this file may be used under the terms of the GNU Free +** Documentation License version 1.3 as published by the Free Software +** Foundation and appearing in the file included in the packaging of +** this file. Please review the following information to ensure +** the GNU Free Documentation License version 1.3 requirements +** will be met: https://www.gnu.org/licenses/fdl-1.3.html. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +/*! + \class QUtf8StringView + \inmodule QtCore + \since 6.0 + \brief The QUtf8StringView class provides a unified view on UTF-8 strings + with a read-only subset of the QString API. + \reentrant + \ingroup tools + \ingroup string-processing + + A QUtf8StringView references a contiguous portion of a UTF-8 + string it does not own. It acts as an interface type to all kinds + of UTF-8 string, without the need to construct a QString or + QByteArray first. + + The UTF-8 string may be represented as an array (or an + array-compatible data-structure such as std::basic_string, etc.) + of \c char8_t, \c char, \c{signed char} or \c{unsigned char}. + + QUtf8StringView is designed as an interface type; its main + use-case is as a function parameter type. 
When QUtf8StringViews + are used as automatic variables or data members, care must be + taken to ensure that the referenced string data (for example, + owned by a std::u8string) outlives the QUtf8StringView on all code + paths, lest the string view ends up referencing deleted data. + + When used as an interface type, QUtf8StringView allows a single + function to accept a wide variety of UTF-8 string data + sources. One function accepting QUtf8StringView thus replaces + several function overloads (taking e.g. QByteArray), while at the + same time enabling even more string data sources to be passed to + the function, such as \c{u8"Hello World"}, a \c char8_t (C++20) or + \c char (C++17) string literal. The \c char8_t incompatibility + between C++17 and C++20 goes away when using QUtf8StringView. + + Like all views, QUtf8StringViews should be passed by value, not by + reference-to-const: + \snippet code/src_corelib_text_qutf8stringview.cpp 0 + + If you want to give your users maximum freedom in what strings + they can pass to your function, consider using QAnyStringView + instead. + + QUtf8StringView can also be used as the return value of a + function. If you call a function returning QUtf8StringView, take + extra care to not keep the QUtf8StringView around longer than the + function promises to keep the referenced string data alive. If in + doubt, obtain a strong reference to the data by calling toString() + to convert the QUtf8StringView into a QString. + + QUtf8StringView is a \e{Literal Type}. + + \section1 Compatible Character Types + + QUtf8StringView accepts strings over a variety of character types: + + \list + \li \c char (both signed and unsigned) + \li \c char8_t (C++20 only) + \endlist + + \section1 Sizes and Sub-Strings + + All sizes and positions in QUtf8StringView functions are in + UTF-8 code units (that is, UTF-8 multibyte sequences count as + two, three or four, depending on their length). 
QUtf8StringView + does not attempt to detect or prevent slicing right through + UTF-8 multibyte sequences. This is similar to the situation with + QStringView and surrogate pairs. + + \section1 C++20, char8_t, and QUtf8StringView + + In C++20, \c{u8""} string literals changed their type from + \c{const char[]} to \c{const char8_t[]}. If Qt 6 could have depended + on C++20, QUtf8StringView would store \c char8_t natively, and the + following functions and aliases would use (pointers to) \c char8_t: + + \list + \li storage_type, value_type, etc + \li begin(), end(), data(), etc + \li front(), back(), at(), operator[]() + \endlist + + This is what QUtf8StringView is expected to look like in Qt 7, but for + Qt 6, this was not possible. Instead of locking users into a C++17-era + interface for the next decade, Qt provides two QUtf8StringView classes, + in different (inline) namespaces. The first, in namespace \c{q_no_char8_t}, + has a value_type of \c{const char} and is universally available. + The second, in namespace \c{q_has_char8_t}, has a value_type of + \c{const char8_t} and is only available when compiling in C++20 mode. + + In C++17 mode, \c{q_no_char8_t} is an inline namespace, in C++20 it's + \c{q_has_char8_t}. This means that the name "QUtf8StringView" (without + explicit namespace) will denote different types in C++17 and C++20 modes. + + Internally, both are instantiations of the same template class, + QBasicUtf8StringView. Please do not use the template class's name in your + source code. + + All Qt APIs use \c{q_no_char8_t::QUtf8StringView} due to binary compatibility, + but these APIs accept \c{q_has_char8_t::QUtf8StringView} as well, since the + latter implicitly converts into the former, and vice versa. + + In your own code, please use only \c QUtf8StringView and/or + \c{q_no_char8_t::QUtf8StringView}: + + \list + \li If you only target C++20, then use "QUtf8StringView". 
It will be an alias + for \c{q_has_char8_t::QUtf8StringView} and you'll never look back. + \li If you only target C++17, then use "QUtf8StringView". It will be an alias + for \c{q_no_char8_t::QUtf8StringView} and for the time being, you're ok. + \li If you target both C++17 and C++20, then you have a choice to make: + \list + \li If you don't mind the source-incompatibility of return values of + QUtf8StringView::data() etc changing when compiling under C++17 or C++20, + use "QUtf8StringView". You will need to write your code in such a way that + it adapts to the differences in the QUtf8StringView API in different C++ + versions. + \li If you don't want to deal with the above source-incompatibilities, or if + you need to maintain binary compatibility between C++20 and C++17 builds, + use "q_no_char8_t::QUtf8StringView" explicitly. Be aware that the + \c{q_no_char8_t} version will disappear in Qt 7. + \endlist + \endlist + + Taken together: Just use QUtf8StringView unless you know what you're doing. + + \sa QAnyStringView, QUtf8StringView, QString +*/ + +/*! + \typedef QUtf8StringView::storage_type + + Alias for \c{char}. +*/ + +/*! + \typedef QUtf8StringView::value_type + + Alias for \c{const char}. Provided for compatibility with the STL. +*/ + +/*! + \typedef QUtf8StringView::difference_type + + Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL. +*/ + +/*! + \typedef QUtf8StringView::size_type + + Alias for qsizetype. Provided for compatibility with the STL. +*/ + +/*! + \typedef QUtf8StringView::reference + + Alias for \c{value_type &}. Provided for compatibility with the STL. + + QUtf8StringView does not support mutable references, so this is the same + as const_reference. +*/ + +/*! + \typedef QUtf8StringView::const_reference + + Alias for \c{value_type &}. Provided for compatibility with the STL. +*/ + +/*! + \typedef QUtf8StringView::pointer + + Alias for \c{value_type *}. Provided for compatibility with the STL. 
+ + QUtf8StringView does not support mutable pointers, so this is the same + as const_pointer. +*/ + +/*! + \typedef QUtf8StringView::const_pointer + + Alias for \c{value_type *}. Provided for compatibility with the STL. +*/ + +/*! + \typedef QUtf8StringView::iterator + + This typedef provides an STL-style const iterator for QUtf8StringView. + + QUtf8StringView does not support mutable iterators, so this is the same + as const_iterator. + + \sa const_iterator, reverse_iterator +*/ + +/*! + \typedef QUtf8StringView::const_iterator + + This typedef provides an STL-style const iterator for QUtf8StringView. + + \sa iterator, const_reverse_iterator +*/ + +/*! + \typedef QUtf8StringView::reverse_iterator + + This typedef provides an STL-style const reverse iterator for QUtf8StringView. + + QUtf8StringView does not support mutable reverse iterators, so this is the + same as const_reverse_iterator. + + \sa const_reverse_iterator, iterator +*/ + +/*! + \typedef QUtf8StringView::const_reverse_iterator + + This typedef provides an STL-style const reverse iterator for QUtf8StringView. + + \sa reverse_iterator, const_iterator +*/ + +/*! + \fn QUtf8StringView::QUtf8StringView() + + Constructs a null string view. + + \sa isNull() +*/ + +/*! + \fn QUtf8StringView::QUtf8StringView(std::nullptr_t) + + Constructs a null string view. + + \sa isNull() +*/ + +/*! + \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *str, qsizetype len) + + Constructs a string view on \a str with length \a len. + + The range \c{[str,len)} must remain valid for the lifetime of this string view object. + + Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view. + + The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr. + + This constructor only participates in overload resolution if \c Char is a compatible + character type. 
The compatible character types are: \c char8_t, \c char, \c{signed char} and + \c{unsigned char}. +*/ + +/*! + \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *first, const Char *last) + + Constructs a string view on \a first with length (\a last - \a first). + + The range \c{[first,last)} must remain valid for the lifetime of + this string view object. + + Passing \c \nullptr as \a first is safe if \a last is \nullptr, too, + and results in a null string view. + + The behavior is undefined if \a last precedes \a first, or \a first + is \nullptr and \a last is not. + + This constructor only participates in overload resolution if \c Char is a compatible + character type. The compatible character types are: \c char8_t, \c char, \c{signed char} and + \c{unsigned char}. +*/ + +/*! + \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *str) + + Constructs a string view on \a str. The length is determined + by scanning for the first \c{Char(0)}. + + \a str must remain valid for the lifetime of this string view object. + + Passing \nullptr as \a str is safe and results in a null string view. + + This constructor only participates in overload resolution if \a str + is not an array and if \c Char is a compatible character type. The + compatible character types are: \c char8_t, \c char, \c{signed char} and + \c{unsigned char}. +*/ + +/*! + \fn template <typename Char, size_t N> QUtf8StringView::QUtf8StringView(const Char (&string)[N]) + + Constructs a string view on the character string literal \a string. + The length is set to \c{N-1}, excluding the trailing \c{Char(0)}. + If you need the full array, use the constructor from pointer and + size instead: + + \snippet code/src_corelib_text_qutf8stringview.cpp 2 + + \a string must remain valid for the lifetime of this string view + object. + + This constructor only participates in overload resolution if \a string + is an actual array and if \c Char is a compatible character type. 
The + compatible character types are: \c char8_t, \c char, \c{signed char} and + \c{unsigned char}. +*/ + +/*! + \fn template <typename StdBasicString> QUtf8StringView::QUtf8StringView(const StdBasicString &str) + + Constructs a string view on \a str. The length is taken from \c{str.size()}. + + \c{str.data()} must remain valid for the lifetime of this string view object. + + This constructor only participates in overload resolution if \c StdBasicString is an + instantiation of \c std::basic_string with a compatible character type. The + compatible character types are: \c char8_t, \c char, \c{signed char} and + \c{unsigned char}. + + The string view will be empty if and only if \c{str.empty()}. It is unspecified + whether this constructor can result in a null string view (\c{str.data()} would + have to return \nullptr for this). + + \sa isNull(), isEmpty() +*/ + +/*! + \fn QString QUtf8StringView::toString() const + + Returns a deep copy of this string view's data as a QString. + + The return value will be a null QString if and only if this string view is null. +*/ + +/*! + \fn QUtf8StringView::data() const + + Returns a const pointer to the first code point in the string. + + \note The character array represented by the return value is \e not null-terminated. + + \sa begin(), end(), utf8() +*/ + +/*! + \fn QUtf8StringView::utf8() const + + Returns a const pointer to the first code point in the string. + + The result is returned as a \c{const char8_t*}, so this function is only available when + compiling in C++20 mode. + + \note The character array represented by the return value is \e not null-terminated. + + \sa begin(), end(), data() +*/ + +/*! + \fn QUtf8StringView::const_iterator QUtf8StringView::begin() const + + Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the first code point in + the string. + + This function is provided for STL compatibility. + + \sa end(), cbegin(), rbegin(), data() +*/ + +/*! 
+ \fn QUtf8StringView::const_iterator QUtf8StringView::cbegin() const + + Same as begin(). + + This function is provided for STL compatibility. + + \sa cend(), begin(), crbegin(), data() +*/ + +/*! + \fn QUtf8StringView::const_iterator QUtf8StringView::end() const + + Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the imaginary + code point after the last code point in the string. + + This function is provided for STL compatibility. + + \sa begin(), cend(), rend() +*/ + +/*! \fn QUtf8StringView::const_iterator QUtf8StringView::cend() const + + Same as end(). + + This function is provided for STL compatibility. + + \sa cbegin(), end(), crend() +*/ + +/*! + \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::rbegin() const + + Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to the first + code point in the string, in reverse order. + + This function is provided for STL compatibility. + + \sa rend(), crbegin(), begin() +*/ + +/*! + \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::crbegin() const + + Same as rbegin(). + + This function is provided for STL compatibility. + + \sa crend(), rbegin(), cbegin() +*/ + +/*! + \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::rend() const + + Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to one past + the last code point in the string, in reverse order. + + This function is provided for STL compatibility. + + \sa rbegin(), crend(), end() +*/ + +/*! + \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::crend() const + + Same as rend(). + + This function is provided for STL compatibility. + + \sa crbegin(), rend(), cend() +*/ + +/*! + \fn bool QUtf8StringView::empty() const + + Returns whether this string view is empty - that is, whether \c{size() == 0}. + + This function is provided for STL compatibility. + + \sa isEmpty(), isNull(), size(), length() +*/ + +/*! 
+ \fn bool QUtf8StringView::isEmpty() const + + Returns whether this string view is empty - that is, whether \c{size() == 0}. + + This function is provided for compatibility with other Qt containers. + + \sa empty(), isNull(), size(), length() +*/ + +/*! + \fn bool QUtf8StringView::isNull() const + + Returns whether this string view is null - that is, whether \c{data() == nullptr}. + + This function is provided for compatibility with other Qt containers. + + \sa empty(), isEmpty(), size(), length() +*/ + +/*! + \fn qsizetype QUtf8StringView::size() const + + Returns the size of this string view, in UTF-8 code points (that is, + multi-byte sequences count as more than one for the purposes of this function, the same + as surrogate pairs in QString and QStringView). + + \sa empty(), isEmpty(), isNull(), length() +*/ + +/*! + \fn int QUtf8StringView::length() const + \obsolete + Use size() and port callers to qsizetype. + + Same as size(), except returns the result as an \c int. + + This function is provided for compatibility with other Qt containers. + + \warning QUtf8StringView can represent strings with more than 2\sup{31} code points. + Calling this function on a string view for which size() returns a value greater + than \c{INT_MAX} constitutes undefined behavior. + + \sa empty(), isEmpty(), isNull(), size() +*/ + +/*! + \fn QUtf8StringView::operator[](qsizetype n) const + + Returns the code point at position \a n in this string view. + + The behavior is undefined if \a n is negative or not less than size(). + + \sa at(), front(), back() +*/ + +/*! + \fn QUtf8StringView::at(qsizetype n) const + + Returns the code point at position \a n in this string view. + + The behavior is undefined if \a n is negative or not less than size(). + + \sa operator[](), front(), back() +*/ + +/*! + \fn QUtf8StringView::front() const + + Returns the first code point in the string. Same as first(). + + This function is provided for STL compatibility. 
+ + \warning Calling this function on an empty string view constitutes + undefined behavior. + + \sa back() +*/ + +/*! + \fn QUtf8StringView::back() const + + Returns the last code point in the string. Same as last(). + + This function is provided for STL compatibility. + + \warning Calling this function on an empty string view constitutes + undefined behavior. + + \sa front() +*/ + +/*! + \fn QUtf8StringView::mid(qsizetype start, qsizetype length) const + + Returns the substring of length \a length starting at position + \a start in this object. + + \obsolete Use sliced() instead in new code. + + Returns an empty string view if \a start exceeds the + length of the string. If there are fewer than \a length code points + available in the string starting at \a start, or if + \a length is negative (default), the function returns all code points that + are available from \a start. + + \sa first(), last(), sliced(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::left(qsizetype length) const + + \obsolete Use first() instead in new code. + + Returns the substring of length \a length starting at position + 0 in this object. + + The entire string is returned if \a length is greater than or equal + to size(), or less than zero. + + \sa first(), last(), sliced(), startsWith(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::right(qsizetype length) const + + \obsolete Use last() instead in new code. + + Returns the substring of length \a length starting at position + size() - \a length in this object. + + The entire string is returned if \a length is greater than or equal + to size(), or less than zero. + + \sa first(), last(), sliced(), endsWith(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::first(qsizetype n) const + + Returns a string view that contains the first \a n code points + of this string. + + \note The behavior is undefined when \a n < 0 or \a n > size(). 
+ + \sa last(), sliced(), startsWith(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::last(qsizetype n) const + + Returns a string view that contains the last \a n code points of this string. + + \note The behavior is undefined when \a n < 0 or \a n > size(). + + \sa first(), sliced(), endsWith(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::sliced(qsizetype pos, qsizetype n) const + + Returns a string view containing \a n code points of this string view, + starting at position \a pos. + + \note The behavior is undefined when \a pos < 0, \a n < 0, + or \a pos + \a n > size(). + + \sa first(), last(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::sliced(qsizetype pos) const + + Returns a string view starting at position \a pos in this object, + and extending to its end. + + \note The behavior is undefined when \a pos < 0 or \a pos > size(). + + \sa first(), last(), chopped(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::chopped(qsizetype length) const + + Returns the substring of length size() - \a length starting at the + beginning of this object. + + Same as \c{first(size() - length)}. + + \note The behavior is undefined when \a length < 0 or \a length > size(). + + \sa sliced(), first(), last(), chop(), truncate() +*/ + +/*! + \fn QUtf8StringView::truncate(qsizetype length) + + Truncates this string view to \a length code points. + + Same as \c{*this = first(length)}. + + \note The behavior is undefined when \a length < 0 or \a length > size(). + + \sa sliced(), first(), last(), chopped(), chop() +*/ + +/*! + \fn QUtf8StringView::chop(qsizetype length) + + Truncates this string view by \a length code points. + + Same as \c{*this = first(size() - length)}. + + \note The behavior is undefined when \a length < 0 or \a length > size(). + + \sa sliced(), first(), last(), chopped(), truncate() +*/ + +/*! 
+ \fn QUtf8StringView::trimmed() const + + Strips leading and trailing whitespace and returns the result. + + Whitespace means any code point for which QChar::isSpace() returns + \c true. This includes the ASCII characters '\\t', '\\n', '\\v', + '\\f', '\\r', and ' '. +*/ + +/*! + \fn template <typename QStringLike> qToUtf8StringViewIgnoringNull(const QStringLike &s); + \relates QUtf8StringView + \internal + + Convert \a s to a QUtf8StringView ignoring \c{s.isNull()}. + + Returns a string-view that references \a{s}'s data, but is never null. + + This is a faster way to convert a QByteArray to a QUtf8StringView, + if null QByteArrays can legitimately be treated as empty ones. + + \sa QByteArray::isNull(), QUtf8StringView +*/ diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri index 16d585cac7..0704426f09 100644 --- a/src/corelib/text/text.pri +++ b/src/corelib/text/text.pri @@ -1,6 +1,7 @@ # Qt text / string / character / unicode / byte array module HEADERS += \ + text/qanystringview.h \ text/qbytearray.h \ text/qbytearray_p.h \ text/qbytearrayalgorithms.h \ @@ -30,8 +31,8 @@ HEADERS += \ text/qstringtokenizer.h \ text/qtextboundaryfinder.h \ text/qunicodetables_p.h \ - text/qunicodetools_p.h - + text/qunicodetools_p.h \ + text/qutf8stringview.h SOURCES += \ text/qbytearray.cpp \ |