diff options
Diffstat (limited to 'src/corelib/text/qanystringview.cpp')
-rw-r--r-- | src/corelib/text/qanystringview.cpp | 660 |
1 files changed, 660 insertions, 0 deletions
diff --git a/src/corelib/text/qanystringview.cpp b/src/corelib/text/qanystringview.cpp new file mode 100644 index 0000000000..4129257c02 --- /dev/null +++ b/src/corelib/text/qanystringview.cpp @@ -0,0 +1,660 @@ +// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#include "qanystringview.h" +#include "qdebug.h" +#include "qttypetraits.h" + +QT_BEGIN_NAMESPACE + +/*! + \class QAnyStringView + \inmodule QtCore + \since 6.0 + \brief The QAnyStringView class provides a unified view on Latin-1, UTF-8, + or UTF-16 strings with a read-only subset of the QString API. + \reentrant + \ingroup tools + \ingroup string-processing + + \compares strong + \compareswith strong char16_t QChar {const char16_t *} {const char *} \ + QByteArray QByteArrayView QString QStringView QUtf8StringView \ + QLatin1StringView + \endcompareswith + + A QAnyStringView references a contiguous portion of a string it does + not own. It acts as an interface type to all kinds of strings, + without the need to construct a QString first. + + Unlike QStringView and QUtf8StringView, QAnyStringView can hold + strings of any of the following encodings: UTF-8, UTF-16, and + Latin-1. The latter is supported because Latin-1, unlike UTF-8, + can be efficiently compared to UTF-16 data: a length mismatch + already means the strings cannot be equal. This is not true for + UTF-8/UTF-16 comparisons, because UTF-8 is a variable-length + encoding. + + The string may be represented as an array (or an array-compatible + data-structure such as QString, std::basic_string, etc.) of \c + char, \c char8_t, QChar, \c ushort, \c char16_t or (on platforms, + such as Windows, where it is a 16-bit type) \c wchar_t. + + QAnyStringView is designed as an interface type; its main use-case + is as a function parameter type. When QAnyStringViews are used as + automatic variables or data members, care must be taken to ensure + that the referenced string data (for example, owned by a QString) + outlives the QAnyStringView on all code paths, lest the string + view ends up referencing deleted data. + + When used as an interface type, QAnyStringView allows a single + function to accept a wide variety of string data sources. One + function accepting QAnyStringView thus replaces five function + overloads (taking QString, \c{(const QChar*, qsizetype)}, + QUtf8StringView, QLatin1StringView (but see above), and QChar), while + at the same time enabling even more string data sources to be + passed to the function, such as \c{u8"Hello World"}, a \c char8_t + string literal. + + Like elsewhere in Qt, QAnyStringView assumes \c char data is encoded + in UTF-8, unless it is presented as a QLatin1StringView. + + Since Qt 6.4, however, UTF-8 string literals that are pure US-ASCII are + automatically stored as Latin-1. This is a compile-time check with no + runtime overhead. The feature requires compiling in C++20, or with a recent + GCC. + + QAnyStringViews should be passed by value, not by reference-to-const: + \snippet code/src_corelib_text_qanystringview.cpp 0 + + QAnyStringView can also be used as the return value of a function, + but this is not recommended. QUtf8StringView or QStringView are + better suited as function return values. If you call a function + returning QAnyStringView, take extra care to not keep the + QAnyStringView around longer than the function promises to keep + the referenced string data alive. If in doubt, obtain a strong + reference to the data by calling toString() to convert the + QAnyStringView into a QString. + + QAnyStringView is a \e{Literal Type}. + + \section2 Compatible Character Types + + QAnyStringView accepts strings over a variety of character types: + + \list + \li \c char (both signed and unsigned) + \li \c char8_t (C++20 only) + \li \c char16_t + \li \c wchar_t (where it's a 16-bit type, e.g. Windows) + \li \c ushort + \li \c QChar + \endlist + + The 8-bit character types are interpreted as UTF-8 data (except when + presented as a QLatin1StringView) while the 16-bit character types are + interpreted as UTF-16 data in host byte order (the same as QString). + + \section2 Sizes and Sub-Strings + + All sizes and positions in QAnyStringView functions are in the + encoding's code units (that is, UTF-16 surrogate pairs count as + two for the purposes of these functions, the same as in QString, + and UTF-8 multibyte sequences count as two, three or four, + depending on their length). + + \sa QUtf8StringView, QStringView +*/ + +/*! + \typedef QAnyStringView::difference_type + + Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL. +*/ + +/*! + \typedef QAnyStringView::size_type + + Alias for qsizetype. Provided for compatibility with the STL. +*/ + +/*! + \fn QAnyStringView::QAnyStringView() + + Constructs a null string view. + + \sa isNull() +*/ + +/*! + \fn QAnyStringView::QAnyStringView(std::nullptr_t) + + Constructs a null string view. + + \sa isNull() +*/ + +/*! + \fn template <typename Char, QAnyStringView::if_compatible_char<Char> = true> QAnyStringView::QAnyStringView(const Char *str, qsizetype len) + + Constructs a string view on \a str with length \a len. + + The range \c{[str,len)} must remain valid for the lifetime of this string view object. + + Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view. + + The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr. + + This constructor only participates in overload resolution if \c Char is a compatible + character type. + + \sa isNull(), {Compatible Character Types} +*/ + +/*! + \fn template <typename Char, QAnyStringView::if_compatible_char<Char> = true> QAnyStringView::QAnyStringView(const Char *first, const Char *last) + + Constructs a string view on \a first with length (\a last - \a first). + + The range \c{[first,last)} must remain valid for the lifetime of + this string view object. + + Passing \nullptr as \a first is safe if \a last is \nullptr, too, + and results in a null string view. + + The behavior is undefined if \a last precedes \a first, or \a first + is \nullptr and \a last is not. + + This constructor only participates in overload resolution if \c Char + is a compatible character type. + + \sa isNull(), {Compatible Character Types} +*/ + +/*! + \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *str) + + Constructs a string view on \a str. The length is determined + by scanning for the first \c{Char(0)}. + + \a str must remain valid for the lifetime of this string view object. + + Passing \nullptr as \a str is safe and results in a null string view. + + This constructor only participates in overload resolution if \a + str is not an array and if \c Char is a compatible character + type. + + \sa isNull(), {Compatible Character Types} +*/ + +/*! + \fn template <typename Char, size_t N> QAnyStringView::QAnyStringView(const Char (&string)[N]) + + Constructs a string view on the character string literal \a string. + The view covers the array until the first \c{Char(0)} is encountered, + or \c N, whichever comes first. + If you need the full array, use fromArray() instead. + + \a string must remain valid for the lifetime of this string view + object. + + This constructor only participates in overload resolution if \a + string is an actual array and \c Char is a compatible character + type. + + \sa {Compatible Character Types} +*/ + +/*! + \fn QAnyStringView::QAnyStringView(const QString &str) + + Constructs a string view on \a str. + + \c{str.data()} must remain valid for the lifetime of this string view object. + + The string view will be null if and only if \c{str.isNull()}. +*/ + +/*! + \fn QAnyStringView::QAnyStringView(const QByteArray &str) + + Constructs a string view on \a str. The data in \a str is interpreted as UTF-8. + + \c{str.data()} must remain valid for the lifetime of this string view object. + + The string view will be null if and only if \c{str.isNull()}. +*/ + +/*! + \fn template <typename Container, QAnyStringView::if_compatible_container<Container>> QAnyStringView::QAnyStringView(const Container &str) + + Constructs a string view on \a str. The length is taken from \c{std::size(str)}. + + \c{std::data(str)} must remain valid for the lifetime of this string view object. + + This constructor only participates in overload resolution if \c Container is a + container with a compatible character type as \c{value_type}. + + The string view will be empty if and only if \c{std::size(str) == 0}. It is unspecified + whether this constructor can result in a null string view (\c{std::data(str)} would + have to return \nullptr for this). + + \sa isNull(), isEmpty() +*/ + +/*! + \fn template <typename Char, size_t Size> static QAnyStringView fromArray(const Char (&string)[Size]) noexcept + + Constructs a string view on the full character string literal \a string, + including any trailing \c{Char(0)}. If you don't want the + null-terminator included in the view then you can use the constructor + overload taking a pointer and a size: + + \snippet code/src_corelib_text_qanystringview.cpp 2 + + Alternatively you can use the constructor overload taking an + array literal which will create a view up to, but not including, + the first null-terminator in the data. + + \a string must remain valid for the lifetime of this string view + object. + + This function will work with any array literal if \c Char is a + compatible character type. +*/ + +/*! + \fn QString QAnyStringView::toString() const + + Returns a deep copy of this string view's data as a QString. + + The return value will be a null QString if and only if this string view is null. +*/ + +/*! + \fn const void *QAnyStringView::data() const + + Returns a const pointer to the first character in the string view. + + \note The character array represented by the return value is \e not null-terminated. + + \sa size_bytes() +*/ + +/*! + \fn bool QAnyStringView::empty() const + + Returns whether this string view is empty - that is, whether \c{size() == 0}. + + This function is provided for STL compatibility. + + \sa isEmpty(), isNull(), size() +*/ + +/*! + \fn bool QAnyStringView::isEmpty() const + + Returns whether this string view is empty - that is, whether \c{size() == 0}. + + This function is provided for compatibility with other Qt containers. + + \sa empty(), isNull(), size() +*/ + +/*! + \fn bool QAnyStringView::isNull() const + + Returns whether this string view is null - that is, whether \c{data() == nullptr}. + + This functions is provided for compatibility with other Qt containers. + + \sa empty(), isEmpty(), size() +*/ + +/*! + \fn qsizetype QAnyStringView::size() const + + Returns the size of this string view, in the encoding's code points. + + \sa empty(), isEmpty(), isNull(), size_bytes(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::size_bytes() const + + Returns the size of this string view, but in bytes, not code-points. + + You can use this function together with data() for hashing or serialization. + + This function is provided for STL compatibility. + + \sa size(), data() +*/ + +/*! + \fn QAnyStringView::length() const + + Same as size(). + + This function is provided for compatibility with other Qt containers. + + \sa size() +*/ + +/*! + \fn QChar QAnyStringView::front() const + + Returns the first character in the string view. + + This function is provided for STL compatibility. + + \warning Calling this function on an empty string view constitutes + undefined behavior. + + \sa back(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QChar QAnyStringView::back() const + + Returns the last character in the string view. + + This function is provided for STL compatibility. + + \warning Calling this function on an empty string view constitutes + undefined behavior. + + \sa front(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::mid(qsizetype pos, qsizetype n) const + \since 6.5 + + Returns the substring of length \a n starting at position + \a pos in this object. + + \deprecated Use sliced() instead in new code. + + Returns an empty string view if \a n exceeds the + length of the string view. If there are less than \a n code points + available in the string view starting at \a pos, or if + \a n is negative (default), the function returns all code points that + are available from \a pos. + + \sa first(), last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::left(qsizetype n) const + \since 6.5 + + \deprecated Use first() instead in new code. + + Returns the substring of length \a n starting at position + 0 in this object. + + The entire string view is returned if \a n is greater than or equal + to size(), or less than zero. + + \sa first(), last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::right(qsizetype n) const + \since 6.5 + + \deprecated Use last() instead in new code. + + Returns the substring of length \a n starting at position + size() - \a n in this object. + + The entire string view is returned if \a n is greater than or equal + to size(), or less than zero. + + \sa first(), last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::first(qsizetype n) const + \since 6.5 + + Returns a string view that contains the first \a n code points + of this string view. + + \note The behavior is undefined when \a n < 0 or \a n > size(). + + \sa last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::last(qsizetype n) const + \since 6.5 + + Returns a string view that contains the last \a n code points of this string view. + + \note The behavior is undefined when \a n < 0 or \a n > size(). + + \sa first(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::sliced(qsizetype pos, qsizetype n) const + \since 6.5 + + Returns a string view containing \a n code points of this string view, + starting at position \a pos. + +//! [UB-sliced-index-length] + \note The behavior is undefined when \a pos < 0, \a n < 0, + or \a pos + \a n > size(). +//! [UB-sliced-index-length] + + \sa first(), last(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::sliced(qsizetype pos) const + \since 6.5 + + Returns a string view starting at position \a pos in this object, + and extending to its end. + +//! [UB-sliced-index-only] + \note The behavior is undefined when \a pos < 0 or \a pos > size(). +//! [UB-sliced-index-only] + + \sa first(), last(), chopped(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::chopped(qsizetype n) const + \since 6.5 + + Returns the substring of length size() - \a n starting at the + beginning of this object. + + Same as \c{first(size() - n)}. + + \note The behavior is undefined when \a n < 0 or \a n > size(). + + \sa sliced(), first(), last(), chop(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::truncate(qsizetype n) + \since 6.5 + + Truncates this string view to \a n code points. + + Same as \c{*this = first(n)}. + + \note The behavior is undefined when \a n < 0 or \a n > size(). + + \sa sliced(), first(), last(), chopped(), chop(), {Sizes and Sub-Strings} +*/ + +/*! + \fn QAnyStringView::chop(qsizetype n) + \since 6.5 + + Truncates this string view by \a n code points. + + Same as \c{*this = first(size() - n)}. + + \note The behavior is undefined when \a n < 0 or \a n > size(). + + \sa sliced(), first(), last(), chopped(), truncate(), {Sizes and Sub-Strings} +*/ + +/*! \fn template <typename Visitor> decltype(auto) QAnyStringView::visit(Visitor &&v) const + + Calls \a v with either a QUtf8StringView, QLatin1String, or QStringView, depending + on the encoding of the string data this string-view references. + + This is how most functions taking QAnyStringView fork off into per-encoding + functions: + + \code + void processImpl(QLatin1String s) { ~~~ } + void processImpl(QUtf8StringView s) { ~~~ } + void processImpl(QStringView s) { ~~~ } + + void process(QAnyStringView s) + { + s.visit([](auto s) { processImpl(s); }); + } + \endcode + + Here, we're reusing the same name, \c s, for both the QAnyStringView + object, as well as the lambda's parameter. This is idiomatic code and helps + track the identity of the objects through visit() calls, for example in more + complex situations such as + + \code + bool equal(QAnyStringView lhs, QAnyStringView rhs) + { + // assuming operator==(QAnyStringView, QAnyStringView) didn't, yet, exist: + return lhs.visit([rhs](auto lhs) { + rhs.visit([lhs](auto rhs) { + return lhs == rhs; + }); + }); + } + \endcode + + visit() requires that all lambda instantiations have the same return type. + If they differ, you get a compile error, even if there is a common type. To + fix, you can use explicit return types on the lambda, or cast in the return + statements: + + \code + // wrong: + QAnyStringView firstHalf(QAnyStringView input) + { + return input.visit([](auto input) { // ERROR: lambdas return different types + return input.sliced(0, input.size() / 2); + }); + } + // correct: + QAnyStringView firstHalf(QAnyStringView input) + { + return input.visit([](auto input) -> QAnyStringView { // OK, explicit return type + return input.sliced(0, input.size() / 2); + }); + } + // also correct: + QAnyStringView firstHalf(QAnyStringView input) + { + return input.visit([](auto input) { + return QAnyStringView(input.sliced(0, input.size() / 2)); // OK, cast to common type + }); + } + \endcode +*/ + +/*! + \fn QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) + + Compares the string view \a lhs with the string view \a rhs and returns a + negative integer if \a lhs is less than \a rhs, a positive integer if it is + greater than \a rhs, and zero if they are equal. + + If \a cs is Qt::CaseSensitive (the default), the comparison is case sensitive; + otherwise the comparison is case-insensitive. + + \sa operator==(), operator<(), operator>() +*/ + +/*! + \fn bool QAnyStringView::operator==(const QAnyStringView &lhs, const QAnyStringView & rhs) + \fn bool QAnyStringView::operator!=(const QAnyStringView & lhs, const QAnyStringView & rhs) + \fn bool QAnyStringView::operator<=(const QAnyStringView & lhs, const QAnyStringView & rhs) + \fn bool QAnyStringView::operator>=(const QAnyStringView & lhs, const QAnyStringView & rhs) + \fn bool QAnyStringView::operator<(const QAnyStringView & lhs, const QAnyStringView & rhs) + \fn bool QAnyStringView::operator>(const QAnyStringView & lhs, const QAnyStringView & rhs) + + Operators that compare \a lhs to \a rhs. + + \sa compare() +*/ + +/*! + \fn template <typename QStringLike> qToAnyStringViewIgnoringNull(const QStringLike &s); + \since 6.0 + \internal + + Convert \a s to a QAnyStringView ignoring \c{s.isNull()}. + + Returns a string view that references \a{s}'s data, but is never null. + + This is a faster way to convert a QString or QByteArray to a QAnyStringView, + if null QStrings or QByteArrays can legitimately be treated as empty ones. + + \sa QString::isNull(), QAnyStringView +*/ + +/*! + \fn QAnyStringView::operator<<(QDebug d, QAnyStringView s) + \since 6.7 + \relates QDebug + + Outputs \a s to debug stream \a d. + + If \c{d.quotedString()} is \c true, indicates which encoding the string is + in. If you just want the string data, use visit() like this: + + \code + s.visit([&d) (auto s) { d << s; }); + \endcode + + \sa QAnyStringView::visit() +*/ +QDebug operator<<(QDebug d, QAnyStringView s) +{ + struct S { const char *prefix, *suffix; }; + const auto affixes = s.visit([](auto s) { + using View = decltype(s); + if constexpr (std::is_same_v<View, QLatin1StringView>) { + return S{"", "_L1"}; + } else if constexpr (std::is_same_v<View, QUtf8StringView>) { + return S{"u8", ""}; + } else if constexpr (std::is_same_v<View, QStringView>) { + return S{"u", ""}; + } else { + static_assert(QtPrivate::type_dependent_false<View>()); + } + }); + const QDebugStateSaver saver(d); + d.nospace(); + if (d.quoteStrings()) + d << affixes.prefix; + s.visit([&d](auto s) { d << s; }); + if (d.quoteStrings()) + d << affixes.suffix; + return d; +} + + +QT_END_NAMESPACE |