summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@kdab.com>2017-04-05 00:07:09 +0200
committerMarc Mutz <marc.mutz@kdab.com>2017-04-07 08:54:16 +0000
commit5dc1e08c8c602738f6a827cea0de44683c7bbd0b (patch)
treec413e9dfbeb5db2fc5e9377c9fdac97bf60f457a
parent86005ea2570fcce81db8e3f4988f5541b4624de8 (diff)
Add qConvertTo{Utf8,Latin1,Local8Bit,Ucs4}() and corresponding QStringView methods
Like the qt_compare_strings()/qCompareStrings() split, distinguish between the internal and exported functions. Because of the circular dependency between qstring.h and qvector.h, the inline toUcs4() function has to be in qvector.h. At some point, we need to refactor the headers so qvector.h is lower in the dependency chain than qstring.h. It's not the first time this bites. Change-Id: Ief9f3bd92c83cdd1f31c51c700f42e146916eefd Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/tools/qstring.cpp162
-rw-r--r--src/corelib/tools/qstring.h5
-rw-r--r--src/corelib/tools/qstringview.cpp51
-rw-r--r--src/corelib/tools/qstringview.h4
-rw-r--r--src/corelib/tools/qvector.h2
-rw-r--r--tests/auto/corelib/tools/qstringapisymmetry/tst_qstringapisymmetry.cpp9
6 files changed, 194 insertions, 39 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 672dda804f..4f975724d8 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -4632,29 +4632,51 @@ bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
: foldCase(d->data()[d->size - 1]) == foldCase(c.unicode()));
}
+static QByteArray qt_convert_to_latin1(QStringView string);
+
QByteArray QString::toLatin1_helper(const QString &string)
{
- if (Q_UNLIKELY(string.isNull()))
- return QByteArray();
-
- return toLatin1_helper(string.constData(), string.length());
+ return qt_convert_to_latin1(string);
}
QByteArray QString::toLatin1_helper(const QChar *data, int length)
{
- QByteArray ba(length, Qt::Uninitialized);
+ return qt_convert_to_latin1(QStringView(data, length));
+}
+
+/*!
+ \since 5.10
+ \relates QStringView
+
+ Returns a Latin-1 representation of \a string as a QByteArray.
+
+ The behavior is undefined if \a string contains non-Latin1 characters.
+
+ \sa QString::toLatin1(), QStringView::toLatin1(), qConvertToUtf8(), qConvertToLocal8Bit(), qConvertToUcs4()
+*/
+QByteArray qConvertToLatin1(QStringView string)
+{
+ return qt_convert_to_latin1(string);
+}
+
+static QByteArray qt_convert_to_latin1(QStringView string)
+{
+ if (Q_UNLIKELY(string.isNull()))
+ return QByteArray();
+
+ QByteArray ba(string.length(), Qt::Uninitialized);
// since we own the only copy, we're going to const_cast the constData;
// that avoids an unnecessary call to detach() and expansion code that will never get used
qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
- reinterpret_cast<const ushort *>(data), length);
+ reinterpret_cast<const ushort *>(string.data()), string.length());
return ba;
}
QByteArray QString::toLatin1_helper_inplace(QString &s)
{
if (!s.isDetached())
- return s.toLatin1();
+ return qt_convert_to_latin1(s);
// We can return our own buffer to the caller.
// Conversion to Latin-1 always shrinks the buffer by half.
@@ -4690,7 +4712,7 @@ QByteArray QString::toLatin1_helper_inplace(QString &s)
characters. Those characters may be suppressed or replaced with a
question mark.
- \sa fromLatin1(), toUtf8(), toLocal8Bit(), QTextCodec
+ \sa fromLatin1(), toUtf8(), toLocal8Bit(), QTextCodec, qConvertToLatin1()
*/
/*!
@@ -4706,6 +4728,8 @@ QByteArray QString::toLatin1_helper_inplace(QString &s)
\sa fromAscii(), toLatin1(), toUtf8(), toLocal8Bit(), QTextCodec
*/
+static QByteArray qt_convert_to_local_8bit(QStringView string);
+
/*!
\fn QByteArray QString::toLocal8Bit() const
@@ -4721,21 +4745,47 @@ QByteArray QString::toLatin1_helper_inplace(QString &s)
locale, the returned byte array is undefined. Those characters may be
suppressed or replaced by another.
- \sa fromLocal8Bit(), toLatin1(), toUtf8(), QTextCodec
+ \sa fromLocal8Bit(), toLatin1(), toUtf8(), QTextCodec, qConvertToLocal8Bit()
*/
QByteArray QString::toLocal8Bit_helper(const QChar *data, int size)
{
- if (!data)
+ return qt_convert_to_local_8bit(QStringView(data, size));
+}
+
+static QByteArray qt_convert_to_local_8bit(QStringView string)
+{
+ if (string.isNull())
return QByteArray();
#ifndef QT_NO_TEXTCODEC
QTextCodec *localeCodec = QTextCodec::codecForLocale();
if (localeCodec)
- return localeCodec->fromUnicode(data, size);
+ return localeCodec->fromUnicode(string);
#endif // QT_NO_TEXTCODEC
- return toLatin1_helper(data, size);
+ return qt_convert_to_latin1(string);
}
+/*!
+ \since 5.10
+ \relates QStringView
+
+ Returns a local 8-bit representation of \a string as a QByteArray.
+
+ QTextCodec::codecForLocale() is used to perform the conversion from
+ Unicode. If the locale's encoding could not be determined, this function
+ does the same as qConvertToLatin1().
+
+ The behavior is undefined if \a string contains characters not
+ supported by the locale's 8-bit encoding.
+
+ \sa QString::toLocal8Bit(), QStringView::toLocal8Bit(), qConvertToLatin1(), qConvertToUtf8(), qConvertToUcs4()
+*/
+QByteArray qConvertToLocal8Bit(QStringView string)
+{
+ return qt_convert_to_local_8bit(string);
+}
+
+static QByteArray qt_convert_to_utf8(QStringView str);
/*!
\fn QByteArray QString::toUtf8() const
@@ -4745,18 +4795,41 @@ QByteArray QString::toLocal8Bit_helper(const QChar *data, int size)
UTF-8 is a Unicode codec and can represent all characters in a Unicode
string like QString.
- \sa fromUtf8(), toLatin1(), toLocal8Bit(), QTextCodec
+ \sa fromUtf8(), toLatin1(), toLocal8Bit(), QTextCodec, qConvertToUtf8()
*/
QByteArray QString::toUtf8_helper(const QString &str)
{
+ return qt_convert_to_utf8(str);
+}
+
+static QByteArray qt_convert_to_utf8(QStringView str)
+{
if (str.isNull())
return QByteArray();
- return QUtf8::convertFromUnicode(str.constData(), str.length());
+ return QUtf8::convertFromUnicode(str.data(), str.length());
}
/*!
+ \since 5.10
+ \relates QStringView
+
+ Returns a UTF-8 representation of \a string as a QByteArray.
+
+ UTF-8 is a Unicode codec and can represent all characters in a Unicode
+ string like QStringView.
+
+ \sa QString::toUtf8(), QStringView::toUtf8(), qConvertToLatin1(), qConvertToLocal8Bit(), qConvertToUcs4()
+*/
+QByteArray qConvertToUtf8(QStringView string)
+{
+ return qt_convert_to_utf8(string);
+}
+
+static QVector<uint> qt_convert_to_ucs4(QStringView string);
+
+/*!
\since 4.2
Returns a UCS-4/UTF-32 representation of the string as a QVector<uint>.
@@ -4768,17 +4841,44 @@ QByteArray QString::toUtf8_helper(const QString &str)
The returned vector is not NUL terminated.
- \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QTextCodec, fromUcs4(), toWCharArray()
+ \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QTextCodec, fromUcs4(), toWCharArray(), qConvertToUcs4()
*/
QVector<uint> QString::toUcs4() const
{
- QVector<uint> v(length());
- uint *a = v.data();
- int len = toUcs4_helper(d->data(), length(), a);
- v.resize(len);
+ return qt_convert_to_ucs4(*this);
+}
+
+static QVector<uint> qt_convert_to_ucs4(QStringView string)
+{
+ QVector<uint> v(string.length());
+ uint *a = const_cast<uint*>(v.constData());
+ QStringIterator it(string);
+ while (it.hasNext())
+ *a++ = it.next();
+ v.resize(a - v.constData());
return v;
}
+/*!
+ \since 5.10
+ \relates QStringView
+
+ Returns a UCS-4/UTF-32 representation of \a string as a QVector<uint>.
+
+ UCS-4 is a Unicode codec and therefore it is lossless. All characters from
+ this string will be encoded in UCS-4. Any invalid sequence of code units in
+ this string is replaced by the Unicode's replacement character
+ (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
+
+ The returned vector is not NUL terminated.
+
+ \sa QString::toUcs4(), QStringView::toUcs4(), qConvertToLatin1(), qConvertToLocal8Bit(), qConvertToUtf8()
+*/
+QVector<uint> qConvertToUcs4(QStringView string)
+{
+ return qt_convert_to_ucs4(string);
+}
+
QString::Data *QString::fromLatin1_helper(const char *str, int size)
{
Data *d;
@@ -10693,9 +10793,7 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen,
*/
QByteArray QStringRef::toLatin1() const
{
- if (isNull())
- return QByteArray();
- return QString::toLatin1_helper(unicode(), length());
+ return qt_convert_to_latin1(*this);
}
/*!
@@ -10732,14 +10830,7 @@ QByteArray QStringRef::toLatin1() const
*/
QByteArray QStringRef::toLocal8Bit() const
{
-#ifndef QT_NO_TEXTCODEC
- if (!isNull()) {
- QTextCodec *localeCodec = QTextCodec::codecForLocale();
- if (localeCodec)
- return localeCodec->fromUnicode(unicode(), length());
- }
-#endif // QT_NO_TEXTCODEC
- return toLatin1();
+ return qt_convert_to_local_8bit(*this);
}
/*!
@@ -10754,10 +10845,7 @@ QByteArray QStringRef::toLocal8Bit() const
*/
QByteArray QStringRef::toUtf8() const
{
- if (isNull())
- return QByteArray();
-
- return QUtf8::convertFromUnicode(constData(), length());
+ return qt_convert_to_utf8(*this);
}
/*!
@@ -10776,11 +10864,7 @@ QByteArray QStringRef::toUtf8() const
*/
QVector<uint> QStringRef::toUcs4() const
{
- QVector<uint> v(length());
- uint *a = v.data();
- int len = QString::toUcs4_helper(reinterpret_cast<const ushort *>(unicode()), length(), a);
- v.resize(len);
- return v;
+ return qt_convert_to_ucs4(*this);
}
/*!
diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h
index 696aeb2efb..431e2672ff 100644
--- a/src/corelib/tools/qstring.h
+++ b/src/corelib/tools/qstring.h
@@ -96,6 +96,11 @@ Q_CORE_EXPORT int qCompareStrings(QStringView lhs, QLatin1String rhs, Qt::Case
Q_CORE_EXPORT int qCompareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW Q_REQUIRED_RESULT;
Q_CORE_EXPORT int qCompareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW Q_REQUIRED_RESULT;
+Q_CORE_EXPORT QByteArray qConvertToLatin1(QStringView str) Q_REQUIRED_RESULT;
+Q_CORE_EXPORT QByteArray qConvertToUtf8(QStringView str) Q_REQUIRED_RESULT;
+Q_CORE_EXPORT QByteArray qConvertToLocal8Bit(QStringView str) Q_REQUIRED_RESULT;
+Q_CORE_EXPORT QVector<uint> qConvertToUcs4(QStringView str) Q_REQUIRED_RESULT;
+
class QLatin1String
{
public:
diff --git a/src/corelib/tools/qstringview.cpp b/src/corelib/tools/qstringview.cpp
index adffe1800c..65f76965f6 100644
--- a/src/corelib/tools/qstringview.cpp
+++ b/src/corelib/tools/qstringview.cpp
@@ -616,4 +616,55 @@ QT_BEGIN_NAMESPACE
\sa mid(), left()
*/
+/*!
+ \fn QByteArray QStringView::toLatin1() const
+
+ Returns a Latin-1 representation of the string as a QByteArray.
+
+ The behavior is undefined if the string contains non-Latin1 characters.
+
+ \sa toUtf8(), toLocal8Bit(), QTextCodec, qConvertToLatin1()
+*/
+
+/*!
+ \fn QByteArray QStringView::toLocal8Bit() const
+
+ Returns a local 8-bit representation of the string as a QByteArray.
+
+ QTextCodec::codecForLocale() is used to perform the conversion from
+ Unicode. If the locale's encoding could not be determined, this function
+ does the same as toLatin1().
+
+ The behavior is undefined if the string contains characters not
+ supported by the locale's 8-bit encoding.
+
+ \sa toLatin1(), toUtf8(), QTextCodec, qConvertToLocal8Bit()
+*/
+
+/*!
+ \fn QByteArray QStringView::toUtf8() const
+
+ Returns a UTF-8 representation of the string as a QByteArray.
+
+ UTF-8 is a Unicode codec and can represent all characters in a Unicode
+ string like QString.
+
+ \sa toLatin1(), toLocal8Bit(), QTextCodec, qConvertToUtf8()
+*/
+
+/*!
+ \fn QVector<uint> QStringView::toUcs4() const
+
+ Returns a UCS-4/UTF-32 representation of the string as a QVector<uint>.
+
+ UCS-4 is a Unicode codec and therefore it is lossless. All characters from
+ this string will be encoded in UCS-4. Any invalid sequence of code units in
+ this string is replaced by the Unicode replacement character
+ (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
+
+ The returned vector is not 0-terminated.
+
+ \sa toUtf8(), toLatin1(), toLocal8Bit(), QTextCodec
+*/
+
QT_END_NAMESPACE
diff --git a/src/corelib/tools/qstringview.h b/src/corelib/tools/qstringview.h
index 0cfa9e1133..f3f3f52e08 100644
--- a/src/corelib/tools/qstringview.h
+++ b/src/corelib/tools/qstringview.h
@@ -218,6 +218,10 @@ public:
// QString API
//
+ QByteArray toLatin1() const Q_REQUIRED_RESULT { return qConvertToLatin1(*this); }
+ QByteArray toUtf8() const Q_REQUIRED_RESULT { return qConvertToUtf8(*this); }
+ QByteArray toLocal8Bit() const Q_REQUIRED_RESULT { return qConvertToLocal8Bit(*this); }
+ inline QVector<uint> toUcs4() const Q_REQUIRED_RESULT;
Q_DECL_CONSTEXPR QChar at(qssize_t n) const { return (*this)[n]; }
diff --git a/src/corelib/tools/qvector.h b/src/corelib/tools/qvector.h
index 57e80ae125..fd124d1d59 100644
--- a/src/corelib/tools/qvector.h
+++ b/src/corelib/tools/qvector.h
@@ -994,6 +994,8 @@ Q_TEMPLATE_EXTERN template class Q_CORE_EXPORT QVector<QPointF>;
Q_TEMPLATE_EXTERN template class Q_CORE_EXPORT QVector<QPoint>;
#endif
+QVector<uint> QStringView::toUcs4() const { return qConvertToUcs4(*this); }
+
QT_END_NAMESPACE
#endif // QVECTOR_H
diff --git a/tests/auto/corelib/tools/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/tools/qstringapisymmetry/tst_qstringapisymmetry.cpp
index 7bc571c83f..1a7d25e400 100644
--- a/tests/auto/corelib/tools/qstringapisymmetry/tst_qstringapisymmetry.cpp
+++ b/tests/auto/corelib/tools/qstringapisymmetry/tst_qstringapisymmetry.cpp
@@ -269,21 +269,29 @@ private Q_SLOTS:
void toLocal8Bit_QString() { toLocal8Bit_impl<QString>(); }
void toLocal8Bit_QStringRef_data() { toLocal8Bit_data(); }
void toLocal8Bit_QStringRef() { toLocal8Bit_impl<QStringRef>(); }
+ void toLocal8Bit_QStringView_data() { toLocal8Bit_data(); }
+ void toLocal8Bit_QStringView() { toLocal8Bit_impl<QStringView>(); }
void toLatin1_QString_data() { toLatin1_data(); }
void toLatin1_QString() { toLatin1_impl<QString>(); }
void toLatin1_QStringRef_data() { toLatin1_data(); }
void toLatin1_QStringRef() { toLatin1_impl<QStringRef>(); }
+ void toLatin1_QStringView_data() { toLatin1_data(); }
+ void toLatin1_QStringView() { toLatin1_impl<QStringView>(); }
void toUtf8_QString_data() { toUtf8_data(); }
void toUtf8_QString() { toUtf8_impl<QString>(); }
void toUtf8_QStringRef_data() { toUtf8_data(); }
void toUtf8_QStringRef() { toUtf8_impl<QStringRef>(); }
+ void toUtf8_QStringView_data() { toUtf8_data(); }
+ void toUtf8_QStringView() { toUtf8_impl<QStringView>(); }
void toUcs4_QString_data() { toUcs4_data(); }
void toUcs4_QString() { toUcs4_impl<QString>(); }
void toUcs4_QStringRef_data() { toUcs4_data(); }
void toUcs4_QStringRef() { toUcs4_impl<QStringRef>(); }
+ void toUcs4_QStringView_data() { toUcs4_data(); }
+ void toUcs4_QStringView() { toUcs4_impl<QStringView>(); }
};
void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty)
@@ -638,6 +646,7 @@ void tst_QStringApiSymmetry::truncate_impl()
template <class Str> Str make(const QString &s);
template <> QStringRef make(const QString &s) { return QStringRef(&s); }
template <> QString make(const QString &s) { return s; }
+template <> QStringView make(const QString &s) { return s; }
#define REPEAT_16X(X) X X X X X X X X X X X X X X X X
#define LONG_STRING_256 REPEAT_16X("0123456789abcdef")