summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@kdab.com>2020-05-10 23:18:18 +0200
committerMarc Mutz <marc.mutz@kdab.com>2020-05-19 10:15:38 +0200
commit2259a433ca341129253ceb69bbe9eefc546282a3 (patch)
tree662ed56adc65ae8ef7fdf858d200483a19822fb7
parent60ec350bc79d562c0611bc31e8655729272c31d6 (diff)
QString: throughly port internals to char16_t
This includes allocating QString data as char16_t instead of ushort. This isn't the end of the port, but an important milestone: the traditional foldChar() functions are now all unused. Change-Id: I766bebc2d70b6972e2045d3474c7f5770f4676d9 Reviewed-by: Lars Knoll <lars.knoll@qt.io>
-rw-r--r--src/corelib/text/qchar.cpp25
-rw-r--r--src/corelib/text/qstring.cpp151
-rw-r--r--src/corelib/text/qstring.h2
-rw-r--r--src/corelib/text/qstringliteral.h6
4 files changed, 76 insertions, 108 deletions
diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp
index 358cc82f22..b7f78c3f5d 100644
--- a/src/corelib/text/qchar.cpp
+++ b/src/corelib/text/qchar.cpp
@@ -1703,15 +1703,6 @@ char32_t QChar::toTitleCase(char32_t ucs4) noexcept
return convertCase_helper(ucs4, QUnicodeTables::TitleCase);
}
-[[deprecated]]
-static inline uint foldCase(const ushort *ch, const ushort *start)
-{
- char32_t ucs4 = *ch;
- if (QChar::isLowSurrogate(ucs4) && ch > start && QChar::isHighSurrogate(*(ch - 1)))
- ucs4 = QChar::surrogateToUcs4(*(ch - 1), ucs4);
- return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
-}
-
static inline char32_t foldCase(const char16_t *ch, const char16_t *start)
{
char32_t ucs4 = *ch;
@@ -1720,16 +1711,6 @@ static inline char32_t foldCase(const char16_t *ch, const char16_t *start)
return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
}
-[[deprecated]]
-static inline uint foldCase(uint ch, uint &last) noexcept
-{
- char32_t ucs4 = ch;
- if (QChar::isLowSurrogate(ucs4) && QChar::isHighSurrogate(last))
- ucs4 = QChar::surrogateToUcs4(last, ucs4);
- last = ch;
- return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
-}
-
static inline char32_t foldCase(char32_t ch, char32_t &last) noexcept
{
char32_t ucs4 = ch;
@@ -1739,12 +1720,6 @@ static inline char32_t foldCase(char32_t ch, char32_t &last) noexcept
return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
}
-[[deprecated]]
-static inline ushort foldCase(ushort ch) noexcept
-{
- return convertCase_helper(char16_t{ch}, QUnicodeTables::CaseFold);
-}
-
static inline char16_t foldCase(char16_t ch) noexcept
{
return convertCase_helper(ch, QUnicodeTables::CaseFold);
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 1765ca0aa1..57be0eabcf 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -100,9 +100,10 @@
QT_BEGIN_NAMESPACE
-namespace {
-// temporary; to easy porting to char16_t
-char16_t *to_utf16(ushort *p) { return reinterpret_cast<char16_t *>(p); }
+template <typename T, typename Cmp = std::less<>>
+static constexpr bool points_into_range(const T *p, const T *b, const T *e, Cmp less = {}) noexcept
+{
+ return !less(p, b) && less(p, e);
}
/*
@@ -847,8 +848,8 @@ static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QCha
if (be - b < ae - a)
e = a + (be - b);
- uint alast = 0;
- uint blast = 0;
+ char32_t alast = 0;
+ char32_t blast = 0;
while (a < e) {
// qDebug() << Qt::hex << alast << blast;
// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
@@ -891,8 +892,8 @@ static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
-extern "C" int qt_ucstrncmp_mips_dsp_asm(const ushort *a,
- const ushort *b,
+extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
+ const char16_t *b,
unsigned len);
#endif
@@ -912,8 +913,8 @@ static int ucstrncmp(const QChar *a, const QChar *b, size_t l)
#if defined(__mips_dsp)
Q_STATIC_ASSERT(sizeof(uint) == sizeof(size_t));
if (l >= 8) {
- return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const ushort*>(a),
- reinterpret_cast<const ushort*>(b),
+ return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const char16_t*>(a),
+ reinterpret_cast<const char16_t*>(b),
l);
}
#endif // __mips_dsp
@@ -1061,8 +1062,8 @@ static int ucstrncmp(const QChar *a, const QChar *b, size_t l)
static int ucstrncmp(const QChar *a, const uchar *c, size_t l)
{
- const ushort *uc = reinterpret_cast<const ushort *>(a);
- const ushort *e = uc + l;
+ const char16_t *uc = reinterpret_cast<const char16_t *>(a);
+ const char16_t *e = uc + l;
#ifdef __SSE2__
__m128i nullmask = _mm_setzero_si128();
@@ -1153,7 +1154,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, size_t l)
c += offset;
# if !defined(__OPTIMIZE_SIZE__)
- const auto lambda = [=](size_t i) { return uc[i] - ushort(c[i]); };
+ const auto lambda = [=](size_t i) { return uc[i] - char16_t(c[i]); };
return UnrollTailLoop<MaxTailLength>::exec(e - uc, 0, lambda, lambda);
# endif
#endif
@@ -2115,9 +2116,9 @@ QString::QString(int size, QChar ch)
} else {
d = DataPointer(Data::allocate(size + 1), size);
d.data()[size] = '\0';
- ushort *i = d.data() + size;
- ushort *b = d.data();
- const ushort value = ch.unicode();
+ char16_t *i = d.data() + size;
+ char16_t *b = d.data();
+ const char16_t value = ch.unicode();
while (i != b)
*--i = value;
}
@@ -2387,7 +2388,7 @@ QString &QString::operator=(QLatin1String other)
if (isDetached() && other.size() <= capacity()) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
d.size = other.size();
d.data()[other.size()] = 0;
- qt_from_latin1(to_utf16(d.data()), other.latin1(), other.size());
+ qt_from_latin1(d.data(), other.latin1(), other.size());
} else {
*this = fromLatin1(other.latin1(), other.size());
}
@@ -2541,7 +2542,7 @@ QString &QString::insert(int i, QLatin1String str)
resize(size() + len);
::memmove(d.data() + i + len, d.data() + i, (d.size - i - len) * sizeof(QChar));
- qt_from_latin1(to_utf16(d.data() + i), s, uint(len));
+ qt_from_latin1(d.data() + i, s, uint(len));
return *this;
}
@@ -2557,14 +2558,9 @@ QString& QString::insert(int i, const QChar *unicode, int size)
if (i < 0 || size <= 0)
return *this;
- const ushort *s = (const ushort *)unicode;
- const std::less<const ushort*> less;
- if (!less(s, d.data()) && less(s, d.data() + d.size)) {
- // Part of me - take a copy
- const QVarLengthArray<ushort> copy(s, s + size);
- insert(i, reinterpret_cast<const QChar *>(copy.data()), size);
- return *this;
- }
+ const auto s = reinterpret_cast<const char16_t *>(unicode);
+ if (points_into_range(s, d.data(), d.data() + d.size))
+ return insert(i, QStringView{QVarLengthArray(s, s + size)});
if (Q_UNLIKELY(i > int(d.size)))
resize(i + size, QLatin1Char(' '));
@@ -2662,7 +2658,7 @@ QString &QString::append(QLatin1String str)
int len = str.size();
if (d->needsDetach() || size() + len > capacity())
reallocData(uint(size() + len) + 1u, true);
- char16_t *i = to_utf16(d.data() + d.size);
+ char16_t *i = d.data() + d.size;
qt_from_latin1(i, s, uint(len));
i[len] = '\0';
d.size += len;
@@ -2819,7 +2815,7 @@ QString &QString::remove(int pos, int len)
} else if (len > 0) {
detach();
memmove(d.data() + pos, d.data() + pos + len,
- (d.size - pos - len + 1) * sizeof(ushort));
+ (d.size - pos - len + 1) * sizeof(QChar));
d.size -= len;
}
return *this;
@@ -2867,15 +2863,11 @@ static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs
*/
QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
{
- const auto s = reinterpret_cast<const ushort *>(str.data());
- const std::less<const ushort *> less;
- if (!less(s, d.data()) && less(s, d.data() + d.size)) {
- // Part of me - take a copy
- const QVarLengthArray<ushort> copy(s, s + str.size());
- removeStringImpl(*this, QStringView{copy.data(), copy.size()}, cs);
- } else {
+ const auto s = str.d.data();
+ if (points_into_range(s, d.data(), d.data() + d.size))
+ removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
+ else
removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
- }
return *this;
}
@@ -3031,7 +3023,7 @@ QChar *textCopy(const QChar *start, int len)
return copy;
}
-bool pointsIntoRange(const QChar *ptr, const ushort *base, int len)
+static bool pointsIntoRange(const QChar *ptr, const char16_t *base, qsizetype len)
{
const QChar *const start = reinterpret_cast<const QChar *>(base);
const std::less<const QChar *> less;
@@ -3196,7 +3188,7 @@ QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs
if (size() == 0)
return *this;
- ushort cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
+ char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
int index = 0;
while (1) {
@@ -3242,19 +3234,19 @@ QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
const int idx = indexOf(before, 0, cs);
if (idx != -1) {
detach();
- const ushort a = after.unicode();
- ushort *i = d.data();
- ushort *const e = i + d.size;
+ const char16_t a = after.unicode();
+ char16_t *i = d.data();
+ char16_t *const e = i + d.size;
i += idx;
*i = a;
if (cs == Qt::CaseSensitive) {
- const ushort b = before.unicode();
+ const char16_t b = before.unicode();
while (++i != e) {
if (*i == b)
*i = a;
}
} else {
- const ushort b = foldCase(before.unicode());
+ const char16_t b = foldCase(before.unicode());
while (++i != e) {
if (foldCase(*i) == b)
*i = a;
@@ -4563,7 +4555,7 @@ QString QString::mid(int position, int n) const
return QString();
case QContainerImplHelper::Empty:
{
- QPair<Data *, ushort *> pair = Data::allocate(0);
+ QPair<Data *, char16_t *> pair = Data::allocate(0);
DataPointer empty = { pair.first, pair.second, 0 };
return QString(empty);
}
@@ -4820,7 +4812,7 @@ QByteArray QString::toLatin1_helper_inplace(QString &s)
// We can return our own buffer to the caller.
// Conversion to Latin-1 always shrinks the buffer by half.
- const char16_t *data = to_utf16(s.d.data());
+ const char16_t *data = s.d.data();
int length = s.d.size;
// Move the d pointer over to the bytearray.
@@ -5016,7 +5008,7 @@ QString::DataPointer QString::fromLatin1_helper(const char *str, int size)
size = qstrlen(str);
d = DataPointer(Data::allocate(size + 1), size);
d.data()[size] = '\0';
- char16_t *dst = to_utf16(d.data());
+ char16_t *dst = d.data();
qt_from_latin1(dst, str, uint(size));
}
@@ -5066,7 +5058,7 @@ QString QString::fromLocal8Bit_helper(const char *str, int size)
if (!str)
return QString();
if (size == 0 || (!*str && size < 0)) {
- QPair<Data *, ushort *> pair = Data::allocate(0);
+ QPair<Data *, char16_t *> pair = Data::allocate(0);
QString::DataPointer empty = { pair.first, pair.second, 0 };
return QString(empty);
}
@@ -6018,7 +6010,7 @@ const ushort *QString::utf16() const
// ensure '\0'-termination for ::fromRawData strings
const_cast<QString*>(this)->reallocData(uint(d.size) + 1u);
}
- return d.data();
+ return reinterpret_cast<const ushort *>(d.data());
}
/*!
@@ -6570,7 +6562,7 @@ QString QString::vasprintf(const char *cformat, va_list ap)
}
case 'c': {
if (length_mod == lm_l)
- subst = QChar((ushort) va_arg(ap, int));
+ subst = QChar::fromUcs2(va_arg(ap, int));
else
subst = QLatin1Char((uchar) va_arg(ap, int));
++c;
@@ -7548,18 +7540,18 @@ QString QString::repeated(int times) const
if (result.capacity() != resultSize)
return QString(); // not enough memory
- memcpy(result.d.data(), d.data(), d.size * sizeof(ushort));
+ memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
int sizeSoFar = d.size;
- ushort *end = result.d.data() + sizeSoFar;
+ char16_t *end = result.d.data() + sizeSoFar;
const int halfResultSize = resultSize >> 1;
while (sizeSoFar <= halfResultSize) {
- memcpy(end, result.d.data(), sizeSoFar * sizeof(ushort));
+ memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
end += sizeSoFar;
sizeSoFar <<= 1;
}
- memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(ushort));
+ memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
result.d.data()[resultSize] = '\0';
result.d.size = resultSize;
return result;
@@ -7583,10 +7575,10 @@ void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::
if (n.version > version) {
int pos = from;
if (QChar::requiresSurrogates(n.ucs4)) {
- ushort ucs4High = QChar::highSurrogate(n.ucs4);
- ushort ucs4Low = QChar::lowSurrogate(n.ucs4);
- ushort oldHigh = QChar::highSurrogate(n.old_mapping);
- ushort oldLow = QChar::lowSurrogate(n.old_mapping);
+ char16_t ucs4High = QChar::highSurrogate(n.ucs4);
+ char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
+ char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
+ char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
while (pos < s.length() - 1) {
if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
if (!d)
@@ -8310,8 +8302,8 @@ QString QString::arg(double a, int fieldWidth, char fmt, int prec, QChar fillCha
return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar);
}
-static inline ushort to_unicode(const QChar c) { return c.unicode(); }
-static inline ushort to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
+static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
+static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
template <typename Char>
static int getEscape(const Char *uc, qsizetype *pos, qsizetype len, int maxNumber = 999)
@@ -8557,10 +8549,10 @@ QString QtPrivate::argToQString(QLatin1String pattern, size_t n, const ArgBase *
*/
bool QString::isSimpleText() const
{
- const ushort *p = d.data();
- const ushort * const end = p + d.size;
+ const char16_t *p = d.data();
+ const char16_t * const end = p + d.size;
while (p < end) {
- ushort uc = *p;
+ char16_t uc = *p;
// sort out regions of complex text formatting
if (uc > 0x058f && (uc < 0x1100 || uc > 0xfb0f)) {
return false;
@@ -8727,7 +8719,7 @@ QString QString::fromRawData(const QChar *unicode, int size)
} else if (!size) {
x = DataPointer(Data::allocate(0), 0);
} else {
- x = Data::fromRawData(reinterpret_cast<const ushort *>(unicode), size);
+ x = Data::fromRawData(reinterpret_cast<const char16_t *>(unicode), size);
}
return QString(x);
}
@@ -9680,9 +9672,9 @@ QDataStream &operator<<(QDataStream &out, const QString &str)
if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
out.writeBytes(reinterpret_cast<const char *>(str.unicode()), uint(sizeof(QChar) * str.length()));
} else {
- QVarLengthArray<ushort> buffer(str.length());
- qbswap<sizeof(ushort)>(str.constData(), str.length(), buffer.data());
- out.writeBytes(reinterpret_cast<const char *>(buffer.data()), uint(sizeof(ushort) * buffer.size()));
+ QVarLengthArray<char16_t> buffer(str.length());
+ qbswap<sizeof(char16_t)>(str.constData(), str.length(), buffer.data());
+ out.writeBytes(reinterpret_cast<const char *>(buffer.data()), uint(sizeof(char16_t) * buffer.size()));
}
} else {
// write null marker
@@ -9737,7 +9729,7 @@ QDataStream &operator>>(QDataStream &in, QString &str)
if ((in.byteOrder() == QDataStream::BigEndian)
!= (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
- ushort *data = reinterpret_cast<ushort *>(str.data());
+ char16_t *data = reinterpret_cast<char16_t *>(str.data());
qbswap<sizeof(*data)>(data, len, data);
}
} else {
@@ -11009,13 +11001,13 @@ bool QStringRef::isRightToLeft() const
*/
bool QtPrivate::isRightToLeft(QStringView string) noexcept
{
- const ushort *p = reinterpret_cast<const ushort*>(string.data());
- const ushort * const end = p + string.size();
+ const char16_t *p = string.utf16();
+ const char16_t * const end = p + string.size();
int isolateLevel = 0;
while (p < end) {
uint ucs4 = *p;
if (QChar::isHighSurrogate(ucs4) && p < end - 1) {
- ushort low = p[1];
+ char16_t low = p[1];
if (QChar::isLowSurrogate(low)) {
ucs4 = QChar::surrogateToUcs4(ucs4, low);
++p;
@@ -11418,33 +11410,34 @@ bool QtPrivate::endsWith(QLatin1String haystack, QLatin1String needle, Qt::CaseS
namespace {
template <typename Pointer>
-uint foldCaseHelper(Pointer ch, Pointer start) = delete;
+char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
template <>
-uint foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
+char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
{
- return foldCase(reinterpret_cast<const ushort*>(ch), reinterpret_cast<const ushort*>(start));
+ return foldCase(reinterpret_cast<const char16_t*>(ch),
+ reinterpret_cast<const char16_t*>(start));
}
template <>
-uint foldCaseHelper<const char*>(const char* ch, const char*)
+char32_t foldCaseHelper<const char*>(const char* ch, const char*)
{
- return foldCase(ushort(uchar(*ch)));
+ return foldCase(char16_t(uchar(*ch)));
}
template <typename T>
-ushort valueTypeToUtf16(T t) = delete;
+char16_t valueTypeToUtf16(T t) = delete;
template <>
-ushort valueTypeToUtf16<QChar>(QChar t)
+char16_t valueTypeToUtf16<QChar>(QChar t)
{
return t.unicode();
}
template <>
-ushort valueTypeToUtf16<char>(char t)
+char16_t valueTypeToUtf16<char>(char t)
{
- return ushort(uchar(t));
+ return char16_t{uchar(t)};
}
}
@@ -11565,7 +11558,7 @@ static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
if (std::size_t(from) >= std::size_t(haystack.size()))
return -1;
if (from >= 0) {
- ushort c = needle.unicode();
+ char16_t c = needle.unicode();
const auto b = haystack.data();
auto n = b + from;
if (cs == Qt::CaseSensitive) {
diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h
index 733d796e81..801d16a853 100644
--- a/src/corelib/text/qstring.h
+++ b/src/corelib/text/qstring.h
@@ -254,7 +254,7 @@ qsizetype QStringView::lastIndexOf(QLatin1String s, qsizetype from, Qt::CaseSens
class Q_CORE_EXPORT QString
{
- typedef QTypedArrayData<ushort> Data;
+ typedef QTypedArrayData<char16_t> Data;
public:
typedef QStringPrivate DataPointer;
diff --git a/src/corelib/text/qstringliteral.h b/src/corelib/text/qstringliteral.h
index c3b274ca90..0968a9531a 100644
--- a/src/corelib/text/qstringliteral.h
+++ b/src/corelib/text/qstringliteral.h
@@ -68,15 +68,15 @@ Q_STATIC_ASSERT_X(sizeof(qunicodechar) == 2,
Q_BASIC_ATOMIC_INITIALIZER(-1), QArrayData::StaticDataFlags, 0 \
}; \
QStringPrivate holder = { \
- static_cast<QTypedArrayData<ushort> *>(const_cast<QArrayData *>(&qstring_literal)), \
- reinterpret_cast<ushort *>(const_cast<qunicodechar *>(QT_UNICODE_LITERAL(str))), \
+ static_cast<QTypedArrayData<char16_t> *>(const_cast<QArrayData *>(&qstring_literal)), \
+ const_cast<qunicodechar *>(QT_UNICODE_LITERAL(str)), \
Size \
}; \
return QString(holder); \
}()) \
/**/
-using QStringPrivate = QArrayDataPointer<ushort>;
+using QStringPrivate = QArrayDataPointer<char16_t>;
QT_END_NAMESPACE