summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Lars Knoll <lars.knoll@qt.io> 2020-04-30 10:50:44 +0200
committer Lars Knoll <lars.knoll@qt.io> 2020-05-14 07:46:14 +0200
commit f64a6bd638d399403845fe52e6f8e52889f1f52b (patch)
tree f3eda565c343b1b24f8711bc87e973910ec788f4 /src
parent f437c8c5f9772a0d73c8772b64fea6133d43bcc7 (diff)
Start work on a new API to replace QTextCodec
The new QStringEncoder and QStringDecoder classes (with a common QStringConverter base class) are there to replace QTextCodec in Qt 6. They currently use a trivial wrapper around the UTF encoding functionality. Added some autotests, mostly copied from the text codec tests.

Change-Id: Ib6eeee55fba918b9424be244cbda9dfd5096f7eb
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/codecs/qtextcodec.cpp 20
-rw-r--r-- src/corelib/codecs/qtextcodec.h 33
-rw-r--r-- src/corelib/serialization/qtextstream.cpp 2
-rw-r--r-- src/corelib/text/qstringconverter.cpp 186
-rw-r--r-- src/corelib/text/qstringconverter.h 195
-rw-r--r-- src/corelib/text/text.pri 2
-rw-r--r-- src/tools/bootstrap/.prev_CMakeLists.txt 1
-rw-r--r-- src/tools/bootstrap/CMakeLists.txt 1
-rw-r--r-- src/tools/bootstrap/bootstrap.pro 1
9 files changed, 392 insertions, 49 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
index 8fef9ccfba..72f9dcc0a8 100644
--- a/src/corelib/codecs/qtextcodec.cpp
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -339,26 +339,6 @@ static void setup() {}
*/
/*!
- Destroys the ConverterState object.
-*/
-QTextCodec::ConverterState::~ConverterState()
-{
- clear();
-}
-
-void QTextCodec::ConverterState::clear()
-{
- if (clearFn)
- clearFn(this);
- remainingChars = 0;
- invalidChars = 0;
- state_data[0] = 0;
- state_data[1] = 0;
- state_data[2] = 0;
- state_data[3] = 0;
-}
-
-/*!
\class QTextCodec
\inmodule QtCore
\brief The QTextCodec class provides conversions between text encodings.
diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h
index 5b502d2a98..03e8f95ce1 100644
--- a/src/corelib/codecs/qtextcodec.h
+++ b/src/corelib/codecs/qtextcodec.h
@@ -40,7 +40,7 @@
#ifndef QTEXTCODEC_H
#define QTEXTCODEC_H
-#include <QtCore/qstring.h>
+#include <QtCore/qstringconverter.h>
#include <QtCore/qlist.h>
QT_REQUIRE_CONFIG(textcodec);
@@ -53,10 +53,13 @@ class QIODevice;
class QTextDecoder;
class QTextEncoder;
-class Q_CORE_EXPORT QTextCodec
+class Q_CORE_EXPORT QTextCodec : public QStringConverterBase
{
Q_DISABLE_COPY(QTextCodec)
public:
+ using ConversionFlags = QStringConverterBase::Flags;
+ using ConverterState = QStringConverterBase::State;
+
static QTextCodec* codecForName(const QByteArray &name);
static QTextCodec* codecForName(const char *name) { return codecForName(QByteArray(name)); }
static QTextCodec* codecForMib(int mib);
@@ -89,31 +92,6 @@ public:
QByteArray fromUnicode(const QString& uc) const;
#endif
QByteArray fromUnicode(QStringView uc) const;
- enum ConversionFlag {
- DefaultConversion,
- ConvertInvalidToNull = 0x80000000,
- IgnoreHeader = 0x1
- };
- Q_DECLARE_FLAGS(ConversionFlags, ConversionFlag)
-
- struct Q_CORE_EXPORT ConverterState {
- ConverterState(ConversionFlags f = DefaultConversion)
- : flags(f), state_data{0, 0, 0, 0} {}
- ~ConverterState();
- ConversionFlags flags;
- int remainingChars = 0;
- int invalidChars = 0;
-
- union {
- uint state_data[4];
- void *d[2];
- };
- void clear();
- using ClearDataFn = void (*)(ConverterState *);
- ClearDataFn clearFn = nullptr;
- private:
- Q_DISABLE_COPY(ConverterState)
- };
QString toUnicode(const char *in, int length, ConverterState *state = nullptr) const
{ return convertToUnicode(in, length, state); }
@@ -137,7 +115,6 @@ protected:
private:
friend struct QCoreGlobalData;
};
-Q_DECLARE_OPERATORS_FOR_FLAGS(QTextCodec::ConversionFlags)
class Q_CORE_EXPORT QTextEncoder {
Q_DISABLE_COPY(QTextEncoder)
diff --git a/src/corelib/serialization/qtextstream.cpp b/src/corelib/serialization/qtextstream.cpp
index cb72b28511..a970397419 100644
--- a/src/corelib/serialization/qtextstream.cpp
+++ b/src/corelib/serialization/qtextstream.cpp
@@ -355,7 +355,7 @@ QTextStreamPrivate::~QTextStreamPrivate()
#if QT_CONFIG(textcodec)
static void resetCodecConverterStateHelper(QTextCodec::ConverterState *state)
{
- state->~ConverterState();
+ state->~State();
new (state) QTextCodec::ConverterState;
}
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
new file mode 100644
index 0000000000..1f61eee5cb
--- /dev/null
+++ b/src/corelib/text/qstringconverter.cpp
@@ -0,0 +1,186 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 The Qt Company Ltd.
+** Copyright (C) 2018 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <qstringconverter.h>
+#include <private/qutfcodec_p.h>
+
+QT_BEGIN_NAMESPACE
+
+/*!
+ \enum QStringConverter::Flag
+
+ \value DefaultConversion No flag is set.
+ \value ConvertInvalidToNull If this flag is set, each invalid input
+ character is output as a null character.
+ \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
+
+ \value Stateless Ignore possible converter states between different function calls
+ to encode or decode strings.
+*/
+
+
+// Returns the state to its freshly-constructed condition (the flags are left
+// untouched). Also invoked by ~State().
+void QStringConverter::State::clear()
+{
+    // Run the optional cleanup hook first: it receives this object while the
+    // state_data/d storage still holds whatever was stored there.
+    if (clearFn != nullptr)
+        clearFn(this);
+
+    invalidChars = 0;
+    remainingChars = 0;
+    for (uint &word : state_data)
+        word = 0;
+}
+
+// Copies the QChars of \a converted to \a out and returns the position one
+// past the last QChar written. \a out must have room for
+// converted.length() QChars.
+static QChar *copyConverted(QChar *out, const QString &converted)
+{
+    const auto count = converted.length();
+    memcpy(out, converted.constData(), count*sizeof(QChar));
+    return out + count;
+}
+
+// Copies the bytes of \a converted to \a out and returns the position one
+// past the last byte written. \a out must have room for
+// converted.length() bytes.
+static char *copyConverted(char *out, const QByteArray &converted)
+{
+    const auto count = converted.length();
+    memcpy(out, converted.constData(), count);
+    return out + count;
+}
+
+// The functions below adapt the QUtf8/QUtf16/QUtf32 conversion routines to
+// the QStringConverter::Interface function-pointer signatures.
+//
+// Decoders (fromUtfXX): convert \a length bytes starting at \a in and write
+// the resulting QChars to \a out.
+// Encoders (toUtfXX): convert the UTF-16 text \a in and write the resulting
+// bytes to \a out.
+//
+// All of them forward \a state unchanged to the underlying routine (it may
+// be nullptr) and return the position one past the last element written.
+// The conversion currently goes through a temporary QString/QByteArray that
+// is then copied into the caller's buffer.
+
+static QChar *fromUtf8(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf8::convertToUnicode(in, length, state));
+}
+
+static char *toUtf8(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf8::convertFromUnicode(in.data(), in.length(), state));
+}
+
+static QChar *fromUtf16(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf16::convertToUnicode(in, length, state));
+}
+
+static char *toUtf16(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf16::convertFromUnicode(in.data(), in.length(), state));
+}
+
+static QChar *fromUtf16BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf16::convertToUnicode(in, length, state, BigEndianness));
+}
+
+static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf16::convertFromUnicode(in.data(), in.length(), state, BigEndianness));
+}
+
+static QChar *fromUtf16LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf16::convertToUnicode(in, length, state, LittleEndianness));
+}
+
+static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf16::convertFromUnicode(in.data(), in.length(), state, LittleEndianness));
+}
+
+static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf32::convertToUnicode(in, length, state));
+}
+
+static char *toUtf32(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf32::convertFromUnicode(in.data(), in.length(), state));
+}
+
+static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf32::convertToUnicode(in, length, state, BigEndianness));
+}
+
+static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf32::convertFromUnicode(in.data(), in.length(), state, BigEndianness));
+}
+
+static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf32::convertToUnicode(in, length, state, LittleEndianness));
+}
+
+static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
+{
+    return copyConverted(out, QUtf32::convertFromUnicode(in.data(), in.length(), state, LittleEndianness));
+}
+
+// Output-size estimates that QStringEncoder/QStringDecoder use to allocate
+// the result buffer before converting. The fromUtfXXLen() functions take the
+// number of input bytes and return a QChar count; the toUtfXXLen() functions
+// take the number of input UTF-16 code units and return a byte count.
+
+static qsizetype fromUtf8Len(qsizetype l)
+{
+    return 1 + l;
+}
+
+static qsizetype toUtf8Len(qsizetype l)
+{
+    return (l + 1)*3;
+}
+
+static qsizetype fromUtf16Len(qsizetype l)
+{
+    return 2 + l/2;
+}
+
+static qsizetype toUtf16Len(qsizetype l)
+{
+    return (l + 1)*2;
+}
+
+static qsizetype fromUtf32Len(qsizetype l)
+{
+    return 1 + l;
+}
+
+static qsizetype toUtf32Len(qsizetype l)
+{
+    return (l + 1)*4;
+}
+
+// Conversion table with one row per QStringConverter::Encoding value, in
+// enum order: the QStringConverter constructor indexes it with int(encoding).
+// Each row initializes the Interface members positionally, i.e.
+// { toUtf16, toUtf16Len, fromUtf16, fromUtf16Len }, so the decoder and its
+// length estimate come first, followed by the encoder and its estimate.
+const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringConverter::LastEncoding + 1] =
+{
+ { fromUtf8, fromUtf8Len, toUtf8, toUtf8Len }, // Utf8
+ { fromUtf16, fromUtf16Len, toUtf16, toUtf16Len }, // Utf16
+ { fromUtf16LE, fromUtf16Len, toUtf16LE, toUtf16Len }, // Utf16LE
+ { fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len }, // Utf16BE
+ { fromUtf32, fromUtf32Len, toUtf32, toUtf32Len }, // Utf32
+ { fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len }, // Utf32LE
+ { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len } // Utf32BE
+};
+
+QT_END_NAMESPACE
diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h
new file mode 100644
index 0000000000..e91975b70f
--- /dev/null
+++ b/src/corelib/text/qstringconverter.h
@@ -0,0 +1,195 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QSTRINGCONVERTER_H
+#define QSTRINGCONVERTER_H
+
+#include <QtCore/qstring.h>
+
+QT_BEGIN_NAMESPACE
+
+// work around a compiler bug in GCC 7
+#if defined(Q_CC_GNU) && __GNUC__ == 7
+#define QSTRINGCONVERTER_CONSTEXPR
+#else
+#define QSTRINGCONVERTER_CONSTEXPR constexpr
+#endif
+
+// Holds the flag and state types shared by QStringConverter and QTextCodec
+// (QTextCodec aliases them as ConversionFlags and ConverterState).
+class QStringConverterBase
+{
+public:
+ // Options controlling a conversion; combinable through Flags.
+ enum Flag {
+ // No flag is set.
+ DefaultConversion,
+ // Each invalid input character is output as a null character.
+ ConvertInvalidToNull = 0x1,
+ // Ignore any Unicode byte-order mark and don't generate any.
+ IgnoreHeader = 0x2,
+ // Ignore possible converter state between calls; the encoder/decoder
+ // call operators pass no State to the conversion when this is set.
+ Stateless = 0x4
+ };
+ Q_DECLARE_FLAGS(Flags, Flag)
+
+ // Conversion state handed to the conversion functions. Not copyable.
+ struct State {
+ // Starts with the given flags, zeroed counters and zeroed state_data.
+ constexpr State(Flags f = DefaultConversion)
+ : flags(f), state_data{0, 0, 0, 0} {}
+ // Destruction goes through clear(), so clearFn (if set) runs here too.
+ ~State() { clear(); }
+ // Calls clearFn (if set), then zeroes state_data and both counters.
+ Q_CORE_EXPORT void clear();
+
+ Flags flags;
+ // NOTE(review): presumably the amount of input buffered from an
+ // incomplete sequence at the end of the previous call - confirm
+ // against the codec implementations.
+ qsizetype remainingChars = 0;
+ // Non-zero makes QStringConverter::hasError() return true.
+ qsizetype invalidChars = 0;
+
+ // Scratch storage for the conversion functions; both members share
+ // the same bytes.
+ union {
+ uint state_data[4];
+ void *d[2];
+ };
+ // Optional hook invoked by clear() with this object, before the
+ // storage above is zeroed.
+ using ClearDataFn = void (*)(State *);
+ ClearDataFn clearFn = nullptr;
+ private:
+ Q_DISABLE_COPY(State)
+ };
+};
+Q_DECLARE_OPERATORS_FOR_FLAGS(QStringConverterBase::Flags)
+
+// Common base of QStringEncoder and QStringDecoder: owns the conversion
+// State and a pointer to the table of functions implementing one encoding.
+class QStringConverter : public QStringConverterBase
+{
+public:
+
+ // Built-in encodings. The values index encodingInterfaces, so the order
+ // here must match the order of that table.
+ enum Encoding {
+ Utf8,
+ Utf16,
+ Utf16LE,
+ Utf16BE,
+ Utf32,
+ Utf32LE,
+ Utf32BE,
+ LastEncoding = Utf32BE
+ };
+protected:
+
+ // Function table implementing one encoding.
+ struct Interface
+ {
+ // ### FIXME: need a QByteArrayView
+ // Decodes `length` bytes at `in` into `out`; the return value is used
+ // by QStringDecoder as the end of the written QChars.
+ using DecoderFn = QChar * (*)(QChar *out, const char *in, qsizetype length, State *state);
+ // Maps an input length to the output size the caller allocates.
+ using LengthFn = qsizetype (*)(qsizetype inLength);
+ // Encodes `in` into `out`; the return value is used by QStringEncoder
+ // as the end of the written bytes.
+ using EncoderFn = char * (*)(char *out, QStringView in, State *state);
+ DecoderFn toUtf16 = nullptr;
+ LengthFn toUtf16Len = nullptr;
+ EncoderFn fromUtf16 = nullptr;
+ LengthFn fromUtf16Len = nullptr;
+ };
+
+ // Selects the built-in table entry for `encoding` (no range check is
+ // performed on the index) and starts the state with flags `f`.
+ QSTRINGCONVERTER_CONSTEXPR QStringConverter(Encoding encoding, Flags f)
+ : iface(&encodingInterfaces[int(encoding)]), state(f)
+ {}
+ // Uses a caller-supplied function table with a default-constructed state.
+ // Passing nullptr yields a converter for which isValid() is false.
+ QSTRINGCONVERTER_CONSTEXPR QStringConverter(const Interface *i)
+ : iface(i)
+ {}
+
+public:
+ // True when a function table is attached.
+ bool isValid() const { return iface != nullptr; }
+
+ // Clears the conversion state (see State::clear()); flags are kept.
+ void resetState()
+ {
+ state.clear();
+ }
+ // True when the state has recorded at least one invalid character.
+ bool hasError() const { return state.invalidChars != 0; }
+
+protected:
+ const Interface *iface;
+ State state;
+private:
+ // One entry per Encoding value; defined in qstringconverter.cpp.
+ Q_CORE_EXPORT static const Interface encodingInterfaces[Encoding::LastEncoding + 1];
+};
+
+// Encodes UTF-16 text into a QByteArray in the encoding chosen at
+// construction. Unless the Stateless flag is set, conversion state is kept
+// between calls.
+class QStringEncoder : public QStringConverter
+{
+protected:
+    // Wraps a caller-supplied function table; \a i may be nullptr, in which
+    // case isValid() is false and encoding fails (see operator() below).
+    QSTRINGCONVERTER_CONSTEXPR QStringEncoder(const Interface *i)
+        : QStringConverter(i)
+    {}
+public:
+    // ### We shouldn't write a BOM by default. Need to resolve this
+    // while keeping compat with QTextCodec
+    QSTRINGCONVERTER_CONSTEXPR QStringEncoder(Encoding encoding, Flags flags = IgnoreHeader)
+        : QStringConverter(encoding, flags)
+    {}
+
+    // Encodes the \a length QChars starting at \a in.
+    QByteArray operator()(const QChar *in, qsizetype length)
+    { return (*this)(QStringView(in, length)); }
+
+    // Encodes \a in and returns the encoded bytes. On an invalid encoder
+    // (isValid() == false) nothing is converted: an empty QByteArray is
+    // returned and hasError() reports true afterwards.
+    QByteArray operator()(QStringView in)
+    {
+        if (!iface) {
+            // There is no conversion function to call; record the failure
+            // so that hasError() returns true instead of dereferencing null.
+            state.invalidChars = 1;
+            return QByteArray();
+        }
+        // Allocate for the estimated maximum size, then shrink to what the
+        // conversion actually wrote.
+        QByteArray result(iface->fromUtf16Len(in.size()), Qt::Uninitialized);
+        char *out = result.data();
+        // ### Fixme: needs to be moved into the conversion methods to honor the other flags
+        State *conversionState = (state.flags & Stateless) ? nullptr : &state;
+        out = iface->fromUtf16(out, in, conversionState);
+        result.truncate(out - result.constData());
+        return result;
+    }
+};
+
+// Decodes bytes in the encoding chosen at construction into a QString.
+// Unless the Stateless flag is set, conversion state is kept between calls.
+class QStringDecoder : public QStringConverter
+{
+protected:
+    // Wraps a caller-supplied function table; \a i may be nullptr, in which
+    // case isValid() is false and decoding fails (see operator() below).
+    QSTRINGCONVERTER_CONSTEXPR QStringDecoder(const Interface *i)
+        : QStringConverter(i)
+    {}
+public:
+    QSTRINGCONVERTER_CONSTEXPR QStringDecoder(Encoding encoding, Flags flags = DefaultConversion)
+        : QStringConverter(encoding, flags)
+    {}
+
+    // Decodes the \a length bytes starting at \a in and returns the text.
+    // On an invalid decoder (isValid() == false) nothing is converted: an
+    // empty QString is returned and hasError() reports true afterwards.
+    QString operator()(const char *in, qsizetype length)
+    {
+        if (!iface) {
+            // There is no conversion function to call; record the failure
+            // so that hasError() returns true instead of dereferencing null.
+            state.invalidChars = 1;
+            return QString();
+        }
+        // Allocate for the estimated maximum size, then shrink to what the
+        // conversion actually wrote.
+        QString result(iface->toUtf16Len(length), Qt::Uninitialized);
+        QChar *out = result.data();
+        // ### Fixme: needs to be moved into the conversion methods
+        State *conversionState = (state.flags & Stateless) ? nullptr : &state;
+        out = iface->toUtf16(out, in, length, conversionState);
+        result.truncate(out - result.constData());
+        return result;
+    }
+    // Decodes the whole of \a ba.
+    QString operator()(const QByteArray &ba)
+    { return (*this)(ba.constData(), ba.size()); }
+    // Decodes the null-terminated string \a chars. A null pointer yields an
+    // empty QString (strlen() must not be called on a null pointer).
+    QString operator()(const char *chars)
+    {
+        if (!chars)
+            return QString();
+        return (*this)(chars, strlen(chars));
+    }
+};
+
+QT_END_NAMESPACE
+
+#undef QSTRINGCONVERTER_CONSTEXPR
+
+#endif
diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri
index 59ca5fe6d0..4c584cf958 100644
--- a/src/corelib/text/text.pri
+++ b/src/corelib/text/text.pri
@@ -19,6 +19,7 @@ HEADERS += \
text/qstringalgorithms.h \
text/qstringalgorithms_p.h \
text/qstringbuilder.h \
+ text/qstringconverter.h \
text/qstringiterator_p.h \
text/qstringlist.h \
text/qstringliteral.h \
@@ -39,6 +40,7 @@ SOURCES += \
text/qregexp.cpp \
text/qstring.cpp \
text/qstringbuilder.cpp \
+ text/qstringconverter.cpp \
text/qstringlist.cpp \
text/qstringview.cpp \
text/qtextboundaryfinder.cpp \
diff --git a/src/tools/bootstrap/.prev_CMakeLists.txt b/src/tools/bootstrap/.prev_CMakeLists.txt
index ae7773a165..8f430c494e 100644
--- a/src/tools/bootstrap/.prev_CMakeLists.txt
+++ b/src/tools/bootstrap/.prev_CMakeLists.txt
@@ -107,6 +107,7 @@ qt_add_module(Bootstrap
../../corelib/text/qstring.cpp
../../corelib/text/qstring_compat.cpp
../../corelib/text/qstringbuilder.cpp
+ ../../corelib/text/qstringconverter.cpp
../../corelib/text/qstringlist.cpp
../../corelib/text/qstringview.cpp
../../corelib/text/qvsnprintf.cpp
diff --git a/src/tools/bootstrap/CMakeLists.txt b/src/tools/bootstrap/CMakeLists.txt
index 68c1fabe6d..5a17888003 100644
--- a/src/tools/bootstrap/CMakeLists.txt
+++ b/src/tools/bootstrap/CMakeLists.txt
@@ -108,6 +108,7 @@ qt_extend_target(Bootstrap
../../corelib/text/qstring.cpp
../../corelib/text/qstring_compat.cpp
../../corelib/text/qstringbuilder.cpp
+ ../../corelib/text/qstringconverter.cpp
../../corelib/text/qstringlist.cpp
../../corelib/text/qstringview.cpp
../../corelib/text/qvsnprintf.cpp
diff --git a/src/tools/bootstrap/bootstrap.pro b/src/tools/bootstrap/bootstrap.pro
index a3ecd3ee5f..169c5fe1c2 100644
--- a/src/tools/bootstrap/bootstrap.pro
+++ b/src/tools/bootstrap/bootstrap.pro
@@ -93,6 +93,7 @@ SOURCES += \
../../corelib/text/qregularexpression.cpp \
../../corelib/text/qstring.cpp \
../../corelib/text/qstringbuilder.cpp \
+ ../../corelib/text/qstringconverter.cpp \
../../corelib/text/qstring_compat.cpp \
../../corelib/text/qstringlist.cpp \
../../corelib/text/qstringview.cpp \