diff options
author | Lars Knoll <lars.knoll@qt.io> | 2020-04-30 10:50:44 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-05-14 07:46:14 +0200 |
commit | f64a6bd638d399403845fe52e6f8e52889f1f52b (patch) | |
tree | f3eda565c343b1b24f8711bc87e973910ec788f4 /src | |
parent | f437c8c5f9772a0d73c8772b64fea6133d43bcc7 (diff) |
Start work on a new API to replace QTextCodec
The new QStringEncoder and QStringDecoder classes
(with a common QStringConverter base class) are
there to replace QTextCodec in Qt 6.
The implementation is currently a trivial wrapper around the
existing UTF-8, UTF-16 and UTF-32 encoding functionality.
Add some autotests, mostly copied from the QTextCodec
tests.
Change-Id: Ib6eeee55fba918b9424be244cbda9dfd5096f7eb
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/codecs/qtextcodec.cpp | 20 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.h | 33 | ||||
-rw-r--r-- | src/corelib/serialization/qtextstream.cpp | 2 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 186 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter.h | 195 | ||||
-rw-r--r-- | src/corelib/text/text.pri | 2 | ||||
-rw-r--r-- | src/tools/bootstrap/.prev_CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/tools/bootstrap/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/tools/bootstrap/bootstrap.pro | 1 |
9 files changed, 392 insertions, 49 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp index 8fef9ccfba..72f9dcc0a8 100644 --- a/src/corelib/codecs/qtextcodec.cpp +++ b/src/corelib/codecs/qtextcodec.cpp @@ -339,26 +339,6 @@ static void setup() {} */ /*! - Destroys the ConverterState object. -*/ -QTextCodec::ConverterState::~ConverterState() -{ - clear(); -} - -void QTextCodec::ConverterState::clear() -{ - if (clearFn) - clearFn(this); - remainingChars = 0; - invalidChars = 0; - state_data[0] = 0; - state_data[1] = 0; - state_data[2] = 0; - state_data[3] = 0; -} - -/*! \class QTextCodec \inmodule QtCore \brief The QTextCodec class provides conversions between text encodings. diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h index 5b502d2a98..03e8f95ce1 100644 --- a/src/corelib/codecs/qtextcodec.h +++ b/src/corelib/codecs/qtextcodec.h @@ -40,7 +40,7 @@ #ifndef QTEXTCODEC_H #define QTEXTCODEC_H -#include <QtCore/qstring.h> +#include <QtCore/qstringconverter.h> #include <QtCore/qlist.h> QT_REQUIRE_CONFIG(textcodec); @@ -53,10 +53,13 @@ class QIODevice; class QTextDecoder; class QTextEncoder; -class Q_CORE_EXPORT QTextCodec +class Q_CORE_EXPORT QTextCodec : public QStringConverterBase { Q_DISABLE_COPY(QTextCodec) public: + using ConversionFlags = QStringConverterBase::Flags; + using ConverterState = QStringConverterBase::State; + static QTextCodec* codecForName(const QByteArray &name); static QTextCodec* codecForName(const char *name) { return codecForName(QByteArray(name)); } static QTextCodec* codecForMib(int mib); @@ -89,31 +92,6 @@ public: QByteArray fromUnicode(const QString& uc) const; #endif QByteArray fromUnicode(QStringView uc) const; - enum ConversionFlag { - DefaultConversion, - ConvertInvalidToNull = 0x80000000, - IgnoreHeader = 0x1 - }; - Q_DECLARE_FLAGS(ConversionFlags, ConversionFlag) - - struct Q_CORE_EXPORT ConverterState { - ConverterState(ConversionFlags f = DefaultConversion) - : flags(f), state_data{0, 0, 0, 0} {} - 
~ConverterState(); - ConversionFlags flags; - int remainingChars = 0; - int invalidChars = 0; - - union { - uint state_data[4]; - void *d[2]; - }; - void clear(); - using ClearDataFn = void (*)(ConverterState *); - ClearDataFn clearFn = nullptr; - private: - Q_DISABLE_COPY(ConverterState) - }; QString toUnicode(const char *in, int length, ConverterState *state = nullptr) const { return convertToUnicode(in, length, state); } @@ -137,7 +115,6 @@ protected: private: friend struct QCoreGlobalData; }; -Q_DECLARE_OPERATORS_FOR_FLAGS(QTextCodec::ConversionFlags) class Q_CORE_EXPORT QTextEncoder { Q_DISABLE_COPY(QTextEncoder) diff --git a/src/corelib/serialization/qtextstream.cpp b/src/corelib/serialization/qtextstream.cpp index cb72b28511..a970397419 100644 --- a/src/corelib/serialization/qtextstream.cpp +++ b/src/corelib/serialization/qtextstream.cpp @@ -355,7 +355,7 @@ QTextStreamPrivate::~QTextStreamPrivate() #if QT_CONFIG(textcodec) static void resetCodecConverterStateHelper(QTextCodec::ConverterState *state) { - state->~ConverterState(); + state->~State(); new (state) QTextCodec::ConverterState; } diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp new file mode 100644 index 0000000000..1f61eee5cb --- /dev/null +++ b/src/corelib/text/qstringconverter.cpp @@ -0,0 +1,186 @@ +/**************************************************************************** +** +** Copyright (C) 2020 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. 
For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include <qstringconverter.h> +#include <private/qutfcodec_p.h> + +QT_BEGIN_NAMESPACE + +/*! + \enum QStringConverter::Flag + + \value DefaultConversion No flag is set. + \value ConvertInvalidToNull If this flag is set, each invalid input + character is output as a null character. + \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any. + + \value Stateless Ignore possible converter states between different function calls + to encode or decode strings. 
+*/ + + +void QStringConverter::State::clear() +{ + if (clearFn) + clearFn(this); + state_data[0] = state_data[1] = state_data[2] = state_data[3] = 0; + remainingChars = 0; + invalidChars = 0; +} + +static QChar *fromUtf8(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf8::convertToUnicode(in, length, state); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf8(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf8::convertFromUnicode(in.data(), in.length(), state); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static QChar *fromUtf16(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf16::convertToUnicode(in, length, state); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf16(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static QChar *fromUtf16BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf16::convertToUnicode(in, length, state, BigEndianness); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state, BigEndianness); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static QChar *fromUtf16LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf16::convertToUnicode(in, length, state, LittleEndianness); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf16LE(char *out, 
QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state, LittleEndianness); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf32::convertToUnicode(in, length, state); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf32(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf32::convertFromUnicode(in.data(), in.length(), state); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf32::convertToUnicode(in, length, state, BigEndianness); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf32::convertFromUnicode(in.data(), in.length(), state, BigEndianness); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QUtf32::convertToUnicode(in, length, state, LittleEndianness); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QUtf32::convertFromUnicode(in.data(), in.length(), state, LittleEndianness); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + +static qsizetype fromUtf8Len(qsizetype l) { return l + 1; } +static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); } + +static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; } +static qsizetype toUtf16Len(qsizetype l) { return 2*(l 
+ 1); } + +static qsizetype fromUtf32Len(qsizetype l) { return l + 1; } +static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); } + +const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringConverter::LastEncoding + 1] = +{ + { fromUtf8, fromUtf8Len, toUtf8, toUtf8Len }, + { fromUtf16, fromUtf16Len, toUtf16, toUtf16Len }, + { fromUtf16LE, fromUtf16Len, toUtf16LE, toUtf16Len }, + { fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len }, + { fromUtf32, fromUtf32Len, toUtf32, toUtf32Len }, + { fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len }, + { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len } +}; + +QT_END_NAMESPACE diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h new file mode 100644 index 0000000000..e91975b70f --- /dev/null +++ b/src/corelib/text/qstringconverter.h @@ -0,0 +1,195 @@ +/**************************************************************************** +** +** Copyright (C) 2020 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QSTRINGCONVERTER_H +#define QSTRINGCONVERTER_H + +#include <QtCore/qstring.h> + +QT_BEGIN_NAMESPACE + +// work around a compiler bug in GCC 7 +#if defined(Q_CC_GNU) && __GNUC__ == 7 +#define QSTRINGCONVERTER_CONSTEXPR +#else +#define QSTRINGCONVERTER_CONSTEXPR constexpr +#endif + +class QStringConverterBase +{ +public: + enum Flag { + DefaultConversion, + ConvertInvalidToNull = 0x1, + IgnoreHeader = 0x2, + Stateless = 0x4 + }; + Q_DECLARE_FLAGS(Flags, Flag) + + struct State { + constexpr State(Flags f = DefaultConversion) + : flags(f), state_data{0, 0, 0, 0} {} + ~State() { clear(); } + Q_CORE_EXPORT void clear(); + + Flags flags; + qsizetype remainingChars = 0; + qsizetype invalidChars = 0; + + union { + uint state_data[4]; + void *d[2]; + }; + using ClearDataFn = void (*)(State *); + ClearDataFn clearFn = nullptr; + private: + Q_DISABLE_COPY(State) + }; +}; +Q_DECLARE_OPERATORS_FOR_FLAGS(QStringConverterBase::Flags) + +class QStringConverter : public QStringConverterBase +{ +public: + + enum Encoding { + Utf8, + Utf16, + Utf16LE, + 
Utf16BE, + Utf32, + Utf32LE, + Utf32BE, + LastEncoding = Utf32BE + }; +protected: + + struct Interface + { + // ### FIXME: need a QByteArrayView + using DecoderFn = QChar * (*)(QChar *out, const char *in, qsizetype length, State *state); + using LengthFn = qsizetype (*)(qsizetype inLength); + using EncoderFn = char * (*)(char *out, QStringView in, State *state); + DecoderFn toUtf16 = nullptr; + LengthFn toUtf16Len = nullptr; + EncoderFn fromUtf16 = nullptr; + LengthFn fromUtf16Len = nullptr; + }; + + QSTRINGCONVERTER_CONSTEXPR QStringConverter(Encoding encoding, Flags f) + : iface(&encodingInterfaces[int(encoding)]), state(f) + {} + QSTRINGCONVERTER_CONSTEXPR QStringConverter(const Interface *i) + : iface(i) + {} + +public: + bool isValid() const { return iface != nullptr; } + + void resetState() + { + state.clear(); + } + bool hasError() const { return state.invalidChars != 0; } + +protected: + const Interface *iface; + State state; +private: + Q_CORE_EXPORT static const Interface encodingInterfaces[Encoding::LastEncoding + 1]; +}; + +class QStringEncoder : public QStringConverter +{ +protected: + QSTRINGCONVERTER_CONSTEXPR QStringEncoder(const Interface *i) + : QStringConverter(i) + {} +public: + // ### We shouldn't write a BOM by default. Need to resolve this + // while keeping compat with QTextCodec + QSTRINGCONVERTER_CONSTEXPR QStringEncoder(Encoding encoding, Flags flags = IgnoreHeader) + : QStringConverter(encoding, flags) + {} + + QByteArray operator()(const QChar *in, qsizetype length) + { return (*this)(QStringView(in, length)); } + + QByteArray operator()(QStringView in) + { + QByteArray result(iface->fromUtf16Len(in.size()), Qt::Uninitialized); + char *out = result.data(); + // ### Fixme: needs to be moved into the conversion methods to honor the other flags + out = iface->fromUtf16(out, in, state.flags & Stateless ? 
nullptr : &state); + result.truncate(out - result.constData()); + return result; + } +}; + +class QStringDecoder : public QStringConverter +{ +protected: + QSTRINGCONVERTER_CONSTEXPR QStringDecoder(const Interface *i) + : QStringConverter(i) + {} +public: + QSTRINGCONVERTER_CONSTEXPR QStringDecoder(Encoding encoding, Flags flags = DefaultConversion) + : QStringConverter(encoding, flags) + {} + + QString operator()(const char *in, qsizetype length) + { + QString result(iface->toUtf16Len(length), Qt::Uninitialized); + QChar *out = result.data(); + // ### Fixme: needs to be moved into the conversion methods + out = iface->toUtf16(out, in, length, state.flags & Stateless ? nullptr : &state); + result.truncate(out - result.constData()); + return result; + } + QString operator()(const QByteArray &ba) + { return (*this)(ba.constData(), ba.size()); } + QString operator()(const char *chars) + { return (*this)(chars, strlen(chars)); } +}; + +QT_END_NAMESPACE + +#undef QSTRINGCONVERTER_CONSTEXPR + +#endif diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri index 59ca5fe6d0..4c584cf958 100644 --- a/src/corelib/text/text.pri +++ b/src/corelib/text/text.pri @@ -19,6 +19,7 @@ HEADERS += \ text/qstringalgorithms.h \ text/qstringalgorithms_p.h \ text/qstringbuilder.h \ + text/qstringconverter.h \ text/qstringiterator_p.h \ text/qstringlist.h \ text/qstringliteral.h \ @@ -39,6 +40,7 @@ SOURCES += \ text/qregexp.cpp \ text/qstring.cpp \ text/qstringbuilder.cpp \ + text/qstringconverter.cpp \ text/qstringlist.cpp \ text/qstringview.cpp \ text/qtextboundaryfinder.cpp \ diff --git a/src/tools/bootstrap/.prev_CMakeLists.txt b/src/tools/bootstrap/.prev_CMakeLists.txt index ae7773a165..8f430c494e 100644 --- a/src/tools/bootstrap/.prev_CMakeLists.txt +++ b/src/tools/bootstrap/.prev_CMakeLists.txt @@ -107,6 +107,7 @@ qt_add_module(Bootstrap ../../corelib/text/qstring.cpp ../../corelib/text/qstring_compat.cpp ../../corelib/text/qstringbuilder.cpp + 
../../corelib/text/qstringconverter.cpp ../../corelib/text/qstringlist.cpp ../../corelib/text/qstringview.cpp ../../corelib/text/qvsnprintf.cpp diff --git a/src/tools/bootstrap/CMakeLists.txt b/src/tools/bootstrap/CMakeLists.txt index 68c1fabe6d..5a17888003 100644 --- a/src/tools/bootstrap/CMakeLists.txt +++ b/src/tools/bootstrap/CMakeLists.txt @@ -108,6 +108,7 @@ qt_extend_target(Bootstrap ../../corelib/text/qstring.cpp ../../corelib/text/qstring_compat.cpp ../../corelib/text/qstringbuilder.cpp + ../../corelib/text/qstringconverter.cpp ../../corelib/text/qstringlist.cpp ../../corelib/text/qstringview.cpp ../../corelib/text/qvsnprintf.cpp diff --git a/src/tools/bootstrap/bootstrap.pro b/src/tools/bootstrap/bootstrap.pro index a3ecd3ee5f..169c5fe1c2 100644 --- a/src/tools/bootstrap/bootstrap.pro +++ b/src/tools/bootstrap/bootstrap.pro @@ -93,6 +93,7 @@ SOURCES += \ ../../corelib/text/qregularexpression.cpp \ ../../corelib/text/qstring.cpp \ ../../corelib/text/qstringbuilder.cpp \ + ../../corelib/text/qstringconverter.cpp \ ../../corelib/text/qstring_compat.cpp \ ../../corelib/text/qstringlist.cpp \ ../../corelib/text/qstringview.cpp \ |