diff options
author | Lars Knoll <lars.knoll@qt.io> | 2020-04-30 15:06:27 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-05-14 07:51:32 +0200 |
commit | 1a6bf6c549bcad78f7918dc53c48e5db4fdf4c3d (patch) | |
tree | f0aef1e52795bb4b0d11a6ea69c690909c6e9cd7 /src/xml/sax/qxml.cpp | |
parent | b88720eb82056f5435f48dff728572126c04ca6a (diff) |
Remove QTextCodec dependency in the old SAX parser
Just so we can get this cleaned up as well and remove it from
Qt Core.
Change-Id: I2b5b821b039ce2c024ec3cb7338a1a9becdd2157
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/xml/sax/qxml.cpp')
-rw-r--r-- | src/xml/sax/qxml.cpp | 90 |
1 files changed, 26 insertions, 64 deletions
diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index 1898f4fbe8..d1af7e42da 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -244,9 +244,7 @@ public: int pos; int length; bool nextReturnedEndOfData; -#if QT_CONFIG(textcodec) - QTextDecoder *encMapper; -#endif + QStringDecoder toUnicode; QByteArray encodingDeclBytes; QString encodingDeclChars; @@ -1090,9 +1088,6 @@ void QXmlInputSource::init() d->inputStream = nullptr; setData(QString()); -#if QT_CONFIG(textcodec) - d->encMapper = nullptr; -#endif d->nextReturnedEndOfData = true; // first call to next() will call fetchData() d->encodingDeclBytes.clear(); @@ -1136,9 +1131,6 @@ QXmlInputSource::QXmlInputSource(QIODevice *dev) QXmlInputSource::~QXmlInputSource() { // ### close the input device. -#if QT_CONFIG(textcodec) - delete d->encMapper; -#endif delete d; } @@ -1356,77 +1348,47 @@ QString QXmlInputSource::fromRawData(const QByteArray &data, bool beginning) { if (data.size() == 0) return QString(); - if (beginning) { - delete d->encMapper; - d->encMapper = nullptr; - } - int mib = 106; // UTF-8 + if (beginning) + d->toUnicode = QStringDecoder(); // This is the initial UTF codec we will read the encoding declaration with - if (d->encMapper == nullptr) { + if (!d->toUnicode.isValid()) { d->encodingDeclBytes.clear(); d->encodingDeclChars.clear(); d->lookingForEncodingDecl = true; - // look for byte order mark and read the first 5 characters - if (data.size() >= 4) { - uchar ch1 = data.at(0); - uchar ch2 = data.at(1); - uchar ch3 = data.at(2); - uchar ch4 = data.at(3); - - if ((ch1 == 0 && ch2 == 0 && ch3 == 0xfe && ch4 == 0xff) || - (ch1 == 0xff && ch2 == 0xfe && ch3 == 0 && ch4 == 0)) - mib = 1017; // UTF-32 with byte order mark - else if (ch1 == 0x3c && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x00) - mib = 1019; // UTF-32LE - else if (ch1 == 0x00 && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x3c) - mib = 1018; // UTF-32BE - } - if (mib == 106 && data.size() >= 2) { - uchar ch1 = data.at(0); - uchar ch2 = data.at(1); - - if ((ch1 == 0xfe && ch2 == 0xff) || (ch1 == 0xff && ch2 == 0xfe)) - mib = 1015; // UTF-16 with byte order mark - else if (ch1 == 0x3c && ch2 == 0x00) - mib = 1014; // UTF-16LE - else if (ch1 == 0x00 && ch2 == 0x3c) - mib = 1013; // UTF-16BE - } - - QTextCodec *codec = QTextCodec::codecForMib(mib); - Q_ASSERT(codec); - - d->encMapper = codec->makeDecoder(); + auto encoding = QStringConverter::encodingForData(data.constData(), data.size(), char16_t('<')); + if (encoding) { + d->lookingForEncodingDecl = false; + d->toUnicode = QStringDecoder(*encoding); + } else { + d->toUnicode = QStringDecoder(QStringDecoder::Utf8); + } } - QString input = d->encMapper->toUnicode(data.constData(), data.size()); + QString input = d->toUnicode(data.constData(), data.size()); if (d->lookingForEncodingDecl) { d->encodingDeclChars += input; bool needMoreText; - QString encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText); + QByteArray encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText).toLatin1(); if (!encoding.isEmpty()) { - if (QTextCodec *codec = QTextCodec::codecForName(std::move(encoding).toLatin1())) { - /* If the encoding is the same, we don't have to do toUnicode() all over again. */ - if(codec->mibEnum() != mib) { - delete d->encMapper; - d->encMapper = codec->makeDecoder(); - - /* The variable input can potentially be large, so we deallocate - * it before calling toUnicode() in order to avoid having two - * large QStrings in memory simultaneously. */ - input.clear(); - - // prime the decoder with the data so far - d->encMapper->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size()); - // now feed it the new data - input = d->encMapper->toUnicode(data.constData(), data.size()); - } + auto e = QStringDecoder::encodingForData(encoding.constData(), encoding.size()); + if (e && *e != QStringDecoder::Utf8) { + d->toUnicode = QStringDecoder(*e); + + /* The variable input can potentially be large, so we deallocate + * it before calling toUnicode() in order to avoid having two + * large QStrings in memory simultaneously. */ + input.clear(); + + // prime the decoder with the data so far + d->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size()); + // now feed it the new data + input = d->toUnicode(data.constData(), data.size()); } } |