summaryrefslogtreecommitdiffstats
path: root/src/xml/sax/qxml.cpp
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2020-04-30 15:06:27 +0200
committerLars Knoll <lars.knoll@qt.io>2020-05-14 07:51:32 +0200
commit1a6bf6c549bcad78f7918dc53c48e5db4fdf4c3d (patch)
treef0aef1e52795bb4b0d11a6ea69c690909c6e9cd7 /src/xml/sax/qxml.cpp
parentb88720eb82056f5435f48dff728572126c04ca6a (diff)
Remove QTextCodec dependency in the old SAX parser
Just so we can get this cleaned up as well and remove it from Qt Core. Change-Id: I2b5b821b039ce2c024ec3cb7338a1a9becdd2157 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/xml/sax/qxml.cpp')
-rw-r--r--src/xml/sax/qxml.cpp90
1 files changed, 26 insertions, 64 deletions
diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp
index 1898f4fbe8..d1af7e42da 100644
--- a/src/xml/sax/qxml.cpp
+++ b/src/xml/sax/qxml.cpp
@@ -244,9 +244,7 @@ public:
int pos;
int length;
bool nextReturnedEndOfData;
-#if QT_CONFIG(textcodec)
- QTextDecoder *encMapper;
-#endif
+ QStringDecoder toUnicode;
QByteArray encodingDeclBytes;
QString encodingDeclChars;
@@ -1090,9 +1088,6 @@ void QXmlInputSource::init()
d->inputStream = nullptr;
setData(QString());
-#if QT_CONFIG(textcodec)
- d->encMapper = nullptr;
-#endif
d->nextReturnedEndOfData = true; // first call to next() will call fetchData()
d->encodingDeclBytes.clear();
@@ -1136,9 +1131,6 @@ QXmlInputSource::QXmlInputSource(QIODevice *dev)
QXmlInputSource::~QXmlInputSource()
{
// ### close the input device.
-#if QT_CONFIG(textcodec)
- delete d->encMapper;
-#endif
delete d;
}
@@ -1356,77 +1348,47 @@ QString QXmlInputSource::fromRawData(const QByteArray &data, bool beginning)
{
if (data.size() == 0)
return QString();
- if (beginning) {
- delete d->encMapper;
- d->encMapper = nullptr;
- }
- int mib = 106; // UTF-8
+ if (beginning)
+ d->toUnicode = QStringDecoder();
// This is the initial UTF codec we will read the encoding declaration with
- if (d->encMapper == nullptr) {
+ if (!d->toUnicode.isValid()) {
d->encodingDeclBytes.clear();
d->encodingDeclChars.clear();
d->lookingForEncodingDecl = true;
- // look for byte order mark and read the first 5 characters
- if (data.size() >= 4) {
- uchar ch1 = data.at(0);
- uchar ch2 = data.at(1);
- uchar ch3 = data.at(2);
- uchar ch4 = data.at(3);
-
- if ((ch1 == 0 && ch2 == 0 && ch3 == 0xfe && ch4 == 0xff) ||
- (ch1 == 0xff && ch2 == 0xfe && ch3 == 0 && ch4 == 0))
- mib = 1017; // UTF-32 with byte order mark
- else if (ch1 == 0x3c && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x00)
- mib = 1019; // UTF-32LE
- else if (ch1 == 0x00 && ch2 == 0x00 && ch3 == 0x00 && ch4 == 0x3c)
- mib = 1018; // UTF-32BE
- }
- if (mib == 106 && data.size() >= 2) {
- uchar ch1 = data.at(0);
- uchar ch2 = data.at(1);
-
- if ((ch1 == 0xfe && ch2 == 0xff) || (ch1 == 0xff && ch2 == 0xfe))
- mib = 1015; // UTF-16 with byte order mark
- else if (ch1 == 0x3c && ch2 == 0x00)
- mib = 1014; // UTF-16LE
- else if (ch1 == 0x00 && ch2 == 0x3c)
- mib = 1013; // UTF-16BE
- }
-
- QTextCodec *codec = QTextCodec::codecForMib(mib);
- Q_ASSERT(codec);
-
- d->encMapper = codec->makeDecoder();
+ auto encoding = QStringConverter::encodingForData(data.constData(), data.size(), char16_t('<'));
+ if (encoding) {
+ d->lookingForEncodingDecl = false;
+ d->toUnicode = QStringDecoder(*encoding);
+ } else {
+ d->toUnicode = QStringDecoder(QStringDecoder::Utf8);
+ }
}
- QString input = d->encMapper->toUnicode(data.constData(), data.size());
+ QString input = d->toUnicode(data.constData(), data.size());
if (d->lookingForEncodingDecl) {
d->encodingDeclChars += input;
bool needMoreText;
- QString encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText);
+ QByteArray encoding = extractEncodingDecl(d->encodingDeclChars, &needMoreText).toLatin1();
if (!encoding.isEmpty()) {
- if (QTextCodec *codec = QTextCodec::codecForName(std::move(encoding).toLatin1())) {
- /* If the encoding is the same, we don't have to do toUnicode() all over again. */
- if(codec->mibEnum() != mib) {
- delete d->encMapper;
- d->encMapper = codec->makeDecoder();
-
- /* The variable input can potentially be large, so we deallocate
- * it before calling toUnicode() in order to avoid having two
- * large QStrings in memory simultaneously. */
- input.clear();
-
- // prime the decoder with the data so far
- d->encMapper->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size());
- // now feed it the new data
- input = d->encMapper->toUnicode(data.constData(), data.size());
- }
+ auto e = QStringDecoder::encodingForData(encoding.constData(), encoding.size());
+ if (e && *e != QStringDecoder::Utf8) {
+ d->toUnicode = QStringDecoder(*e);
+
+ /* The variable input can potentially be large, so we deallocate
+ * it before calling toUnicode() in order to avoid having two
+ * large QStrings in memory simultaneously. */
+ input.clear();
+
+ // prime the decoder with the data so far
+ d->toUnicode(d->encodingDeclBytes.constData(), d->encodingDeclBytes.size());
+ // now feed it the new data
+ input = d->toUnicode(data.constData(), data.size());
}
}