diff options
author | Slava Monich <slava.monich@jolla.com> | 2020-07-26 03:03:44 +0300 |
---|---|---|
committer | Chris Adams <chris.adams@qinetic.com.au> | 2023-11-08 19:13:22 +1000 |
commit | 64520d1e87718be1410e99746d3045eed0dc07b4 (patch) | |
tree | 1b113cd16f5d5651fac2ccf8f38130e19225d3b1 /src/libraries/qmfclient/qmailmessage.cpp | |
parent | a32ca6254da5056e874965976c2ab3dc2620c23c (diff) |
Handle encoded-words containing partial characters
Splitting a multi-octet character across adjacent encoded-words is actually a violation of the spec (RFC 2047):
https://tools.ietf.org/html/rfc2047#section-5
Each 'encoded-word' MUST represent an integral number of characters.
A multi-octet character may not be split across adjacent 'encoded-
word's.
However, if the charsets of adjacent encoded-words match, their decoded data
can be concatenated first and only then converted to characters, which is what
all other email clients seem to be doing - I haven't found a single one which wouldn't handle it.
Change-Id: I5bbb785343ad60459fa6e52f5728fd796c7e5961
Reviewed-by: Pekka Vuorela <pvuorela@iki.fi>
Reviewed-by: Christopher Adams <chris.adams@qinetic.com.au>
Diffstat (limited to 'src/libraries/qmfclient/qmailmessage.cpp')
-rw-r--r-- | src/libraries/qmfclient/qmailmessage.cpp | 56 |
1 files changed, 38 insertions, 18 deletions
diff --git a/src/libraries/qmfclient/qmailmessage.cpp b/src/libraries/qmfclient/qmailmessage.cpp index 63ee459d..85cbe9b3 100644 --- a/src/libraries/qmfclient/qmailmessage.cpp +++ b/src/libraries/qmfclient/qmailmessage.cpp @@ -333,9 +333,10 @@ static QMailCodec* codecForEncoding(QMailMessageBody::TransferEncoding te, const } // Needs an encoded word of the form =?charset?q?word?= -static QString decodeWord(const QByteArray& encodedWord) +// Returns text and charset as QPair<QByteArray, QByteArray> +static QPair<QByteArray, QByteArray> encodedText(const QByteArray& encodedWord) { - QString result; + QPair<QByteArray, QByteArray> result; int index[4]; // Find the parts of the input @@ -356,12 +357,12 @@ static QString decodeWord(const QByteArray& encodedWord) if (encoding == "Q") { QMailQuotedPrintableCodec codec(QMailQuotedPrintableCodec::Text, QMailQuotedPrintableCodec::Rfc2047); - return codec.decode(encoded, charset); + result = qMakePair(codec.decode(encoded), charset); } else if (encoding == "B") { QMailBase64Codec codec(QMailBase64Codec::Binary); - return codec.decode(encoded, charset); + result = qMakePair(codec.decode(encoded), charset); } } } @@ -505,42 +506,61 @@ static QByteArray encodeWord(const QString &text, const QByteArray& cs, bool* en return to7BitAscii(text); } -static QString decodeWordSequence(const QByteArray& str) +static void convertAndAppend(QString& str, const QByteArray& bytes, const QByteArray& charset) { - QRegularExpression whitespace(QLatin1String("^\\s+$")); + if (!bytes.isEmpty()) { + QTextCodec* codec = QMailCodec::codecForName(charset); + if (!codec) { + codec = QTextCodec::codecForUtfText(bytes, QMailCodec::codecForName("UTF-8")); + } + str.append(codec->toUnicode(bytes)); + } +} +static QString decodeWordSequence(const QByteArray& str) +{ QString out; + QString latin1Str(QString::fromLatin1(str.constData(), str.length())); + QByteArray lastCharset; + QByteArray encodedBuf; + int pos = 0; + int lastPos = 0; // From RFC 2047 // 
encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" QRegularExpression encodedWord(QLatin1String("\"?=\\?[^\\s\\?]+\\?[^\\s\\?]+\\?[^\\s\\?]*\\?=\"?")); - - int pos = 0; - int lastPos = 0; - QString latin1Str(QString::fromLatin1(str.constData(), str.length())); + QRegularExpression whitespace(QLatin1String("^\\s+$")); QRegularExpressionMatchIterator it = encodedWord.globalMatch(latin1Str); - while (it.hasNext()) { QRegularExpressionMatch match = it.next(); pos = match.capturedStart(); if (pos != -1) { - int endPos = pos + match.capturedLength(); + const int endPos = pos + match.capturedLength(); + QPair<QByteArray, QByteArray> textAndCharset(encodedText(str.mid(pos, (endPos - pos)))); QString preceding(QString::fromLatin1(str.mid(lastPos, (pos - lastPos)))); - QString decoded = decodeWord(str.mid(pos, (endPos - pos))); // If there is only whitespace between two encoded words, it should not be included - if (!whitespace.match(preceding).hasMatch()) - out.append(preceding); - - out.append(decoded); + const bool precedingWhitespaceOrEmpty = (preceding.isEmpty() || whitespace.match(preceding).hasMatch()); + if ((lastCharset.isEmpty() || lastCharset == textAndCharset.second) && precedingWhitespaceOrEmpty) { + encodedBuf.append(textAndCharset.first); + } else { + convertAndAppend(out, encodedBuf, textAndCharset.second); + if (!precedingWhitespaceOrEmpty) { + out.append(preceding); + } + encodedBuf = textAndCharset.first; + } - lastPos = endPos; + lastCharset = textAndCharset.second; + pos = endPos; + lastPos = pos; } } // Copy anything left + convertAndAppend(out, encodedBuf, lastCharset); out.append(QString::fromLatin1(str.mid(lastPos))); return out; } |