summaryrefslogtreecommitdiffstats
path: root/src/libraries/qmfclient/qmailmessage.cpp
diff options
context:
space:
mode:
authorSlava Monich <slava.monich@jolla.com>2020-07-26 03:03:44 +0300
committerChris Adams <chris.adams@qinetic.com.au>2023-11-08 19:13:22 +1000
commit64520d1e87718be1410e99746d3045eed0dc07b4 (patch)
tree1b113cd16f5d5651fac2ccf8f38130e19225d3b1 /src/libraries/qmfclient/qmailmessage.cpp
parenta32ca6254da5056e874965976c2ab3dc2620c23c (diff)
Handle encoded-word's containing partial characters
That's actually a violation of the spec (RFC 2047, https://tools.ietf.org/html/rfc2047#section-5): "Each 'encoded-word' MUST represent an integral number of characters. A multi-octet character may not be split across adjacent 'encoded-word's." But if the charsets match, the data can be concatenated first and then converted to characters, which is what all other email clients seem to be doing - I haven't found a single one which wouldn't handle it. Change-Id: I5bbb785343ad60459fa6e52f5728fd796c7e5961 Reviewed-by: Pekka Vuorela <pvuorela@iki.fi> Reviewed-by: Christopher Adams <chris.adams@qinetic.com.au>
Diffstat (limited to 'src/libraries/qmfclient/qmailmessage.cpp')
-rw-r--r--src/libraries/qmfclient/qmailmessage.cpp56
1 files changed, 38 insertions, 18 deletions
diff --git a/src/libraries/qmfclient/qmailmessage.cpp b/src/libraries/qmfclient/qmailmessage.cpp
index 63ee459d..85cbe9b3 100644
--- a/src/libraries/qmfclient/qmailmessage.cpp
+++ b/src/libraries/qmfclient/qmailmessage.cpp
@@ -333,9 +333,10 @@ static QMailCodec* codecForEncoding(QMailMessageBody::TransferEncoding te, const
}
// Needs an encoded word of the form =?charset?q?word?=
-static QString decodeWord(const QByteArray& encodedWord)
+// Returns text and charset as QPair<QByteArray, QByteArray>
+static QPair<QByteArray, QByteArray> encodedText(const QByteArray& encodedWord)
{
- QString result;
+ QPair<QByteArray, QByteArray> result;
int index[4];
// Find the parts of the input
@@ -356,12 +357,12 @@ static QString decodeWord(const QByteArray& encodedWord)
if (encoding == "Q")
{
QMailQuotedPrintableCodec codec(QMailQuotedPrintableCodec::Text, QMailQuotedPrintableCodec::Rfc2047);
- return codec.decode(encoded, charset);
+ result = qMakePair(codec.decode(encoded), charset);
}
else if (encoding == "B")
{
QMailBase64Codec codec(QMailBase64Codec::Binary);
- return codec.decode(encoded, charset);
+ result = qMakePair(codec.decode(encoded), charset);
}
}
}
@@ -505,42 +506,61 @@ static QByteArray encodeWord(const QString &text, const QByteArray& cs, bool* en
return to7BitAscii(text);
}
-static QString decodeWordSequence(const QByteArray& str)
+// Converts bytes to Unicode with the codec named by charset and appends the result to str.
+// Does nothing for empty input. If no codec is found for charset, falls back to
+// QTextCodec::codecForUtfText() with UTF-8 as the default codec.
+static void convertAndAppend(QString& str, const QByteArray& bytes, const QByteArray& charset)
{
- QRegularExpression whitespace(QLatin1String("^\\s+$"));
+ if (!bytes.isEmpty()) {
+ QTextCodec* codec = QMailCodec::codecForName(charset);
+ if (!codec) {
+ codec = QTextCodec::codecForUtfText(bytes, QMailCodec::codecForName("UTF-8"));
+ }
+ str.append(codec->toUnicode(bytes));
+ }
+}
+// Decodes a header value made of RFC 2047 encoded-words mixed with plain text.
+// Adjacent encoded-words that share a charset and are separated only by whitespace
+// have their decoded bytes concatenated before charset conversion, so a multi-octet
+// character split across two encoded-words is still converted correctly.
+static QString decodeWordSequence(const QByteArray& str)
+{
QString out;
+ QString latin1Str(QString::fromLatin1(str.constData(), str.length()));
+ QByteArray lastCharset;
+ QByteArray encodedBuf;
+ int pos = 0;
+ int lastPos = 0;
// From RFC 2047
// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
QRegularExpression encodedWord(QLatin1String("\"?=\\?[^\\s\\?]+\\?[^\\s\\?]+\\?[^\\s\\?]*\\?=\"?"));
-
- int pos = 0;
- int lastPos = 0;
- QString latin1Str(QString::fromLatin1(str.constData(), str.length()));
+ QRegularExpression whitespace(QLatin1String("^\\s+$"));
QRegularExpressionMatchIterator it = encodedWord.globalMatch(latin1Str);
-
while (it.hasNext()) {
QRegularExpressionMatch match = it.next();
pos = match.capturedStart();
if (pos != -1) {
- int endPos = pos + match.capturedLength();
+ const int endPos = pos + match.capturedLength();
+ QPair<QByteArray, QByteArray> textAndCharset(encodedText(str.mid(pos, (endPos - pos))));
QString preceding(QString::fromLatin1(str.mid(lastPos, (pos - lastPos))));
- QString decoded = decodeWord(str.mid(pos, (endPos - pos)));
// If there is only whitespace between two encoded words, it should not be included
- if (!whitespace.match(preceding).hasMatch())
- out.append(preceding);
-
- out.append(decoded);
+ const bool precedingWhitespaceOrEmpty = (preceding.isEmpty() || whitespace.match(preceding).hasMatch());
+ if ((lastCharset.isEmpty() || lastCharset == textAndCharset.second) && precedingWhitespaceOrEmpty) {
+ encodedBuf.append(textAndCharset.first);
+ } else {
+ // The buffered bytes come from the previous encoded-word(s), so they must be
+ // converted with lastCharset, not with the charset of the word just parsed
+ convertAndAppend(out, encodedBuf, lastCharset);
+ if (!precedingWhitespaceOrEmpty) {
+ out.append(preceding);
+ }
+ encodedBuf = textAndCharset.first;
+ }
- lastPos = endPos;
+ lastCharset = textAndCharset.second;
+ pos = endPos;
+ lastPos = pos;
}
}
// Copy anything left
+ convertAndAppend(out, encodedBuf, lastCharset);
out.append(QString::fromLatin1(str.mid(lastPos)));
return out;
}