Handle encoded-word's containing partial characters

That's actually a violation of the spec (RFC 2047, https://tools.ietf.org/html/rfc2047#section-5): "Each 'encoded-word' MUST represent an integral number of characters. A multi-octet character may not be split across adjacent 'encoded-word's." However, if the charsets match, the data can be concatenated first and then converted to characters, which is what all other email clients seem to be doing - I haven't found a single one which wouldn't handle it. Change-Id: I5bbb785343ad60459fa6e52f5728fd796c7e5961 Reviewed-by: Pekka Vuorela <pvuorela@iki.fi> Reviewed-by: Christopher Adams <chris.adams@qinetic.com.au>
author: Slava Monich <slava.monich@jolla.com> 2020-07-26 03:03:44 +0300
committer: Chris Adams <chris.adams@qinetic.com.au> 2023-11-08 19:13:22 +1000
commit: 64520d1e87718be1410e99746d3045eed0dc07b4 (patch)
tree: 1b113cd16f5d5651fac2ccf8f38130e19225d3b1 /src/libraries/qmfclient/qmailmessage.cpp
parent: a32ca6254da5056e874965976c2ab3dc2620c23c (diff)
1 files changed, 38 insertions, 18 deletions
diff --git a/src/libraries/qmfclient/qmailmessage.cpp b/src/libraries/qmfclient/qmailmessage.cpp
index 63ee459d..85cbe9b3 100644
--- a/src/libraries/qmfclient/qmailmessage.cpp
+++ b/src/libraries/qmfclient/qmailmessage.cpp
@@ -333,9 +333,10 @@ static QMailCodec* codecForEncoding(QMailMessageBody::TransferEncoding te, const
 }
 
 //  Needs an encoded word of the form =?charset?q?word?=
-static QString decodeWord(const QByteArray& encodedWord)
+//  Returns text and charset as QPair<QByteArray, QByteArray>
+static QPair<QByteArray, QByteArray> encodedText(const QByteArray& encodedWord)
 {
-    QString result;
+    QPair<QByteArray, QByteArray> result;
     int index[4];
 
     // Find the parts of the input
@@ -356,12 +357,12 @@ static QString decodeWord(const QByteArray& encodedWord)
                 if (encoding == "Q")
                 {
                     QMailQuotedPrintableCodec codec(QMailQuotedPrintableCodec::Text, QMailQuotedPrintableCodec::Rfc2047);
-                    return codec.decode(encoded, charset);
+                    result = qMakePair(codec.decode(encoded), charset);
                 }
                 else if (encoding == "B")
                 {
                     QMailBase64Codec codec(QMailBase64Codec::Binary);
-                    return codec.decode(encoded, charset);
+                    result = qMakePair(codec.decode(encoded), charset);
                 }
             }
         }
@@ -505,42 +506,61 @@ static QByteArray encodeWord(const QString &text, const QByteArray& cs, bool* en
     return to7BitAscii(text);
 }
 
-static QString decodeWordSequence(const QByteArray& str)
+static void convertAndAppend(QString& str, const QByteArray& bytes, const QByteArray& charset)
 {
-    QRegularExpression whitespace(QLatin1String("^\\s+$"));
+    if (!bytes.isEmpty()) { // an empty buffer contributes nothing to str
+        QTextCodec* codec = QMailCodec::codecForName(charset); // codec for the charset named by the encoded-word
+        if (!codec) { // no codec was returned for that charset name
+            codec = QTextCodec::codecForUtfText(bytes, QMailCodec::codecForName("UTF-8")); // detect from a BOM, else use the UTF-8 codec
+        }
+        str.append(codec->toUnicode(bytes)); // NOTE(review): codec is dereferenced unchecked - confirm the UTF-8 lookup cannot return null
+    }
+}
 
+static QString decodeWordSequence(const QByteArray& str)
+{
     QString out;
+    QString latin1Str(QString::fromLatin1(str.constData(), str.length()));
+    QByteArray lastCharset; // charset of the most recently seen encoded-word
+    QByteArray encodedBuf; // transfer-decoded bytes not yet converted to characters
+    int pos = 0;
+    int lastPos = 0;
 
     // From RFC 2047
     // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     QRegularExpression encodedWord(QLatin1String("\"?=\\?[^\\s\\?]+\\?[^\\s\\?]+\\?[^\\s\\?]*\\?=\"?"));
-
-    int pos = 0;
-    int lastPos = 0;
-    QString latin1Str(QString::fromLatin1(str.constData(), str.length()));
+    QRegularExpression whitespace(QLatin1String("^\\s+$"));
     QRegularExpressionMatchIterator it = encodedWord.globalMatch(latin1Str);
-
     while (it.hasNext()) {
         QRegularExpressionMatch match = it.next();
         pos = match.capturedStart();
 
         if (pos != -1) {
-            int endPos = pos + match.capturedLength();
+            const int endPos = pos + match.capturedLength();
 
+            QPair<QByteArray, QByteArray> textAndCharset(encodedText(str.mid(pos, (endPos - pos)))); // first = bytes, second = charset
             QString preceding(QString::fromLatin1(str.mid(lastPos, (pos - lastPos))));
-            QString decoded = decodeWord(str.mid(pos, (endPos - pos)));
 
             // If there is only whitespace between two encoded words, it should not be included
-            if (!whitespace.match(preceding).hasMatch())
-                out.append(preceding);
-
-            out.append(decoded);
+            const bool precedingWhitespaceOrEmpty = (preceding.isEmpty() || whitespace.match(preceding).hasMatch());
+            if ((lastCharset.isEmpty() || lastCharset == textAndCharset.second) && precedingWhitespaceOrEmpty) {
+                encodedBuf.append(textAndCharset.first); // same charset, adjacent word: join bytes so a split multi-octet character is reassembled
+            } else {
+                convertAndAppend(out, encodedBuf, lastCharset); // flush with the charset the buffered bytes were encoded in, not the new word's
+                if (!precedingWhitespaceOrEmpty) {
+                    out.append(preceding);
+                }
+                encodedBuf = textAndCharset.first; // start a new run with the current word's bytes
+            }
 
-            lastPos = endPos;
+            lastCharset = textAndCharset.second;
+            pos = endPos;
+            lastPos = pos;
         }
     }
 
     // Copy anything left
+    convertAndAppend(out, encodedBuf, lastCharset); // flush the final run of buffered bytes
     out.append(QString::fromLatin1(str.mid(lastPos)));
     return out;
 }
author	Slava Monich <slava.monich@jolla.com>	2020-07-26 03:03:44 +0300
committer	Chris Adams <chris.adams@qinetic.com.au>	2023-11-08 19:13:22 +1000
commit	64520d1e87718be1410e99746d3045eed0dc07b4 (patch)
tree	1b113cd16f5d5651fac2ccf8f38130e19225d3b1 /src/libraries/qmfclient/qmailmessage.cpp
parent	a32ca6254da5056e874965976c2ab3dc2620c23c (diff)