diff options
author | Valério Valério <valerio.valerio@jolla.com> | 2015-03-17 12:35:34 +0200 |
---|---|---|
committer | Pekka Vuorela <pvuorela@iki.fi> | 2018-09-29 11:50:19 +0000 |
commit | 1dc5223a273a70fb9ca0e10d07c8de7a9da308bc (patch) | |
tree | 5d796e9f4a5bbf75ac506dccdc31e18e4fb0ab4d | |
parent | d4b86a15d07f031a626286047ecec53777cb4eb6 (diff) |
Use QTextDocument to parse html
Regular expressions are not an appropriate tool for parsing a non-regular language
like HTML; a proper parser should be used.
This commit introduces a dependency on QtGui, making the messageserver
binary marginally bigger in size. Usage of the HTML parser is optional
and can be enabled via the USE_HTML_PARSER compile flag.
Change-Id: I2dba9042bb7f5340bfd8c24cb59c2a769489a7c6
Reviewed-by: Damien Caliste <dcaliste@free.fr>
Reviewed-by: Matthew Vogt <matthew.vogt@qinetic.com.au>
-rw-r--r-- | src/libraries/qmfclient/qmailmessage.cpp | 78 | ||||
-rw-r--r-- | src/libraries/qmfclient/qmfclient.pro | 5 | ||||
-rw-r--r-- | src/tools/messageserver/main.cpp | 10 | ||||
-rw-r--r-- | src/tools/messageserver/messageserver.pro | 4 |
4 files changed, 65 insertions, 32 deletions
diff --git a/src/libraries/qmfclient/qmailmessage.cpp b/src/libraries/qmfclient/qmailmessage.cpp index 98108179..0ab3ca7a 100644 --- a/src/libraries/qmfclient/qmailmessage.cpp +++ b/src/libraries/qmfclient/qmailmessage.cpp @@ -54,6 +54,9 @@ #include <qtextcodec.h> #include <QTextCodec> #include <QtDebug> +#ifdef USE_HTML_PARSER +#include <QTextDocument> +#endif #include <stdlib.h> #include <limits.h> @@ -8591,12 +8594,50 @@ static void setMessagePriorityFromHeaderFields(QMailMessage *mail) return; // Normal Priority } +static QString htmlToPlainText(const QString &html) +{ +#ifdef USE_HTML_PARSER + QTextDocument doc; + doc.setHtml(html); + return doc.toPlainText(); +#else + QString plainText = html; + plainText.remove(QRegExp(QLatin1String("<\\s*(style|head|form|script)[^<]*<\\s*/\\s*\\1\\s*>"), Qt::CaseInsensitive)); + plainText.remove(QRegExp(QLatin1String("<(.)[^>]*>"))); + plainText.replace(QLatin1String("""), QLatin1String("\""), Qt::CaseInsensitive); + plainText.replace(QLatin1String(" "), QLatin1String(" "), Qt::CaseInsensitive); + plainText.replace(QLatin1String("&"), QLatin1String("&"), Qt::CaseInsensitive); + plainText.replace(QLatin1String("<"), QLatin1String("<"), Qt::CaseInsensitive); + plainText.replace(QLatin1String(">"), QLatin1String(">"), Qt::CaseInsensitive); + + // now replace stuff like "м" + int pos = 0; + while (true) { + pos = plainText.indexOf(QLatin1String("&#"), pos); + if (pos < 0) + break; + int semicolon = plainText.indexOf(';', pos+2); + if (semicolon < 0) { + ++pos; + continue; + } + int code = (plainText.mid(pos+2, semicolon-pos-2)).toInt(); + if (code == 0) { + ++pos; + continue; + } + plainText.replace(pos, semicolon-pos+1, QChar(code)); + } + + return plainText.simplified(); +#endif +} + /*! \internal */ void QMailMessage::refreshPreview() { const int maxPreviewLength = 280; // TODO: don't load entire body into memory - // TODO: parse html correctly, e.g. 
closing brackets in quotes in tags QMailMessagePartContainer *htmlPart= findHtmlContainer(); QMailMessagePartContainer *plainTextPart= findPlainTextContainer(); @@ -8604,40 +8645,13 @@ void QMailMessage::refreshPreview() plainTextPart=0; if ( plainTextPart && plainTextPart->hasBody()) { - QString plaintext(plainTextPart->body().data()); - plaintext.remove(QRegExp(QLatin1String("\\[(image|cid):[^\\]]*\\]"), Qt::CaseInsensitive)); - metaDataImpl()->setPreview(plaintext.left(maxPreviewLength)); + QString plainText = plainTextPart->body().data(); + metaDataImpl()->setPreview(plainText.left(maxPreviewLength)); } else if (htmlPart && ( multipartType() == MultipartRelated || htmlPart->hasBody())) { QString markup = htmlPart->body().data(); - markup.remove(QRegExp(QLatin1String("<\\s*(style|head|form|script)[^<]*<\\s*/\\s*\\1\\s*>"), Qt::CaseInsensitive)); - markup.remove(QRegExp(QLatin1String("<(.)[^>]*>"))); - markup.replace(QLatin1String("""), QLatin1String("\""), Qt::CaseInsensitive); - markup.replace(QLatin1String(" "), QLatin1String(" "), Qt::CaseInsensitive); - markup.replace(QLatin1String("&"), QLatin1String("&"), Qt::CaseInsensitive); - markup.replace(QLatin1String("<"), QLatin1String("<"), Qt::CaseInsensitive); - markup.replace(QLatin1String(">"), QLatin1String(">"), Qt::CaseInsensitive); - - // now replace stuff like "м" - for (int pos = 0; ; ) { - pos = markup.indexOf(QLatin1String("&#"), pos); - if (pos < 0) - break; - int semicolon = markup.indexOf(';', pos+2); - if (semicolon < 0) { - ++pos; - continue; - } - int code = (markup.mid(pos+2, semicolon-pos-2)).toInt(); - if (code == 0) { - ++pos; - continue; - } - markup.replace(pos, semicolon-pos+1, QChar(code)); - } - - metaDataImpl()->setPreview(markup.simplified().left(maxPreviewLength)); + metaDataImpl()->setPreview(htmlToPlainText(markup).left(maxPreviewLength)); } - + partContainerImpl()->setPreviewDirty(false); } diff --git a/src/libraries/qmfclient/qmfclient.pro b/src/libraries/qmfclient/qmfclient.pro 
index f643c4d1..665aba1b 100644 --- a/src/libraries/qmfclient/qmfclient.pro +++ b/src/libraries/qmfclient/qmfclient.pro @@ -16,6 +16,11 @@ DEFINES += QMF_INSTALL_ROOT=\\\"$$QMF_INSTALL_ROOT\\\" #DEPENDPATH += . INCLUDEPATH += support +contains(DEFINES, USE_HTML_PARSER) { + QT += gui +} + + HEADERS += \ qmailaccount.h \ qmailaccountconfiguration.h \ diff --git a/src/tools/messageserver/main.cpp b/src/tools/messageserver/main.cpp index c6f0edc9..0076e711 100644 --- a/src/tools/messageserver/main.cpp +++ b/src/tools/messageserver/main.cpp @@ -37,6 +37,10 @@ #include <qmaillog.h> #include <qloggers.h> #include <signal.h> +#include <stdlib.h> +#ifdef USE_HTML_PARSER +#include <QtGui> +#endif #if !defined(NO_SHUTDOWN_SIGNAL_HANDLING) && defined(Q_OS_UNIX) @@ -58,7 +62,13 @@ static void recreateLoggers(int n) int main(int argc, char** argv) { +#ifdef USE_HTML_PARSER + // Need for html parsing by <QTextdocument> in qmailmessage.cpp, but don't need real UI + setenv("QT_QPA_PLATFORM", "minimal", 1); + QGuiApplication app(argc, argv); +#else QCoreApplication app(argc, argv); +#endif // This is ~/.config/QtProject/Messageserver.conf qMailLoggersRecreate("QtProject", "Messageserver", "Msgsrv"); diff --git a/src/tools/messageserver/messageserver.pro b/src/tools/messageserver/messageserver.pro index a70edfe4..f7d92931 100644 --- a/src/tools/messageserver/messageserver.pro +++ b/src/tools/messageserver/messageserver.pro @@ -9,6 +9,10 @@ SERVER_AS_DLL: { TARGET = messageserver5 QT = core qmfclient qmfclient-private qmfmessageserver +contains(DEFINES, USE_HTML_PARSER) { + QT += gui +} + !contains(DEFINES,QMF_NO_WIDGETS) { QT += gui widgets } |