summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValério Valério <valerio.valerio@jolla.com>2015-03-17 12:35:34 +0200
committerPekka Vuorela <pvuorela@iki.fi>2018-09-29 11:50:19 +0000
commit1dc5223a273a70fb9ca0e10d07c8de7a9da308bc (patch)
tree5d796e9f4a5bbf75ac506dccdc31e18e4fb0ab4d
parentd4b86a15d07f031a626286047ecec53777cb4eb6 (diff)
Use QTextDocument to parse html
Regular expressions are not an appropriate tool for parsing a non-regular language like HTML; a proper parser should be used. This commit introduces a dependency on QtGui, making the messageserver binary marginally bigger in size. Usage of the HTML parser is optional and can be enabled via the USE_HTML_PARSER compile flag. Change-Id: I2dba9042bb7f5340bfd8c24cb59c2a769489a7c6 Reviewed-by: Damien Caliste <dcaliste@free.fr> Reviewed-by: Matthew Vogt <matthew.vogt@qinetic.com.au>
-rw-r--r--src/libraries/qmfclient/qmailmessage.cpp78
-rw-r--r--src/libraries/qmfclient/qmfclient.pro5
-rw-r--r--src/tools/messageserver/main.cpp10
-rw-r--r--src/tools/messageserver/messageserver.pro4
4 files changed, 65 insertions, 32 deletions
diff --git a/src/libraries/qmfclient/qmailmessage.cpp b/src/libraries/qmfclient/qmailmessage.cpp
index 98108179..0ab3ca7a 100644
--- a/src/libraries/qmfclient/qmailmessage.cpp
+++ b/src/libraries/qmfclient/qmailmessage.cpp
@@ -54,6 +54,9 @@
#include <qtextcodec.h>
#include <QTextCodec>
#include <QtDebug>
+#ifdef USE_HTML_PARSER
+#include <QTextDocument>
+#endif
#include <stdlib.h>
#include <limits.h>
@@ -8591,12 +8594,50 @@ static void setMessagePriorityFromHeaderFields(QMailMessage *mail)
return; // Normal Priority
}
+static QString htmlToPlainText(const QString &html)
+{
+#ifdef USE_HTML_PARSER
+ QTextDocument doc;
+ doc.setHtml(html);
+ return doc.toPlainText();
+#else
+ QString plainText = html;
+ plainText.remove(QRegExp(QLatin1String("<\\s*(style|head|form|script)[^<]*<\\s*/\\s*\\1\\s*>"), Qt::CaseInsensitive));
+ plainText.remove(QRegExp(QLatin1String("<(.)[^>]*>")));
+ plainText.replace(QLatin1String("&quot;"), QLatin1String("\""), Qt::CaseInsensitive);
+ plainText.replace(QLatin1String("&nbsp;"), QLatin1String(" "), Qt::CaseInsensitive);
+ plainText.replace(QLatin1String("&amp;"), QLatin1String("&"), Qt::CaseInsensitive);
+ plainText.replace(QLatin1String("&lt;"), QLatin1String("<"), Qt::CaseInsensitive);
+ plainText.replace(QLatin1String("&gt;"), QLatin1String(">"), Qt::CaseInsensitive);
+
+ // now replace stuff like "&#1084;"
+ int pos = 0;
+ while (true) {
+ pos = plainText.indexOf(QLatin1String("&#"), pos);
+ if (pos < 0)
+ break;
+ int semicolon = plainText.indexOf(';', pos+2);
+ if (semicolon < 0) {
+ ++pos;
+ continue;
+ }
+ int code = (plainText.mid(pos+2, semicolon-pos-2)).toInt();
+ if (code == 0) {
+ ++pos;
+ continue;
+ }
+ plainText.replace(pos, semicolon-pos+1, QChar(code));
+ }
+
+ return plainText.simplified();
+#endif
+}
+
/*! \internal */
void QMailMessage::refreshPreview()
{
const int maxPreviewLength = 280;
// TODO: don't load entire body into memory
- // TODO: parse html correctly, e.g. closing brackets in quotes in tags
QMailMessagePartContainer *htmlPart= findHtmlContainer();
QMailMessagePartContainer *plainTextPart= findPlainTextContainer();
@@ -8604,40 +8645,13 @@ void QMailMessage::refreshPreview()
plainTextPart=0;
if ( plainTextPart && plainTextPart->hasBody()) {
- QString plaintext(plainTextPart->body().data());
- plaintext.remove(QRegExp(QLatin1String("\\[(image|cid):[^\\]]*\\]"), Qt::CaseInsensitive));
- metaDataImpl()->setPreview(plaintext.left(maxPreviewLength));
+ QString plainText = plainTextPart->body().data();
+ metaDataImpl()->setPreview(plainText.left(maxPreviewLength));
} else if (htmlPart && ( multipartType() == MultipartRelated || htmlPart->hasBody())) {
QString markup = htmlPart->body().data();
- markup.remove(QRegExp(QLatin1String("<\\s*(style|head|form|script)[^<]*<\\s*/\\s*\\1\\s*>"), Qt::CaseInsensitive));
- markup.remove(QRegExp(QLatin1String("<(.)[^>]*>")));
- markup.replace(QLatin1String("&quot;"), QLatin1String("\""), Qt::CaseInsensitive);
- markup.replace(QLatin1String("&nbsp;"), QLatin1String(" "), Qt::CaseInsensitive);
- markup.replace(QLatin1String("&amp;"), QLatin1String("&"), Qt::CaseInsensitive);
- markup.replace(QLatin1String("&lt;"), QLatin1String("<"), Qt::CaseInsensitive);
- markup.replace(QLatin1String("&gt;"), QLatin1String(">"), Qt::CaseInsensitive);
-
- // now replace stuff like "&#1084;"
- for (int pos = 0; ; ) {
- pos = markup.indexOf(QLatin1String("&#"), pos);
- if (pos < 0)
- break;
- int semicolon = markup.indexOf(';', pos+2);
- if (semicolon < 0) {
- ++pos;
- continue;
- }
- int code = (markup.mid(pos+2, semicolon-pos-2)).toInt();
- if (code == 0) {
- ++pos;
- continue;
- }
- markup.replace(pos, semicolon-pos+1, QChar(code));
- }
-
- metaDataImpl()->setPreview(markup.simplified().left(maxPreviewLength));
+ metaDataImpl()->setPreview(htmlToPlainText(markup).left(maxPreviewLength));
}
-
+
partContainerImpl()->setPreviewDirty(false);
}
diff --git a/src/libraries/qmfclient/qmfclient.pro b/src/libraries/qmfclient/qmfclient.pro
index f643c4d1..665aba1b 100644
--- a/src/libraries/qmfclient/qmfclient.pro
+++ b/src/libraries/qmfclient/qmfclient.pro
@@ -16,6 +16,11 @@ DEFINES += QMF_INSTALL_ROOT=\\\"$$QMF_INSTALL_ROOT\\\"
#DEPENDPATH += .
INCLUDEPATH += support
+contains(DEFINES, USE_HTML_PARSER) {
+ QT += gui
+}
+
+
HEADERS += \
qmailaccount.h \
qmailaccountconfiguration.h \
diff --git a/src/tools/messageserver/main.cpp b/src/tools/messageserver/main.cpp
index c6f0edc9..0076e711 100644
--- a/src/tools/messageserver/main.cpp
+++ b/src/tools/messageserver/main.cpp
@@ -37,6 +37,10 @@
#include <qmaillog.h>
#include <qloggers.h>
#include <signal.h>
+#include <stdlib.h>
+#ifdef USE_HTML_PARSER
+#include <QtGui>
+#endif
#if !defined(NO_SHUTDOWN_SIGNAL_HANDLING) && defined(Q_OS_UNIX)
@@ -58,7 +62,13 @@ static void recreateLoggers(int n)
int main(int argc, char** argv)
{
+#ifdef USE_HTML_PARSER
+ // QGuiApplication is needed for HTML parsing by QTextDocument in qmailmessage.cpp, but no real UI is needed, so use the "minimal" platform
+ setenv("QT_QPA_PLATFORM", "minimal", 1);
+ QGuiApplication app(argc, argv);
+#else
QCoreApplication app(argc, argv);
+#endif
// This is ~/.config/QtProject/Messageserver.conf
qMailLoggersRecreate("QtProject", "Messageserver", "Msgsrv");
diff --git a/src/tools/messageserver/messageserver.pro b/src/tools/messageserver/messageserver.pro
index a70edfe4..f7d92931 100644
--- a/src/tools/messageserver/messageserver.pro
+++ b/src/tools/messageserver/messageserver.pro
@@ -9,6 +9,10 @@ SERVER_AS_DLL: {
TARGET = messageserver5
QT = core qmfclient qmfclient-private qmfmessageserver
+contains(DEFINES, USE_HTML_PARSER) {
+ QT += gui
+}
+
!contains(DEFINES,QMF_NO_WIDGETS) {
QT += gui widgets
}