summaryrefslogtreecommitdiffstats
path: root/src/pdf/qpdfsearchmodel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/pdf/qpdfsearchmodel.cpp')
-rw-r--r--src/pdf/qpdfsearchmodel.cpp373
1 files changed, 373 insertions, 0 deletions
diff --git a/src/pdf/qpdfsearchmodel.cpp b/src/pdf/qpdfsearchmodel.cpp
new file mode 100644
index 000000000..a81ae77dc
--- /dev/null
+++ b/src/pdf/qpdfsearchmodel.cpp
@@ -0,0 +1,373 @@
+// Copyright (C) 2020 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
+
+#include "qpdfdocument_p.h"
+#include "qpdflink.h"
+#include "qpdfsearchmodel.h"
+#include "qpdfsearchmodel_p.h"
+
+#include "third_party/pdfium/public/fpdf_text.h"
+#include "third_party/pdfium/public/fpdfview.h"
+
+#include <QtCore/qelapsedtimer.h>
+#include <QtCore/qloggingcategory.h>
+#include <QtCore/QMetaEnum>
+
+QT_BEGIN_NAMESPACE
+
+Q_LOGGING_CATEGORY(qLcS, "qt.pdf.search")
+
+static const int UpdateTimerInterval = 100;
+static const int ContextChars = 64;
+
+/*!
+ \class QPdfSearchModel
+ \since 5.15
+ \inmodule QtPdf
+ \inherits QAbstractListModel
+
+ \brief The QPdfSearchModel class searches for a string in a PDF document
+ and holds the results.
+
+ This is used in the \l {Model/View Programming} paradigm to display
+ a list of search results, to highlight them on the rendered PDF pages,
+ and to iterate through them using the "search forward" / "search backward"
+ buttons and shortcuts that would be found in a typical document-viewing UI:
+
+ \image search-results.png
+*/
+
+/*!
+ \enum QPdfSearchModel::Role
+
+ \value Page The page number where the search result is found (int).
+ \value IndexOnPage The index of the search result on the page (int).
+ \value Location The position of the search result on the page (QPointF).
+ \value ContextBefore The adjacent text on the page, before the search string (QString).
+ \value ContextAfter The adjacent text on the page, after the search string (QString).
+ \omitvalue NRoles
+
+ \sa QPdfLink
+*/
+
+/*!
+ Constructs a new search model with parent object \a parent.
+*/
+QPdfSearchModel::QPdfSearchModel(QObject *parent)
+ : QAbstractListModel(*(new QPdfSearchModelPrivate()), parent)
+{
+ QMetaEnum rolesMetaEnum = metaObject()->enumerator(metaObject()->indexOfEnumerator("Role"));
+ for (int r = Qt::UserRole; r < int(Role::NRoles); ++r) {
+ QByteArray roleName = QByteArray(rolesMetaEnum.valueToKey(r));
+ if (roleName.isEmpty())
+ continue;
+ roleName[0] = QChar::toLower(roleName[0]);
+ m_roleNames.insert(r, roleName);
+ }
+ connect(this, &QAbstractListModel::dataChanged, this, &QPdfSearchModel::countChanged);
+ connect(this, &QAbstractListModel::modelReset, this, &QPdfSearchModel::countChanged);
+ connect(this, &QAbstractListModel::rowsRemoved, this, &QPdfSearchModel::countChanged);
+ connect(this, &QAbstractListModel::rowsInserted, this, &QPdfSearchModel::countChanged);
+}
+
+/*!
+ Destroys the model.
+*/
+QPdfSearchModel::~QPdfSearchModel() {}
+
+/*!
+ \reimp
+*/
+QHash<int, QByteArray> QPdfSearchModel::roleNames() const
+{
+ return m_roleNames;
+}
+
+/*!
+ \reimp
+
+ The number of rows in the model is equal to the number of search results found.
+*/
+int QPdfSearchModel::rowCount(const QModelIndex &parent) const
+{
+ Q_D(const QPdfSearchModel);
+ Q_UNUSED(parent);
+ return d->rowCountSoFar;
+}
+
+/*!
+ \reimp
+*/
+QVariant QPdfSearchModel::data(const QModelIndex &index, int role) const
+{
+ Q_D(const QPdfSearchModel);
+ const auto pi = const_cast<QPdfSearchModelPrivate*>(d)->pageAndIndexForResult(index.row());
+ if (pi.page < 0)
+ return QVariant();
+ switch (Role(role)) {
+ case Role::Page:
+ return pi.page;
+ case Role::IndexOnPage:
+ return pi.index;
+ case Role::Location:
+ return d->searchResults[pi.page][pi.index].location();
+ case Role::ContextBefore:
+ return d->searchResults[pi.page][pi.index].contextBefore();
+ case Role::ContextAfter:
+ return d->searchResults[pi.page][pi.index].contextAfter();
+ case Role::NRoles:
+ break;
+ }
+ if (role == Qt::DisplayRole) {
+ const QString ret = d->searchResults[pi.page][pi.index].contextBefore() +
+ QLatin1String("<b>") + d->searchString + QLatin1String("</b>") +
+ d->searchResults[pi.page][pi.index].contextAfter();
+ return ret;
+ }
+ return QVariant();
+}
+
+/*!
+ \since 6.8
+ \property QPdfSearchModel::count
+ \brief the number of search results found
+*/
+int QPdfSearchModel::count() const
+{
+ return rowCount(QModelIndex());
+}
+
+void QPdfSearchModel::updatePage(int page)
+{
+ Q_D(QPdfSearchModel);
+ d->doSearch(page);
+}
+
+/*!
+ \property QPdfSearchModel::searchString
+ \brief the string to search for
+*/
+QString QPdfSearchModel::searchString() const
+{
+ Q_D(const QPdfSearchModel);
+ return d->searchString;
+}
+
+void QPdfSearchModel::setSearchString(const QString &searchString)
+{
+ Q_D(QPdfSearchModel);
+ if (d->searchString == searchString)
+ return;
+
+ d->searchString = searchString;
+ beginResetModel();
+ d->clearResults();
+ emit searchStringChanged();
+ endResetModel();
+}
+
+/*!
+ Returns the list of all results found on the given \a page.
+*/
+QList<QPdfLink> QPdfSearchModel::resultsOnPage(int page) const
+{
+ Q_D(const QPdfSearchModel);
+ const_cast<QPdfSearchModelPrivate *>(d)->doSearch(page);
+ if (d->searchResults.size() <= page)
+ return {};
+ return d->searchResults[page];
+}
+
+/*!
+ Returns a result found by \a index in the \l document, regardless of the
+ page on which it was found. \a index must be less than \l rowCount.
+*/
+QPdfLink QPdfSearchModel::resultAtIndex(int index) const
+{
+ Q_D(const QPdfSearchModel);
+ const auto pi = const_cast<QPdfSearchModelPrivate*>(d)->pageAndIndexForResult(index);
+ if (pi.page < 0 || index < 0)
+ return {};
+ return d->searchResults[pi.page][pi.index];
+}
+
+/*!
+ \property QPdfSearchModel::document
+ \brief the document to search
+*/
+QPdfDocument *QPdfSearchModel::document() const
+{
+ Q_D(const QPdfSearchModel);
+ return d->document;
+}
+
+void QPdfSearchModel::setDocument(QPdfDocument *document)
+{
+ Q_D(QPdfSearchModel);
+ if (d->document == document)
+ return;
+
+ disconnect(d->documentConnection);
+ d->documentConnection = connect(document, &QPdfDocument::pageCountChanged, this,
+ [this]() { d_func()->clearResults(); });
+
+ d->document = document;
+ d->clearResults();
+ emit documentChanged();
+}
+
+void QPdfSearchModel::timerEvent(QTimerEvent *event)
+{
+ Q_D(QPdfSearchModel);
+ if (event->timerId() != d->updateTimerId)
+ return;
+ if (!d->document || d->nextPageToUpdate >= d->document->pageCount()) {
+ if (d->document)
+ qCDebug(qLcS) << "done updating search results on" << d->searchResults.size() << "pages";
+ killTimer(d->updateTimerId);
+ d->updateTimerId = -1;
+ }
+ d->doSearch(d->nextPageToUpdate++);
+}
+
+QPdfSearchModelPrivate::QPdfSearchModelPrivate() : QAbstractItemModelPrivate()
+{
+}
+
+void QPdfSearchModelPrivate::clearResults()
+{
+ Q_Q(QPdfSearchModel);
+ rowCountSoFar = 0;
+ searchResults.clear();
+ pagesSearched.clear();
+ if (document) {
+ searchResults.resize(document->pageCount());
+ pagesSearched.resize(document->pageCount());
+ }
+ nextPageToUpdate = 0;
+ updateTimerId = q->startTimer(UpdateTimerInterval);
+}
+
+bool QPdfSearchModelPrivate::doSearch(int page)
+{
+ if (page < 0 || page >= pagesSearched.size() || searchString.isEmpty())
+ return false;
+ if (pagesSearched[page])
+ return true;
+ Q_Q(QPdfSearchModel);
+
+ const QPdfMutexLocker lock;
+ QElapsedTimer timer;
+ timer.start();
+ FPDF_PAGE pdfPage = FPDF_LoadPage(document->d->doc, page);
+ if (!pdfPage) {
+ qWarning() << "failed to load page" << page;
+ return false;
+ }
+ FPDF_TEXTPAGE textPage = FPDFText_LoadPage(pdfPage);
+ if (!textPage) {
+ qWarning() << "failed to load text of page" << page;
+ FPDF_ClosePage(pdfPage);
+ return false;
+ }
+ FPDF_SCHHANDLE sh = FPDFText_FindStart(textPage, searchString.utf16(), 0, 0);
+ QList<QPdfLink> newSearchResults;
+ constexpr double CharacterHitTolerance = 6.0;
+ while (FPDFText_FindNext(sh)) {
+ int idx = FPDFText_GetSchResultIndex(sh);
+ int count = FPDFText_GetSchCount(sh);
+ int rectCount = FPDFText_CountRects(textPage, idx, count);
+ QList<QRectF> rects;
+ int startIndex = -1;
+ int endIndex = -1;
+ for (int r = 0; r < rectCount; ++r) {
+ // get bounding box of search result in page coordinates
+ double left, top, right, bottom;
+ FPDFText_GetRect(textPage, r, &left, &top, &right, &bottom);
+ // deal with any internal PDF transforms and
+ // convert to the 1x (pixels = points) 4th-quadrant coordinate system
+ rects << document->d->mapPageToView(pdfPage, left, top, right, bottom);
+ if (r == 0) {
+ startIndex = FPDFText_GetCharIndexAtPos(textPage, left, top,
+ CharacterHitTolerance, CharacterHitTolerance);
+ }
+ if (r == rectCount - 1) {
+ endIndex = FPDFText_GetCharIndexAtPos(textPage, right, top,
+ CharacterHitTolerance, CharacterHitTolerance);
+ }
+ qCDebug(qLcS) << rects.last() << "char idx" << startIndex << "->" << endIndex
+ << "from page rect" << left << top << right << bottom;
+ }
+ QString contextBefore, contextAfter;
+ if (startIndex >= 0 || endIndex >= 0) {
+ startIndex = qMax(0, startIndex - ContextChars);
+ endIndex += ContextChars;
+ int count = endIndex - startIndex + 1;
+ if (count > 0) {
+ QList<ushort> buf(count + 1);
+ int len = FPDFText_GetText(textPage, startIndex, count, buf.data());
+ Q_ASSERT(len - 1 <= count); // len is number of characters written, including the terminator
+ QString context = QString::fromUtf16(
+ reinterpret_cast<const char16_t *>(buf.constData()), len - 1);
+ context = context.replace(QLatin1Char('\n'), QStringLiteral("\u23CE"));
+ context = context.remove(QLatin1Char('\r'));
+ // try to find the search string near the middle of the context if possible
+ int si = context.indexOf(searchString, ContextChars - 5, Qt::CaseInsensitive);
+ if (si < 0)
+ si = context.indexOf(searchString, Qt::CaseInsensitive);
+ if (si < 0)
+ qWarning() << "search string" << searchString << "not found in context" << context;
+ contextBefore = context.mid(0, si);
+ contextAfter = context.mid(si + searchString.size());
+ }
+ }
+ if (!rects.isEmpty())
+ newSearchResults << QPdfLink(page, rects, contextBefore, contextAfter);
+ }
+ FPDFText_FindClose(sh);
+ FPDFText_ClosePage(textPage);
+ FPDF_ClosePage(pdfPage);
+ qCDebug(qLcS) << searchString << "took" << timer.elapsed() << "ms to find"
+ << newSearchResults.size() << "results on page" << page;
+
+ pagesSearched[page] = true;
+ searchResults[page] = newSearchResults;
+ if (newSearchResults.size() > 0) {
+ int rowsBefore = rowsBeforePage(page);
+ qCDebug(qLcS) << "from row" << rowsBefore << "rowCount" << rowCountSoFar << "increasing by" << newSearchResults.size();
+ rowCountSoFar += newSearchResults.size();
+ q->beginInsertRows(QModelIndex(), rowsBefore, rowsBefore + newSearchResults.size() - 1);
+ q->endInsertRows();
+ }
+ return true;
+}
+
+QPdfSearchModelPrivate::PageAndIndex QPdfSearchModelPrivate::pageAndIndexForResult(int resultIndex)
+{
+ if (pagesSearched.isEmpty())
+ return {-1, -1};
+ const int pageCount = document->pageCount();
+ int totalSoFar = 0;
+ int previousTotalSoFar = 0;
+ for (int page = 0; page < pageCount; ++page) {
+ if (!pagesSearched[page])
+ doSearch(page);
+ totalSoFar += searchResults[page].size();
+ if (totalSoFar > resultIndex)
+ return {page, resultIndex - previousTotalSoFar};
+ previousTotalSoFar = totalSoFar;
+ }
+ return {-1, -1};
+}
+
+int QPdfSearchModelPrivate::rowsBeforePage(int page)
+{
+ int ret = 0;
+ for (int i = 0; i < page; ++i)
+ ret += searchResults[i].size();
+ return ret;
+}
+
+QT_END_NAMESPACE
+
+#include "moc_qpdfsearchmodel.cpp"