diff options
Diffstat (limited to 'src/pdf/qpdfsearchmodel.cpp')
-rw-r--r-- | src/pdf/qpdfsearchmodel.cpp | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/src/pdf/qpdfsearchmodel.cpp b/src/pdf/qpdfsearchmodel.cpp new file mode 100644 index 000000000..4129c7cb7 --- /dev/null +++ b/src/pdf/qpdfsearchmodel.cpp @@ -0,0 +1,310 @@ +/**************************************************************************** +** +** Copyright (C) 2020 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtPDF module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL3$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPLv3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or later as published by the Free +** Software Foundation and appearing in the file LICENSE.GPL included in +** the packaging of this file. Please review the following information to +** ensure the GNU General Public License version 2.0 requirements will be +** met: http://www.gnu.org/licenses/gpl-2.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qpdfdestination.h" +#include "qpdfdocument_p.h" +#include "qpdfsearchmodel.h" +#include "qpdfsearchmodel_p.h" +#include "qpdfsearchresult_p.h" + +#include "third_party/pdfium/public/fpdf_doc.h" +#include "third_party/pdfium/public/fpdf_text.h" + +#include <QtCore/qelapsedtimer.h> +#include <QtCore/qloggingcategory.h> +#include <QtCore/QMetaEnum> + +QT_BEGIN_NAMESPACE + +Q_LOGGING_CATEGORY(qLcS, "qt.pdf.search") + +static const int UpdateTimerInterval = 100; +static const int ContextChars = 20; +static const double CharacterHitTolerance = 6.0; + +QPdfSearchModel::QPdfSearchModel(QObject *parent) + : QAbstractListModel(*(new QPdfSearchModelPrivate()), parent) +{ + QMetaEnum rolesMetaEnum = metaObject()->enumerator(metaObject()->indexOfEnumerator("Role")); + for (int r = Qt::UserRole; r < int(Role::_Count); ++r) { + QByteArray roleName = QByteArray(rolesMetaEnum.valueToKey(r)); + if (roleName.isEmpty()) + continue; + roleName[0] = QChar::toLower(roleName[0]); + m_roleNames.insert(r, roleName); + } +} + +QPdfSearchModel::~QPdfSearchModel() {} + +QHash<int, QByteArray> QPdfSearchModel::roleNames() const +{ + return m_roleNames; +} + +int QPdfSearchModel::rowCount(const QModelIndex &parent) const +{ + Q_D(const QPdfSearchModel); + Q_UNUSED(parent) + return d->rowCountSoFar; +} + +QVariant QPdfSearchModel::data(const QModelIndex &index, int role) const +{ + Q_D(const QPdfSearchModel); + const auto pi = const_cast<QPdfSearchModelPrivate*>(d)->pageAndIndexForResult(index.row()); + if (pi.page < 0) + return QVariant(); + switch (Role(role)) { + case Role::Page: + return pi.page; + case Role::IndexOnPage: + return pi.index; + case Role::Location: + return d->searchResults[pi.page][pi.index].location(); + case Role::Context: + return d->searchResults[pi.page][pi.index].context(); + case Role::_Count: + break; + } + if (role == Qt::DisplayRole) + return d->searchResults[pi.page][pi.index].context(); + return QVariant(); +} + +void QPdfSearchModel::updatePage(int page) +{ + Q_D(QPdfSearchModel); + d->doSearch(page); +} + +QString QPdfSearchModel::searchString() const +{ + Q_D(const QPdfSearchModel); + return d->searchString; +} + +void QPdfSearchModel::setSearchString(QString searchString) +{ + Q_D(QPdfSearchModel); + if (d->searchString == searchString) + return; + + d->searchString = searchString; + emit searchStringChanged(); + beginResetModel(); + d->clearResults(); + endResetModel(); +} + +QVector<QPdfSearchResult> QPdfSearchModel::resultsOnPage(int page) const +{ + Q_D(const QPdfSearchModel); + const_cast<QPdfSearchModelPrivate *>(d)->doSearch(page); + if (d->searchResults.count() <= page) + return {}; + return d->searchResults[page]; +} + +QPdfSearchResult QPdfSearchModel::resultAtIndex(int index) const +{ + Q_D(const QPdfSearchModel); + const auto pi = const_cast<QPdfSearchModelPrivate*>(d)->pageAndIndexForResult(index); + if (pi.page < 0) + return QPdfSearchResult(); + return d->searchResults[pi.page][pi.index]; +} + +QPdfDocument *QPdfSearchModel::document() const +{ + Q_D(const QPdfSearchModel); + return d->document; +} + +void QPdfSearchModel::setDocument(QPdfDocument *document) +{ + Q_D(QPdfSearchModel); + if (d->document == document) + return; + + d->document = document; + emit documentChanged(); + d->clearResults(); +} + +void QPdfSearchModel::timerEvent(QTimerEvent *event) +{ + Q_D(QPdfSearchModel); + if (event->timerId() != d->updateTimerId) + return; + if (!d->document || d->nextPageToUpdate >= d->document->pageCount()) { + if (d->document) + qCDebug(qLcS, "done updating search results on %d pages", d->searchResults.count()); + killTimer(d->updateTimerId); + d->updateTimerId = -1; + } + d->doSearch(d->nextPageToUpdate++); +} + +QPdfSearchModelPrivate::QPdfSearchModelPrivate() +{ +} + +void QPdfSearchModelPrivate::clearResults() +{ + Q_Q(QPdfSearchModel); + rowCountSoFar = 0; + searchResults.clear(); + pagesSearched.clear(); + if (document) { + searchResults.resize(document->pageCount()); + pagesSearched.resize(document->pageCount()); + } else { + searchResults.resize(0); + pagesSearched.resize(0); + } + nextPageToUpdate = 0; + updateTimerId = q->startTimer(UpdateTimerInterval); +} + +bool QPdfSearchModelPrivate::doSearch(int page) +{ + if (page < 0 || page >= pagesSearched.count() || searchString.isEmpty()) + return false; + if (pagesSearched[page]) + return true; + Q_Q(QPdfSearchModel); + + const QPdfMutexLocker lock; + QElapsedTimer timer; + timer.start(); + FPDF_PAGE pdfPage = FPDF_LoadPage(document->d->doc, page); + if (!pdfPage) { + qWarning() << "failed to load page" << page; + return false; + } + double pageHeight = FPDF_GetPageHeight(pdfPage); + FPDF_TEXTPAGE textPage = FPDFText_LoadPage(pdfPage); + if (!textPage) { + qWarning() << "failed to load text of page" << page; + FPDF_ClosePage(pdfPage); + return false; + } + FPDF_SCHHANDLE sh = FPDFText_FindStart(textPage, searchString.utf16(), 0, 0); + QVector<QPdfSearchResult> newSearchResults; + while (FPDFText_FindNext(sh)) { + int idx = FPDFText_GetSchResultIndex(sh); + int count = FPDFText_GetSchCount(sh); + int rectCount = FPDFText_CountRects(textPage, idx, count); + QVector<QRectF> rects; + int startIndex = -1; + int endIndex = -1; + for (int r = 0; r < rectCount; ++r) { + double left, top, right, bottom; + FPDFText_GetRect(textPage, r, &left, &top, &right, &bottom); + rects << QRectF(left, pageHeight - top, right - left, top - bottom); + if (r == 0) { + startIndex = FPDFText_GetCharIndexAtPos(textPage, left, top, + CharacterHitTolerance, CharacterHitTolerance); + } + if (r == rectCount - 1) { + endIndex = FPDFText_GetCharIndexAtPos(textPage, right, top, + CharacterHitTolerance, CharacterHitTolerance); + } + qCDebug(qLcS) << rects.last() << "char idx" << startIndex << "->" << endIndex; + } + QString context; + if (startIndex >= 0 || endIndex >= 0) { + startIndex = qMax(0, startIndex - ContextChars); + endIndex += ContextChars; + int count = endIndex - startIndex + 1; + if (count > 0) { + QVector<ushort> buf(count + 1); + int len = FPDFText_GetText(textPage, startIndex, count, buf.data()); + Q_ASSERT(len - 1 <= count); // len is number of characters written, including the terminator + context = QString::fromUtf16(buf.constData(), len - 1); + context = context.replace(QLatin1Char('\n'), QLatin1Char(' ')); + context = context.replace(searchString, + QLatin1String("<b>") + searchString + QLatin1String("</b>")); + } + } + newSearchResults << QPdfSearchResult(page, rects, context); + } + FPDFText_FindClose(sh); + FPDFText_ClosePage(textPage); + FPDF_ClosePage(pdfPage); + qCDebug(qLcS) << searchString << "took" << timer.elapsed() << "ms to find" + << newSearchResults.count() << "results on page" << page; + + pagesSearched[page] = true; + searchResults[page] = newSearchResults; + if (newSearchResults.count() > 0) { + int rowsBefore = rowsBeforePage(page); + qCDebug(qLcS) << "from row" << rowsBefore << "rowCount" << rowCountSoFar << "increasing by" << newSearchResults.count(); + rowCountSoFar += newSearchResults.count(); + q->beginInsertRows(QModelIndex(), rowsBefore, rowsBefore + newSearchResults.count() - 1); + q->endInsertRows(); + } + return true; +} + +QPdfSearchModelPrivate::PageAndIndex QPdfSearchModelPrivate::pageAndIndexForResult(int resultIndex) +{ + const int pageCount = document->pageCount(); + int totalSoFar = 0; + int previousTotalSoFar = 0; + for (int page = 0; page < pageCount; ++page) { + if (!pagesSearched[page]) + doSearch(page); + totalSoFar += searchResults[page].count(); + if (totalSoFar > resultIndex) + return {page, resultIndex - previousTotalSoFar}; + previousTotalSoFar = totalSoFar; + } + return {-1, -1}; +} + +int QPdfSearchModelPrivate::rowsBeforePage(int page) +{ + int ret = 0; + for (int i = 0; i < page; ++i) + ret += searchResults[i].count(); + return ret; +} + +QT_END_NAMESPACE + +#include "moc_qpdfsearchmodel.cpp" |