summaryrefslogtreecommitdiffstats
path: root/src/assistant/lib/qhelpsearchindexreader_clucene.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/assistant/lib/qhelpsearchindexreader_clucene.cpp')
-rw-r--r--src/assistant/lib/qhelpsearchindexreader_clucene.cpp481
1 files changed, 481 insertions, 0 deletions
diff --git a/src/assistant/lib/qhelpsearchindexreader_clucene.cpp b/src/assistant/lib/qhelpsearchindexreader_clucene.cpp
new file mode 100644
index 000000000..537e1d765
--- /dev/null
+++ b/src/assistant/lib/qhelpsearchindexreader_clucene.cpp
@@ -0,0 +1,481 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the Qt Assistant of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "fulltextsearch/qindexreader_p.h"
+#include "fulltextsearch/qqueryparser_p.h"
+#include "fulltextsearch/qsearchable_p.h"
+#include "qclucenefieldnames_p.h"
+#include "qhelpenginecore.h"
+
+#include "qhelpsearchindexreader_clucene_p.h"
+
+#include <QtCore/QDir>
+#include <QtCore/QSet>
+#include <QtCore/QString>
+#include <QtCore/QFileInfo>
+#include <QtCore/QSharedPointer>
+#include <QtCore/QStringList>
+#include <QtCore/QTextStream>
+#include <QtCore/QMutexLocker>
+
+QT_BEGIN_NAMESPACE
+
+namespace fulltextsearch {
+namespace clucene {
+
+QHelpSearchIndexReaderClucene::QHelpSearchIndexReaderClucene()
+ : QHelpSearchIndexReader()
+{
+ // nothing todo
+}
+
+QHelpSearchIndexReaderClucene::~QHelpSearchIndexReaderClucene()
+{
+}
+
+
+void QHelpSearchIndexReaderClucene::run()
+{
+ mutex.lock();
+
+ if (m_cancel) {
+ mutex.unlock();
+ return;
+ }
+
+ const QString collectionFile(this->m_collectionFile);
+ const QList<QHelpSearchQuery> &queryList = this->m_query;
+ const QString indexPath(m_indexFilesFolder);
+
+ mutex.unlock();
+
+ QHelpEngineCore engine(collectionFile, 0);
+ if (!engine.setupData())
+ return;
+
+ QFileInfo fInfo(indexPath);
+ if (fInfo.exists() && !fInfo.isWritable()) {
+ qWarning("Full Text Search, could not read index (missing permissions).");
+ return;
+ }
+
+ if(QCLuceneIndexReader::indexExists(indexPath)) {
+ mutex.lock();
+ if (m_cancel) {
+ mutex.unlock();
+ return;
+ }
+ mutex.unlock();
+
+ emit searchingStarted();
+
+#if !defined(QT_NO_EXCEPTIONS)
+ try {
+#endif
+ QCLuceneBooleanQuery booleanQueryTitle;
+ QCLuceneBooleanQuery booleanQueryContent;
+ QCLuceneStandardAnalyzer analyzer;
+ const QStringList& attribList =
+ engine.filterAttributes(engine.currentFilter());
+ bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField,
+ attribList, booleanQueryTitle, analyzer);
+ bool contentQueryIsValid = buildQuery(queryList, ContentField,
+ attribList, booleanQueryContent, analyzer);
+ if (!titleQueryIsValid && !contentQueryIsValid) {
+ emit searchingFinished(0);
+ return;
+ }
+
+ QCLuceneIndexSearcher indexSearcher(indexPath);
+
+ // QCLuceneHits object must be allocated on the heap, because
+ // there is no default constructor.
+ QSharedPointer<QCLuceneHits> titleHits;
+ QSharedPointer<QCLuceneHits> contentHits;
+ if (titleQueryIsValid) {
+ titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryTitle)));
+ }
+ if (contentQueryIsValid) {
+ contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryContent)));
+ }
+ bool boost = true;
+ if ((titleHits.isNull() || titleHits->length() == 0)
+ && (contentHits.isNull() || contentHits->length() == 0)) {
+ booleanQueryTitle = QCLuceneBooleanQuery();
+ booleanQueryContent = QCLuceneBooleanQuery();
+ titleQueryIsValid =
+ buildTryHarderQuery(queryList, TitleTokenizedField,
+ attribList, booleanQueryTitle, analyzer);
+ contentQueryIsValid =
+ buildTryHarderQuery(queryList, ContentField, attribList,
+ booleanQueryContent, analyzer);
+ if (!titleQueryIsValid && !contentQueryIsValid) {
+ emit searchingFinished(0);
+ return;
+ }
+ if (titleQueryIsValid) {
+ titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryTitle)));
+ }
+ if (contentQueryIsValid) {
+ contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryContent)));
+ }
+ boost = false;
+ }
+ QList<QSharedPointer<QCLuceneHits> > cluceneHitsList;
+ if (!titleHits.isNull())
+ cluceneHitsList.append(titleHits);
+ if (!contentHits.isNull())
+ cluceneHitsList.append(contentHits);
+
+ QSet<QString> pathSet;
+ QCLuceneDocument document;
+ const QStringList namespaceList = engine.registeredDocumentations();
+
+ foreach (const QSharedPointer<QCLuceneHits> &hits, cluceneHitsList) {
+ for (qint32 i = 0; i < hits->length(); i++) {
+ document = hits->document(i);
+ const QString path = document.get(PathField);
+ if (!pathSet.contains(path) && namespaceList.contains(
+ document.get(NamespaceField), Qt::CaseInsensitive)) {
+ pathSet.insert(path);
+ hitList.append(qMakePair(path, document.get(TitleField)));
+ }
+ document.clear();
+
+ mutex.lock();
+ if (m_cancel) {
+ mutex.unlock();
+ emit searchingFinished(0);
+ return;
+ }
+ mutex.unlock();
+ }
+ }
+
+ indexSearcher.close();
+ const int count = hitList.count();
+ if ((count > 0) && boost)
+ boostSearchHits(engine, hitList, queryList);
+ emit searchingFinished(hitList.count());
+
+#if !defined(QT_NO_EXCEPTIONS)
+ } catch(...) {
+ mutex.lock();
+ hitList.clear();
+ mutex.unlock();
+ emit searchingFinished(0);
+ }
+#endif
+ }
+}
+
+bool QHelpSearchIndexReaderClucene::buildQuery(
+ const QList<QHelpSearchQuery> &queries, const QString &fieldName,
+ const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ bool queryIsValid = false;
+ foreach (const QHelpSearchQuery &query, queries) {
+ if (fieldName != ContentField && isNegativeQuery(query)) {
+ queryIsValid = false;
+ break;
+ }
+ switch (query.fieldName) {
+ case QHelpSearchQuery::FUZZY:
+ if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::WITHOUT:
+ if (fieldName != ContentField)
+ return false;
+ if (addWithoutQuery(query, fieldName, booleanQuery))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::PHRASE:
+ if (addPhraseQuery(query, fieldName, booleanQuery))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::ALL:
+ if (addAllQuery(query, fieldName, booleanQuery))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::DEFAULT:
+ if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::ATLEAST:
+ if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer))
+ queryIsValid = true;
+ break;
+ default:
+ Q_ASSERT(!"Invalid field name");
+ }
+ }
+
+ if (queryIsValid && !filterAttributes.isEmpty()) {
+ queryIsValid =
+ addAttributesQuery(filterAttributes, booleanQuery, analyzer);
+ }
+
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(
+ const QList<QHelpSearchQuery> &queries, const QString &fieldName,
+ const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ if (queries.isEmpty())
+ return false;
+ const QHelpSearchQuery &query = queries.front();
+ if (query.fieldName != QHelpSearchQuery::DEFAULT)
+ return false;
+ if (isNegativeQuery(query))
+ return false;
+ if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer))
+ return false;
+ if (filterAttributes.isEmpty())
+ return true;
+ return addAttributesQuery(filterAttributes, booleanQuery, analyzer);
+}
+
+bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const
+{
+ const QString &search = query.wordList.join(" ");
+ return search.contains('!') || search.contains('-')
+ || search.contains(QLatin1String(" NOT "));
+}
+
+bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ bool queryIsValid = false;
+ const QLatin1String fuzzy("~");
+ foreach (const QString &term, query.wordList) {
+ if (!term.isEmpty()) {
+ QCLuceneQuery *lQuery =
+ QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer);
+ if (lQuery != 0) {
+ booleanQuery.add(lQuery, true, false, false);
+ queryIsValid = true;
+ }
+ }
+ }
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+ bool queryIsValid = false;
+ const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, query.wordList) {
+ if (stopWords.contains(term, Qt::CaseInsensitive))
+ continue;
+ QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
+ fieldName, term.toLower()));
+ booleanQuery.add(lQuery, true, false, true);
+ queryIsValid = true;
+ }
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+ bool queryIsValid = false;
+ const QString &term = query.wordList.at(0).toLower();
+ if (term.contains(QLatin1Char(' '))) {
+ const QStringList termList = term.split(QLatin1String(" "));
+ QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
+ const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, termList) {
+ if (!stopWords.contains(term, Qt::CaseInsensitive))
+ q->addTerm(QCLuceneTerm(fieldName, term.toLower()));
+ }
+ if (!q->getTerms().isEmpty()) {
+ booleanQuery.add(q, true, true, false);
+ queryIsValid = true;
+ }
+ } else {
+ QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
+ fieldName, term.toLower()));
+ booleanQuery.add(lQuery, true, true, false);
+ queryIsValid = true;
+ }
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+ bool queryIsValid = false;
+ const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, query.wordList) {
+ if (stopWords.contains(term, Qt::CaseInsensitive))
+ continue;
+ QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
+ fieldName, term.toLower()));
+ booleanQuery.add(lQuery, true, true, false);
+ queryIsValid = true;
+ }
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, bool allTermsRequired,
+ QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ bool queryIsValid = false;
+ foreach (const QString &term, query.wordList) {
+ QCLuceneQuery *lQuery =
+ QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer);
+ if (lQuery) {
+ booleanQuery.add(lQuery, true, allTermsRequired, false);
+ queryIsValid = true;
+ }
+ }
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addAtLeastQuery(
+ const QHelpSearchQuery &query, const QString &fieldName,
+ QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer)
+{
+ bool queryIsValid = false;
+ foreach (const QString &term, query.wordList) {
+ if (!term.isEmpty()) {
+ QCLuceneQuery *lQuery =
+ QCLuceneQueryParser::parse(term, fieldName, analyzer);
+ if (lQuery) {
+ booleanQuery.add(lQuery, true, false, false);
+ queryIsValid = true;
+ }
+ }
+ }
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addAttributesQuery(
+ const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+")
+ + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer);
+ if (!lQuery)
+ return false;
+ booleanQuery.add(lQuery, true, true, false);
+ return true;
+}
+
+void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine,
+ QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList)
+{
+ foreach (const QHelpSearchQuery &query, queryList) {
+ if (query.fieldName != QHelpSearchQuery::DEFAULT)
+ continue;
+
+ QString joinedQuery = query.wordList.join(QLatin1String(" "));
+
+ QCLuceneStandardAnalyzer analyzer;
+ QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
+ joinedQuery, ContentField, analyzer);
+
+ if (parsedQuery) {
+ joinedQuery = parsedQuery->toString();
+ delete parsedQuery;
+ }
+
+ const QString contentString(ContentField + QLatin1String(":"));
+ int length = contentString.length();
+ int index = joinedQuery.indexOf(contentString);
+
+ QString term;
+ int nextIndex = 0;
+ QStringList searchTerms;
+ while (index != -1) {
+ nextIndex = joinedQuery.indexOf(contentString, index + 1);
+ term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified();
+ if (term.startsWith(QLatin1String("\""))
+ && term.endsWith(QLatin1String("\""))) {
+ searchTerms.append(term.remove(QLatin1String("\"")));
+ } else {
+ searchTerms += term.split(QLatin1Char(' '));
+ }
+ index = nextIndex;
+ }
+ searchTerms.removeDuplicates();
+
+ int count = qMin(75, hitList.count());
+ QMap<int, QHelpSearchEngine::SearchHit> hitMap;
+ for (int i = 0; i < count; ++i) {
+ const QHelpSearchEngine::SearchHit &hit = hitList.at(i);
+ QString data = QString::fromUtf8(engine.fileData(hit.first));
+
+ int counter = 0;
+ foreach (const QString &term, searchTerms)
+ counter += data.count(term, Qt::CaseInsensitive);
+ hitMap.insertMulti(counter, hit);
+ }
+
+ QList<QHelpSearchEngine::SearchHit> boostedList;
+ QMap<int, QHelpSearchEngine::SearchHit>::const_iterator it = hitMap.constEnd();
+ do {
+ --it;
+ boostedList.append(it.value());
+ } while (it != hitMap.constBegin());
+ boostedList += hitList.mid(count, hitList.count());
+ mutex.lock();
+ hitList = boostedList;
+ mutex.unlock();
+ }
+}
+
+} // namespace clucene
+} // namespace fulltextsearch
+
+QT_END_NAMESPACE