/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the Qt Assistant of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qhelpenginecore.h"
#include "fulltextsearch/qsearchable_p.h"
#include "fulltextsearch/qqueryparser_p.h"
#include "fulltextsearch/qindexreader_p.h"
#include "qhelpsearchindexreader_clucene_p.h"

// NOTE(review): the targets of the seven system includes were missing from
// this file. The list below is reconstructed from the QtCore classes used in
// this translation unit - confirm against version control.
#include <QtCore/QFileInfo>
#include <QtCore/QList>
#include <QtCore/QMap>
#include <QtCore/QPair>
#include <QtCore/QSet>
#include <QtCore/QString>
#include <QtCore/QStringList>

QT_BEGIN_NAMESPACE

namespace qt {
namespace fulltextsearch {
namespace clucene {

QHelpSearchIndexReaderClucene::QHelpSearchIndexReaderClucene()
    : QHelpSearchIndexReader()
{
    // nothing to do
}

QHelpSearchIndexReaderClucene::~QHelpSearchIndexReaderClucene()
{
}

/*
    Runs one search against the CLucene index.

    The search inputs (collection file, query list, index folder) are copied
    while the mutex is held so the rest of the function works on private
    copies. Matching documents are appended to hitList as (path, title)
    pairs, one entry per distinct path, restricted to namespaces registered
    with the help engine.

    Emits searchingStarted() once the index is known to exist and the search
    was not canceled, and searchingFinished(n) with the number of hits, or 0
    on failure, cancellation or an exception. Returns silently, without
    emitting anything, when the search was canceled up front, the help
    engine cannot be set up, the index is not writable or no index exists.
*/
void QHelpSearchIndexReaderClucene::run()
{
    mutex.lock();
    if (m_cancel) {
        mutex.unlock();
        return;
    }

    const QString collectionFile(this->m_collectionFile);
    // Copied rather than referenced: the list is used long after the mutex
    // has been released, just like the two strings next to it.
    const QList<QHelpSearchQuery> queryList(this->m_query);
    const QString indexPath(m_indexFilesFolder);

    mutex.unlock();

    QHelpEngineCore engine(collectionFile, 0);
    if (!engine.setupData())
        return;

    QFileInfo fInfo(indexPath);
    if (fInfo.exists() && !fInfo.isWritable()) {
        qWarning("Full Text Search, could not read index (missing permissions).");
        return;
    }

    if (QCLuceneIndexReader::indexExists(indexPath)) {
        mutex.lock();
        if (m_cancel) {
            mutex.unlock();
            return;
        }
        mutex.unlock();

        emit searchingStarted();

#if !defined(QT_NO_EXCEPTIONS)
        try {
#endif
            QCLuceneBooleanQuery booleanQuery;
            QCLuceneStandardAnalyzer analyzer;
            if (!buildQuery(booleanQuery, queryList, analyzer)) {
                emit searchingFinished(0);
                return;
            }

            // Restrict the search to the attributes of the current filter.
            const QStringList attribList = engine.filterAttributes(engine.currentFilter());
            QString attributeFilter;
            if (!attribList.isEmpty()) {
                attributeFilter = QLatin1String("+") + attribList.join(QLatin1String(" +"));
                QCLuceneQuery *filterQuery = QCLuceneQueryParser::parse(attributeFilter,
                    QLatin1String("attribute"), analyzer);
                if (!filterQuery) {
                    emit searchingFinished(0);
                    return;
                }
                booleanQuery.add(filterQuery, true, true, false);
            }

            QCLuceneIndexSearcher indexSearcher(indexPath);
            QCLuceneHits hits = indexSearcher.search(booleanQuery);

            bool boost = true;
            QCLuceneBooleanQuery tryHarderQuery;
            if (hits.length() == 0) {
                // Nothing found: retry with the query built by
                // buildTryHarderQuery().
                if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) {
                    if (!attribList.isEmpty()) {
                        QCLuceneQuery *filterQuery = QCLuceneQueryParser::parse(
                            attributeFilter, QLatin1String("attribute"), analyzer);
                        // Same guard as for the first attempt; a null query
                        // must never be handed to the boolean query.
                        if (!filterQuery) {
                            emit searchingFinished(0);
                            return;
                        }
                        tryHarderQuery.add(filterQuery, true, true, false);
                    }
                    hits = indexSearcher.search(tryHarderQuery);
                    boost = (hits.length() == 0);
                }
            }

            QSet<QString> pathSet;
            QCLuceneDocument document;
            const QStringList namespaceList = engine.registeredDocumentations();

            for (qint32 i = 0; i < hits.length(); i++) {
                document = hits.document(i);
                const QString path = document.get(QLatin1String("path"));
                if (!pathSet.contains(path)
                    && namespaceList.contains(document.get(QLatin1String("namespace")),
                        Qt::CaseInsensitive)) {
                    pathSet.insert(path);
                    hitList.append(qMakePair(path, document.get(QLatin1String("title"))));
                }
                document.clear();

                // Check for cancellation after every document.
                mutex.lock();
                if (m_cancel) {
                    mutex.unlock();
                    emit searchingFinished(0);
                    return;
                }
                mutex.unlock();
            }

            indexSearcher.close();
            const int count = hitList.count();
            if ((count > 0) && boost)
                boostSearchHits(engine, hitList, queryList);
            emit searchingFinished(hitList.count());

#if !defined(QT_NO_EXCEPTIONS)
        } catch(...) {
            // Any failure while searching yields an empty result.
            mutex.lock();
            hitList.clear();
            mutex.unlock();
            emit searchingFinished(0);
        }
#endif
    }
}

/*
    Parses \a term against both the "content" and the "titleTokenized" field
    and adds the two resulting queries to \a booleanQuery.

    Returns true if both parses succeeded. Otherwise nothing is added, any
    query that was created is deleted, and false is returned.

    NOTE(review): the add() arguments are presumably (query, takeOwnership,
    required, prohibited) - confirm against QCLuceneBooleanQuery.
*/
bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term,
    QCLuceneBooleanQuery &booleanQuery, QCLuceneStandardAnalyzer &analyzer)
{
    const QLatin1String c("content");
    const QLatin1String t("titleTokenized");

    QCLuceneQuery *contentQuery = QCLuceneQueryParser::parse(term, c, analyzer);
    QCLuceneQuery *titleQuery = QCLuceneQueryParser::parse(term, t, analyzer);
    if (contentQuery && titleQuery) {
        booleanQuery.add(contentQuery, true, false, false);
        booleanQuery.add(titleQuery, true, false, false);
        return true;
    }

    // Neither query was handed over, so whichever parse succeeded would
    // leak otherwise (deleting a null pointer is a no-op).
    delete contentQuery;
    delete titleQuery;
    return false;
}

/*
    Translates every entry of \a queryList into CLucene queries and adds them
    to \a booleanQuery. How the words of an entry are turned into queries
    depends on the entry's fieldName (FUZZY, WITHOUT, PHRASE, ALL, DEFAULT or
    ATLEAST).

    Returns false as soon as a word cannot be turned into a query (FUZZY and
    ATLEAST also reject empty words), true otherwise. A PHRASE entry without
    any word is skipped.
*/
bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery,
    const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
{
    foreach (const QHelpSearchQuery &query, queryList) {
        switch (query.fieldName) {
            case QHelpSearchQuery::FUZZY: {
                const QLatin1String fuzzy("~");
                foreach (const QString &term, query.wordList) {
                    if (term.isEmpty()
                        || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) {
                        return false;
                    }
                }
            }   break;

            case QHelpSearchQuery::WITHOUT: {
                const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
                foreach (const QString &term, query.wordList) {
                    if (stopWords.contains(term, Qt::CaseInsensitive))
                        continue;

                    QCLuceneQuery *contentQuery = new QCLuceneTermQuery(QCLuceneTerm(
                        QLatin1String("content"), term.toLower()));
                    QCLuceneQuery *titleQuery = new QCLuceneTermQuery(QCLuceneTerm(
                        QLatin1String("titleTokenized"), term.toLower()));

                    if (contentQuery && titleQuery) {
                        booleanQuery.add(contentQuery, true, false, true);
                        booleanQuery.add(titleQuery, true, false, true);
                    } else {
                        delete contentQuery;
                        delete titleQuery;
                        return false;
                    }
                }
            }   break;

            case QHelpSearchQuery::PHRASE: {
                // Only the first word list entry is used; without one there
                // is nothing to add and at(0) would be out of range.
                if (query.wordList.isEmpty())
                    break;

                const QString phrase = query.wordList.at(0).toLower();
                if (phrase.contains(QLatin1Char(' '))) {
                    const QStringList words = phrase.split(QLatin1String(" "));
                    QCLucenePhraseQuery *phraseQuery = new QCLucenePhraseQuery();
                    const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
                    foreach (const QString &word, words) {
                        if (!stopWords.contains(word, Qt::CaseInsensitive)) {
                            phraseQuery->addTerm(QCLuceneTerm(QLatin1String("content"),
                                word.toLower()));
                        }
                    }
                    booleanQuery.add(phraseQuery, true, true, false);
                } else {
                    QCLuceneQuery *contentQuery = new QCLuceneTermQuery(QCLuceneTerm(
                        QLatin1String("content"), phrase.toLower()));
                    QCLuceneQuery *titleQuery = new QCLuceneTermQuery(QCLuceneTerm(
                        QLatin1String("titleTokenized"), phrase.toLower()));

                    if (contentQuery && titleQuery) {
                        booleanQuery.add(contentQuery, true, true, false);
                        booleanQuery.add(titleQuery, true, false, false);
                    } else {
                        delete contentQuery;
                        delete titleQuery;
                        return false;
                    }
                }
            }   break;

            case QHelpSearchQuery::ALL: {
                const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
                foreach (const QString &term, query.wordList) {
                    if (stopWords.contains(term, Qt::CaseInsensitive))
                        continue;

                    QCLuceneQuery *contentQuery = new QCLuceneTermQuery(QCLuceneTerm(
                        QLatin1String("content"), term.toLower()));

                    if (contentQuery) {
                        booleanQuery.add(contentQuery, true, true, false);
                    } else {
                        return false;
                    }
                }
            }   break;

            case QHelpSearchQuery::DEFAULT: {
                foreach (const QString &term, query.wordList) {
                    // Words the parser rejects are skipped, not treated as
                    // an error.
                    QCLuceneQuery *contentQuery = QCLuceneQueryParser::parse(term.toLower(),
                        QLatin1String("content"), analyzer);

                    if (contentQuery)
                        booleanQuery.add(contentQuery, true, true, false);
                }
            }   break;

            case QHelpSearchQuery::ATLEAST: {
                foreach (const QString &term, query.wordList) {
                    if (term.isEmpty()
                        || !defaultQuery(term.toLower(), booleanQuery, analyzer)) {
                        return false;
                    }
                }
            }   break;
        }
    }

    return true;
}

/*
    Builds the fallback query used when the regular query produced no hits.
    Only DEFAULT entries of \a queryList are considered; each of their words
    is parsed against the "content" field and added to \a booleanQuery with
    add(query, true, false, false), whereas buildQuery() passes
    (query, true, true, false) for the same entries.

    Returns true if at least one query was added.
*/
bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery,
    const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
{
    bool retVal = false;
    foreach (const QHelpSearchQuery &query, queryList) {
        switch (query.fieldName) {
            default:
                break;

            case QHelpSearchQuery::DEFAULT: {
                foreach (const QString &term, query.wordList) {
                    QCLuceneQuery *contentQuery = QCLuceneQueryParser::parse(term.toLower(),
                        QLatin1String("content"), analyzer);

                    if (contentQuery) {
                        retVal = true;
                        booleanQuery.add(contentQuery, true, false, false);
                    }
                }
            }   break;
        }
    }
    return retVal;
}

/*
    Re-orders the leading entries of \a hitList (at most 75) by how often the
    search terms of each DEFAULT entry in \a queryList occur in the
    document's data, most occurrences first. Entries beyond that window keep
    their position. Does nothing if \a hitList is empty.

    The terms are taken from the string form of the parsed query: every
    "content:" marker starts a term; a term enclosed in double quotes is kept
    as one phrase, anything else is split at spaces.
*/
void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine,
    QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList)
{
    // Nothing to re-order.
    if (hitList.isEmpty())
        return;

    // Upper bound for the number of documents whose data gets scanned.
    const int maxBoostedHits = 75;

    foreach (const QHelpSearchQuery &query, queryList) {
        if (query.fieldName != QHelpSearchQuery::DEFAULT)
            continue;

        QString joinedQuery = query.wordList.join(QLatin1String(" "));

        QCLuceneStandardAnalyzer analyzer;
        QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
            joinedQuery, QLatin1String("content"), analyzer);

        if (parsedQuery) {
            joinedQuery = parsedQuery->toString();
            delete parsedQuery;
        }

        const int prefixLength = QString(QLatin1String("content:")).length();
        int index = joinedQuery.indexOf(QLatin1String("content:"));

        QStringList searchTerms;
        while (index != -1) {
            const int nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1);
            const int termStart = index + prefixLength;
            // The last term runs to the end of the string (-1 == "all the rest").
            const int termLength = (nextIndex == -1) ? -1 : nextIndex - termStart;

            QString term = joinedQuery.mid(termStart, termLength).simplified();
            if (term.startsWith(QLatin1String("\"")) && term.endsWith(QLatin1String("\""))) {
                searchTerms.append(term.remove(QLatin1String("\"")));
            } else {
                searchTerms += term.split(QLatin1Char(' '));
            }
            index = nextIndex;
        }
        searchTerms.removeDuplicates();

        const int count = qMin(maxBoostedHits, hitList.count());
        QMap<int, QHelpSearchEngine::SearchHit> hitMap;
        for (int i = 0; i < count; ++i) {
            const QHelpSearchEngine::SearchHit &hit = hitList.at(i);
            const QString data = QString::fromUtf8(engine.fileData(hit.first));

            int counter = 0;
            foreach (const QString &searchTerm, searchTerms)
                counter += data.count(searchTerm, Qt::CaseInsensitive);
            hitMap.insertMulti(counter, hit);
        }

        // Walk the map backwards so the highest counts come first. The loop
        // checks before decrementing, so an empty map is never touched.
        QList<QHelpSearchEngine::SearchHit> boostedList;
        QMap<int, QHelpSearchEngine::SearchHit>::const_iterator it = hitMap.constEnd();
        while (it != hitMap.constBegin()) {
            --it;
            boostedList.append(it.value());
        }
        boostedList += hitList.mid(count);

        mutex.lock();
        hitList = boostedList;
        mutex.unlock();
    }
}

}   // namespace clucene
}   // namespace fulltextsearch
}   // namespace qt

QT_END_NAMESPACE