summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp362
1 files changed, 362 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp b/3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp
new file mode 100644
index 000000000..c948cfa4b
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#include "CLucene/StdHeader.h"
+#include "IndexSearcher.h"
+
+#include "SearchHeader.h"
+#include "Scorer.h"
+#include "FieldDocSortedHitQueue.h"
+#include "CLucene/store/Directory.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/index/IndexReader.h"
+#include "CLucene/index/Term.h"
+#include "CLucene/util/BitSet.h"
+#include "FieldSortedHitQueue.h"
+
+CL_NS_USE(index)
+CL_NS_USE(util)
+CL_NS_USE(document)
+
+CL_NS_DEF(search)
+
+class SimpleTopDocsCollector : public HitCollector
+{
+private:
+ qreal minScore;
+ const CL_NS(util)::BitSet* bits;
+ HitQueue* hq;
+ size_t nDocs;
+ int32_t* totalHits;
+
+public:
+ SimpleTopDocsCollector(const CL_NS(util)::BitSet* bs, HitQueue* hitQueue,
+ int32_t* totalhits, size_t ndocs, const qreal ms=-1.0f)
+ : minScore(ms),
+ bits(bs),
+ hq(hitQueue),
+ nDocs(ndocs),
+ totalHits(totalhits) {}
+ ~SimpleTopDocsCollector() {}
+
+ void collect(const int32_t doc, const qreal score)
+ {
+ if (score > 0.0f // ignore zeroed buckets
+ && (bits == NULL || bits->get(doc))) { // skip docs not in bits
+ ++totalHits[0];
+ if (hq->size() < nDocs || (minScore==-1.0f || score >= minScore)) {
+ ScoreDoc sd = {doc, score};
+ hq->insert(sd); // update hit queue
+ if ( minScore != -1.0f )
+ minScore = hq->top().score; // maintain minScore
+ }
+ }
+ }
+};
+
+class SortedTopDocsCollector : public HitCollector
+{
+private:
+ const CL_NS(util)::BitSet* bits;
+ FieldSortedHitQueue* hq;
+ size_t nDocs;
+ int32_t* totalHits;
+public:
+ SortedTopDocsCollector(const CL_NS(util)::BitSet* bs,
+ FieldSortedHitQueue* hitQueue, int32_t* totalhits, size_t _nDocs)
+ : bits(bs),
+ hq(hitQueue),
+ nDocs(_nDocs),
+ totalHits(totalhits)
+ {
+ }
+ ~SortedTopDocsCollector() {}
+
+ void collect(const int32_t doc, const qreal score)
+ {
+ if (score > 0.0f && // ignore zeroed buckets
+ (bits==NULL || bits->get(doc))) { // skip docs not in bits
+ ++totalHits[0];
+ // TODO: see jlucene way... with fields def???
+ FieldDoc* fd = _CLNEW FieldDoc(doc, score);
+ if ( !hq->insert(fd) ) // update hit queue
+ _CLDELETE(fd);
+ }
+ }
+};
+
+class SimpleFilteredCollector : public HitCollector
+{
+private:
+ CL_NS(util)::BitSet* bits;
+ HitCollector* results;
+public:
+ SimpleFilteredCollector(CL_NS(util)::BitSet* bs, HitCollector* collector)
+ : bits(bs),
+ results(collector) {}
+ ~SimpleFilteredCollector() {}
+
+protected:
+ void collect(const int32_t doc, const qreal score)
+ {
+ // skip docs not in bits
+ if (bits->get(doc))
+ results->collect(doc, score);
+ }
+};
+
+
+IndexSearcher::IndexSearcher(const QString& path)
+{
+ //Func - Constructor
+ // Creates a searcher searching the index in the named directory.
+ //Pre - path != NULL
+ //Post - The instance has been created
+
+ CND_PRECONDITION(!path.isEmpty(), "path is NULL");
+
+ reader = IndexReader::open(path);
+ readerOwner = true;
+}
+
+IndexSearcher::IndexSearcher(CL_NS(store)::Directory* directory)
+{
+ //Func - Constructor
+ // Creates a searcher searching the index in the specified directory.
+ //Pre - path != NULL
+ //Post - The instance has been created
+
+ CND_PRECONDITION(directory != NULL, "directory is NULL");
+
+ reader = IndexReader::open(directory);
+ readerOwner = true;
+}
+
+IndexSearcher::IndexSearcher(IndexReader* r)
+{
+ //Func - Constructor
+ // Creates a searcher searching the index with the provide IndexReader
+ //Pre - path != NULL
+ //Post - The instance has been created
+
+ reader = r;
+ readerOwner = false;
+}
+
+IndexSearcher::~IndexSearcher()
+{
+ //Func - Destructor
+ //Pre - true
+ //Post - The instance has been destroyed
+
+ close();
+}
+
+void IndexSearcher::close()
+{
+ //Func - Frees resources associated with this Searcher.
+ //Pre - true
+ //Post - The resources associated have been freed
+ if (readerOwner && reader){
+ reader->close();
+ _CLDELETE(reader);
+ }
+}
+
+// inherit javadoc
+int32_t IndexSearcher::docFreq(const Term* term) const
+{
+ //Func -
+ //Pre - reader != NULL
+ //Post -
+
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+ return reader->docFreq(term);
+}
+
+// inherit javadoc
+bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document* d)
+{
+ //Func - Retrieves i-th document found
+ // For use by HitCollector implementations.
+ //Pre - reader != NULL
+ //Post - The i-th document has been returned
+
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+ return reader->document(i,d);
+}
+
+// inherit javadoc
+int32_t IndexSearcher::maxDoc() const
+{
+ //Func - Return total number of documents including the ones marked deleted
+ //Pre - reader != NULL
+ //Post - The total number of documents including the ones marked deleted
+ // has been returned
+
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+ return reader->maxDoc();
+}
+
+TopDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs)
+{
+ //Func -
+ //Pre - reader != NULL
+ //Post -
+
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+ CND_PRECONDITION(query != NULL, "query is NULL");
+
+ Weight* weight = query->weight(this);
+ Scorer* scorer = weight->scorer(reader);
+ if (scorer == NULL){
+ return _CLNEW TopDocs(0, NULL, 0);
+ }
+
+ BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
+ HitQueue* hq = _CLNEW HitQueue(nDocs);
+
+ //Check hq has been allocated properly
+ CND_CONDITION(hq != NULL, "Could not allocate memory for HitQueue hq");
+
+ int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
+ totalHits[0] = 0;
+
+ SimpleTopDocsCollector hitCol(bits,hq,totalHits,nDocs,0.0f);
+ scorer->score( &hitCol );
+ _CLDELETE(scorer);
+
+ int32_t scoreDocsLength = hq->size();
+
+ ScoreDoc* scoreDocs = _CL_NEWARRAY(ScoreDoc,scoreDocsLength);
+
+ for (int32_t i = scoreDocsLength-1; i >= 0; --i) // put docs in array
+ scoreDocs[i] = hq->pop();
+
+ int32_t totalHitsInt = totalHits[0];
+
+ _CLDELETE(hq);
+ if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
+ _CLDELETE(bits);
+ _CLDELETE_ARRAY(totalHits);
+ Query* wq = weight->getQuery();
+ if ( query != wq ) //query was re-written
+ _CLLDELETE(wq);
+ _CLDELETE(weight);
+
+ return _CLNEW TopDocs(totalHitsInt, scoreDocs, scoreDocsLength);
+}
+
+// inherit javadoc
+TopFieldDocs* IndexSearcher::_search(Query* query, Filter* filter,
+ const int32_t nDocs, const Sort* sort)
+{
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+ CND_PRECONDITION(query != NULL, "query is NULL");
+
+ Weight* weight = query->weight(this);
+ Scorer* scorer = weight->scorer(reader);
+ if (scorer == NULL) {
+ return _CLNEW TopFieldDocs(0, NULL, 0, NULL );
+ }
+
+ BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
+ FieldSortedHitQueue hq(reader, sort->getSort(), nDocs);
+ int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
+ totalHits[0]=0;
+
+ SortedTopDocsCollector hitCol(bits,&hq,totalHits,nDocs);
+ scorer->score(&hitCol);
+ _CLDELETE(scorer);
+
+ int32_t hqLen = hq.size();
+ FieldDoc** fieldDocs = _CL_NEWARRAY(FieldDoc*,hqLen);
+ for (int32_t i = hqLen-1; i >= 0; --i){ // put docs in array
+ fieldDocs[i] = hq.fillFields (hq.pop());
+ }
+
+ Query* wq = weight->getQuery();
+ if ( query != wq ) //query was re-written
+ _CLLDELETE(wq);
+ _CLDELETE(weight);
+
+ SortField** hqFields = hq.getFields();
+ hq.setFields(NULL); //move ownership of memory over to TopFieldDocs
+ int32_t totalHits0 = totalHits[0];
+ if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
+ _CLDELETE(bits);
+ _CLDELETE_ARRAY(totalHits);
+ return _CLNEW TopFieldDocs(totalHits0, fieldDocs, hqLen, hqFields );
+}
+
+void IndexSearcher::_search(Query* query, Filter* filter, HitCollector* results)
+{
+ //Func - _search an index and fetch the results
+ // Applications should only use this if they need all of the
+ // matching documents. The high-level search API (search(Query))
+ // is usually more efficient, as it skips non-high-scoring hits.
+ //Pre - query is a valid reference to a query filter may or may not be NULL
+ // results is a valid reference to a HitCollector and used to store the results
+ //Post - filter if non-NULL, a bitset used to eliminate some documents
+
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+ CND_PRECONDITION(query != NULL, "query is NULL");
+
+ BitSet* bits = NULL;
+ SimpleFilteredCollector* fc = NULL;
+
+ if (filter != NULL){
+ bits = filter->bits(reader);
+ fc = _CLNEW SimpleFilteredCollector(bits, results);
+ }
+
+ Weight* weight = query->weight(this);
+ Scorer* scorer = weight->scorer(reader);
+ if (scorer != NULL) {
+ if (fc == NULL){
+ scorer->score(results);
+ }else{
+ scorer->score((HitCollector*)fc);
+ }
+ _CLDELETE(scorer);
+ }
+
+ _CLDELETE(fc);
+ _CLDELETE(weight);
+ if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
+ _CLDELETE(bits);
+}
+
+Query* IndexSearcher::rewrite(Query* original)
+{
+ Query* query = original;
+ Query* last = original;
+ for (Query* rewrittenQuery = query->rewrite(reader);
+ rewrittenQuery != query;
+ rewrittenQuery = query->rewrite(reader)) {
+ query = rewrittenQuery;
+ if ( query != last && last != original) {
+ _CLDELETE(last);
+ }
+ last = query;
+ }
+ return query;
+}
+
+void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret)
+{
+ Weight* weight = query->weight(this);
+ weight->explain(reader, doc, ret);
+
+ Query* wq = weight->getQuery();
+ if ( query != wq ) //query was re-written
+ _CLLDELETE(wq);
+ _CLDELETE(weight);
+}
+
+CL_NS_END