summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp227
1 files changed, 227 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp b/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
new file mode 100644
index 000000000..bed7f0d61
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
@@ -0,0 +1,227 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "MultiSearcher.h"
+
+#include "SearchHeader.h"
+#include "HitQueue.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/index/Term.h"
+#include "FieldDocSortedHitQueue.h"
+
+CL_NS_USE(index)
+CL_NS_USE(util)
+CL_NS_USE(document)
+
+CL_NS_DEF(search)
+
+ /** Creates a searcher which searches <i>searchers</i>. */
+ MultiSearcher::MultiSearcher(Searchable** _searchables):
+ _maxDoc(0) {
+ searchablesLen = 0;
+ while ( _searchables[searchablesLen] != NULL )
+ ++searchablesLen;
+
+ searchables=_CL_NEWARRAY(Searchable*,searchablesLen+1);
+ starts = _CL_NEWARRAY(int32_t,searchablesLen + 1); // build starts array
+ for (int32_t i = 0; i < searchablesLen; ++i) {
+ searchables[i]=_searchables[i];
+ starts[i] = _maxDoc;
+ _maxDoc += searchables[i]->maxDoc(); // compute maxDocs
+ }
+ starts[searchablesLen] = _maxDoc;
+ }
+
+ MultiSearcher::~MultiSearcher() {
+ _CLDELETE_ARRAY(searchables);
+ _CLDELETE_ARRAY(starts);
+ }
+
+
+ // inherit javadoc
+ void MultiSearcher::close() {
+ for (int32_t i = 0; i < searchablesLen; ++i){
+ searchables[i]->close();
+ searchables[i]=NULL;
+ }
+ }
+
+ int32_t MultiSearcher::docFreq(const Term* term) const {
+ int32_t docFreq = 0;
+ for (int32_t i = 0; i < searchablesLen; ++i)
+ docFreq += searchables[i]->docFreq(term);
+ return docFreq;
+ }
+
+ /** For use by {@link HitCollector} implementations. */
+ bool MultiSearcher::doc(int32_t n, Document* d) {
+ int32_t i = subSearcher(n); // find searcher index
+ return searchables[i]->doc(n - starts[i], d); // dispatch to searcher
+ }
+
+ int32_t MultiSearcher::searcherIndex(int32_t n) const{
+ return subSearcher(n);
+ }
+
+ /** Returns index of the searcher for document <code>n</code> in the array
+ * used to construct this searcher. */
+ int32_t MultiSearcher::subSearcher(int32_t n) const{
+ // replace w/ call to Arrays.binarySearch in Java 1.2
+ int32_t lo = 0; // search starts array
+ int32_t hi = searchablesLen - 1; // for first element less
+ // than n, return its index
+ int32_t mid,midValue;
+ while (hi >= lo) {
+ mid = (lo + hi) >> 1;
+ midValue = starts[mid];
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else{ // found a match
+ while (mid+1 < searchablesLen && starts[mid+1] == midValue) {
+ ++mid; // scan to last match
+ }
+ return mid;
+ }
+ }
+ return hi;
+ }
+
+ /** Returns the document number of document <code>n</code> within its
+ * sub-index. */
+ int32_t MultiSearcher::subDoc(int32_t n) const{
+ return n - starts[subSearcher(n)];
+ }
+
+ int32_t MultiSearcher::maxDoc() const{
+ return _maxDoc;
+ }
+
+ TopDocs* MultiSearcher::_search(Query* query, Filter* filter, const int32_t nDocs) {
+ HitQueue* hq = _CLNEW HitQueue(nDocs);
+ int32_t totalHits = 0;
+ TopDocs* docs;
+ int32_t j;
+ ScoreDoc* scoreDocs;
+ for (int32_t i = 0; i < searchablesLen; i++) { // search each searcher
+ docs = searchables[i]->_search(query, filter, nDocs);
+ totalHits += docs->totalHits; // update totalHits
+ scoreDocs = docs->scoreDocs;
+ for ( j = 0; j <docs->scoreDocsLength; ++j) { // merge scoreDocs int_to hq
+ scoreDocs[j].doc += starts[i]; // convert doc
+ if ( !hq->insert(scoreDocs[j]))
+ break; // no more scores > minScore
+ }
+
+ _CLDELETE(docs);
+ }
+
+ int32_t scoreDocsLen = hq->size();
+ scoreDocs = _CL_NEWARRAY(ScoreDoc, scoreDocsLen);
+ {//MSVC 6 scope fix
+ for (int32_t i = scoreDocsLen-1; i >= 0; --i) // put docs in array
+ scoreDocs[i] = hq->pop();
+ }
+
+ //cleanup
+ _CLDELETE(hq);
+
+ return _CLNEW TopDocs(totalHits, scoreDocs, scoreDocsLen);
+ }
+
+ /** Lower-level search API.
+ *
+ * <p>{@link HitCollector#collect(int32_t,qreal)} is called for every non-zero
+ * scoring document.
+ *
+ * <p>Applications should only use this if they need <i>all</i> of the
+ * matching documents. The high-level search API ({@link
+ * Searcher#search(Query)}) is usually more efficient, as it skips
+ * non-high-scoring hits.
+ *
+ * @param query to match documents
+ * @param filter if non-null, a bitset used to eliminate some documents
+ * @param results to receive hits
+ */
+ void MultiSearcher::_search(Query* query, Filter* filter, HitCollector* results){
+ for (int32_t i = 0; i < searchablesLen; ++i) {
+ /* DSR:CL_BUG: Old implementation leaked and was misconceived. We need
+ ** to have the original HitCollector ($results) collect *all* hits;
+ ** the MultiHitCollector instantiated below serves only to adjust
+ ** (forward by starts[i]) the docNo passed to $results.
+ ** Old implementation instead created a sort of linked list of
+ ** MultiHitCollectors that applied the adjustments in $starts
+ ** cumulatively (and was never deleted). */
+ HitCollector *docNoAdjuster = _CLNEW MultiHitCollector(results, starts[i]);
+ searchables[i]->_search(query, filter, docNoAdjuster);
+ _CLDELETE(docNoAdjuster);
+ }
+ }
+
+ TopFieldDocs* MultiSearcher::_search (Query* query, Filter* filter, const int32_t n, const Sort* sort){
+ FieldDocSortedHitQueue* hq = NULL;
+ int32_t totalHits = 0;
+ TopFieldDocs* docs;
+ int32_t j;
+ FieldDoc** fieldDocs;
+
+ for (int32_t i = 0; i < searchablesLen; ++i) { // search each searcher
+ docs = searchables[i]->_search (query, filter, n, sort);
+ if (hq == NULL){
+ hq = _CLNEW FieldDocSortedHitQueue (docs->fields, n);
+ docs->fields = NULL; //hit queue takes fields memory
+ }
+
+ totalHits += docs->totalHits; // update totalHits
+ fieldDocs = docs->fieldDocs;
+ for(j = 0;j<docs->scoreDocsLength;++j){ // merge scoreDocs into hq
+ fieldDocs[j]->scoreDoc.doc += starts[i]; // convert doc
+ if (!hq->insert (fieldDocs[j]) )
+ break; // no more scores > minScore
+ }
+ for ( int32_t x=0;x<j;++x )
+ fieldDocs[x]=NULL; //move ownership of FieldDoc to the hitqueue
+
+ _CLDELETE(docs);
+ }
+
+ int32_t hqlen = hq->size();
+ fieldDocs = _CL_NEWARRAY(FieldDoc*,hqlen);
+ for (j = hqlen - 1; j >= 0; j--) // put docs in array
+ fieldDocs[j] = hq->pop();
+
+ SortField** hqFields = hq->getFields();
+ hq->setFields(NULL); //move ownership of memory over to TopFieldDocs
+ _CLDELETE(hq);
+
+ return _CLNEW TopFieldDocs (totalHits, fieldDocs, hqlen, hqFields);
+ }
+
+ Query* MultiSearcher::rewrite(Query* original) {
+ Query** queries = _CL_NEWARRAY(Query*,searchablesLen+1);
+ for (int32_t i = 0; i < searchablesLen; ++i)
+ queries[i] = searchables[i]->rewrite(original);
+ queries[searchablesLen]=NULL;
+ return original->combine(queries);
+ }
+
+ void MultiSearcher::explain(Query* query, int32_t doc, Explanation* ret) {
+ int32_t i = subSearcher(doc); // find searcher index
+ searchables[i]->explain(query,doc-starts[i], ret); // dispatch to searcher
+ }
+
+ MultiHitCollector::MultiHitCollector(HitCollector* _results, int32_t _start):
+ results(_results),
+ start(_start) {
+ }
+
+ void MultiHitCollector::collect(const int32_t doc, const qreal score) {
+ results->collect(doc + start, score);
+ }
+
+CL_NS_END