summary | refs | log | tree | commit | diff | stats
path: root/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
diff options
context:
space:
mode:
author: Qt by Nokia <qt-info@nokia.com> 2011-04-27 12:05:43 +0200
committer: axis <qt-info@nokia.com> 2011-04-27 12:05:43 +0200
commit: 50123887ba0f33cf47520bee7c419d68742af2d1 (patch)
tree: 0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp')
-rw-r--r-- 3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp | 227
1 files changed, 227 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp b/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
new file mode 100644
index 000000000..bed7f0d61
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/MultiSearcher.cpp
@@ -0,0 +1,227 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "MultiSearcher.h"
+
+#include "SearchHeader.h"
+#include "HitQueue.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/index/Term.h"
+#include "FieldDocSortedHitQueue.h"
+
+CL_NS_USE(index)
+CL_NS_USE(util)
+CL_NS_USE(document)
+
+CL_NS_DEF(search)
+
+ /** Builds a searcher over the NULL-terminated array <i>_searchables</i>. The pointers are
+  *  copied, and starts[k] records the global number of sub-searcher k's first document. */
+ MultiSearcher::MultiSearcher(Searchable** _searchables): _maxDoc(0) {
+     // The caller passes no length; count entries up to the NULL terminator.
+     for (searchablesLen = 0; _searchables[searchablesLen] != NULL; ++searchablesLen) {}
+
+     searchables = _CL_NEWARRAY(Searchable*, searchablesLen + 1);
+     starts = _CL_NEWARRAY(int32_t, searchablesLen + 1);
+     for (int32_t idx = 0; idx < searchablesLen; ++idx) {
+         Searchable* const current = _searchables[idx];
+         searchables[idx] = current;
+         starts[idx] = _maxDoc;           // documents held by the earlier sub-searchers
+         _maxDoc += current->maxDoc();    // running total across sub-searchers
+     }
+     starts[searchablesLen] = _maxDoc;    // final entry: total document count
+ }
+
+ MultiSearcher::~MultiSearcher() { // releases the two arrays built by the constructor; close() is not called here
+     _CLDELETE_ARRAY(starts);
+     _CLDELETE_ARRAY(searchables);
+ }
+
+
+ /** Closes every sub-searcher and clears its slot. Slots that are already NULL (e.g. after an earlier close()) are skipped, so repeated calls are safe. */
+ void MultiSearcher::close() {
+     for (int32_t i = 0; i < searchablesLen; ++i) {
+         if (searchables[i] != NULL) searchables[i]->close();   // NULL slot: nothing left to close
+         searchables[i] = NULL;
+     }
+ }
+
+ /** Returns the sum of docFreq(term) over all sub-searchers. */
+ int32_t MultiSearcher::docFreq(const Term* term) const {
+     int32_t total = 0;
+     for (int32_t k = 0; k < searchablesLen; ++k) total += searchables[k]->docFreq(term);
+     return total;
+ }
+
+ /** Fetches global document <code>n</code> into <i>d</i> through the sub-searcher that holds it. */
+ bool MultiSearcher::doc(int32_t n, Document* d) {
+     const int32_t owner = subSearcher(n);                   // index of the sub-searcher holding n
+     return searchables[owner]->doc(n - starts[owner], d);   // n in that sub-searcher's own numbering
+ }
+
+ int32_t MultiSearcher::searcherIndex(int32_t n) const{ // same result as subSearcher(n): index of the sub-searcher for global doc n
+ return subSearcher(n);
+ }
+
+ /** Finds, by binary search over starts[0 .. searchablesLen-1], the index of the
+  *  sub-searcher holding global document <code>n</code>. An exact match on a start
+  *  value yields the last index with that value; otherwise the search's final upper
+  *  bound is returned, which is -1 when n is below starts[0]. */
+ int32_t MultiSearcher::subSearcher(int32_t n) const{
+     int32_t low = 0;
+     int32_t high = searchablesLen - 1;
+     while (low <= high) {
+         const int32_t probe = (low + high) >> 1;
+         const int32_t probeStart = starts[probe];
+         if (n == probeStart) {
+             // Neighbouring entries may share this start value; advance to the last one.
+             int32_t last = probe;
+             while (last + 1 < searchablesLen && starts[last + 1] == probeStart)
+                 ++last;
+             return last;
+         }
+         if (n < probeStart)
+             high = probe - 1;
+         else
+             low = probe + 1;
+     }
+     return high;    // no exact hit: the range emptied, high is the last index whose start is below n
+ }
+
+ /** Converts global document number <code>n</code> to its number within the sub-searcher that holds it. */
+ int32_t MultiSearcher::subDoc(int32_t n) const{
+     const int32_t owner = subSearcher(n);
+     return n - starts[owner];
+ }
+
+ int32_t MultiSearcher::maxDoc() const{ // returns the total accumulated by the constructor from each sub-searcher's maxDoc()
+ return _maxDoc;
+ }
+
+ /** Runs <i>query</i> on every sub-searcher, passing <i>filter</i> through and requesting
+  *  up to nDocs hits from each, then merges the hits through a HitQueue created with
+  *  size nDocs. Document numbers are shifted by starts[] so the result uses global
+  *  numbering.
+  *  @return a new TopDocs holding the summed totalHits and the merged ScoreDoc array */
+ TopDocs* MultiSearcher::_search(Query* query, Filter* filter, const int32_t nDocs) {
+     HitQueue* merged = _CLNEW HitQueue(nDocs);
+     int32_t hitTotal = 0;
+
+     for (int32_t s = 0; s < searchablesLen; s++) {
+         TopDocs* partial = searchables[s]->_search(query, filter, nDocs);
+         hitTotal += partial->totalHits;
+         ScoreDoc* hits = partial->scoreDocs;
+         const int32_t base = starts[s];
+         for (int32_t h = 0; h < partial->scoreDocsLength; ++h) {
+             hits[h].doc += base;             // local -> global document number
+             if (!merged->insert(hits[h]))
+                 break;                       // insert() refused this hit; skip the rest of this result
+         }
+         _CLDELETE(partial);
+     }
+
+     // Fill the output array from the back, one pop() per slot.
+     const int32_t mergedLen = merged->size();
+     ScoreDoc* ordered = _CL_NEWARRAY(ScoreDoc, mergedLen);
+     for (int32_t slot = mergedLen - 1; slot >= 0; --slot)
+         ordered[slot] = merged->pop();
+
+     _CLDELETE(merged);
+     return _CLNEW TopDocs(hitTotal, ordered, mergedLen);
+ }
+
+ /** Lower-level search API.
+  *
+  * <p>Every hit reported by every sub-searcher is forwarded to
+  * {@link HitCollector#collect(int32_t,qreal)} on <i>results</i>.
+  *
+  * <p>Applications should only use this if they need <i>all</i> of the
+  * matching documents. The high-level search API ({@link
+  * Searcher#search(Query)}) is usually more efficient, as it skips
+  * non-high-scoring hits.
+  *
+  * <p>Sub-searchers report document numbers local to themselves, so each one
+  * gets its own short-lived MultiHitCollector that adds starts[i] before
+  * handing the hit to <i>results</i>. Using a fresh adjuster per sub-searcher
+  * keeps the offsets from accumulating and lets each one be deleted here.
+  *
+  * @param query to match documents
+  * @param filter if non-null, a bitset used to eliminate some documents
+  * @param results to receive hits
+  */
+ void MultiSearcher::_search(Query* query, Filter* filter, HitCollector* results){
+     int32_t idx = 0;
+     while (idx < searchablesLen) {
+         HitCollector* shifted = _CLNEW MultiHitCollector(results, starts[idx]);
+         searchables[idx]->_search(query, filter, shifted);
+         _CLDELETE(shifted);
+         ++idx;
+     }
+ }
+
+ TopFieldDocs* MultiSearcher::_search (Query* query, Filter* filter, const int32_t n, const Sort* sort){ // sorted search: merges each sub-searcher's FieldDocs into one new TopFieldDocs
+ FieldDocSortedHitQueue* hq = NULL; // created on the first loop iteration, from the first result's fields
+ int32_t totalHits = 0; // sum of totalHits over all sub-results
+ TopFieldDocs* docs; // result of the sub-searcher currently being merged
+ int32_t j; // declared outside the loops: read after the merge loop and reused when draining hq
+ FieldDoc** fieldDocs; // first the current sub-result's array, later the merged output array
+
+ for (int32_t i = 0; i < searchablesLen; ++i) { // search each searcher
+ docs = searchables[i]->_search (query, filter, n, sort);
+ if (hq == NULL){
+ hq = _CLNEW FieldDocSortedHitQueue (docs->fields, n);
+ docs->fields = NULL; // the hit queue now holds the fields array; cleared so docs no longer refers to it
+ }
+
+ totalHits += docs->totalHits; // update totalHits
+ fieldDocs = docs->fieldDocs;
+ for(j = 0;j<docs->scoreDocsLength;++j){ // merge this result's FieldDocs into hq
+ fieldDocs[j]->scoreDoc.doc += starts[i]; // convert local doc number to global
+ if (!hq->insert (fieldDocs[j]) )
+ break; // insert() refused this entry; j stays at the first entry not inserted
+ }
+ for ( int32_t x=0;x<j;++x )
+ fieldDocs[x]=NULL; // entries [0, j) now belong to the hit queue; clear them before docs is deleted
+
+ _CLDELETE(docs);
+ }
+
+ int32_t hqlen = hq->size(); // NOTE(review): hq is still NULL here when searchablesLen == 0 - confirm an empty MultiSearcher never reaches this
+ fieldDocs = _CL_NEWARRAY(FieldDoc*,hqlen);
+ for (j = hqlen - 1; j >= 0; j--) // fill the array from the back, one pop() per slot
+ fieldDocs[j] = hq->pop();
+
+ SortField** hqFields = hq->getFields();
+ hq->setFields(NULL); // detach the fields from hq before deleting it; they are passed to the TopFieldDocs below
+ _CLDELETE(hq);
+
+ return _CLNEW TopFieldDocs (totalHits, fieldDocs, hqlen, hqFields);
+ }
+
+ /** Asks each sub-searcher to rewrite <i>original</i>, then returns original->combine() of the results. */
+ Query* MultiSearcher::rewrite(Query* original) {
+     Query** rewritten = _CL_NEWARRAY(Query*, searchablesLen + 1);
+     for (int32_t k = 0; k < searchablesLen; ++k) rewritten[k] = searchables[k]->rewrite(original);
+     rewritten[searchablesLen] = NULL;     // the array handed to combine() is NULL-terminated
+     return original->combine(rewritten);  // NOTE(review): array is not released here - confirm combine() takes ownership
+ }
+
+ void MultiSearcher::explain(Query* query, int32_t doc, Explanation* ret) { // forwards to the sub-searcher holding global doc; ret is passed through
+     const int32_t owner = subSearcher(doc);
+     searchables[owner]->explain(query, doc - starts[owner], ret);   // doc in the sub-searcher's own numbering
+ }
+
+ MultiHitCollector::MultiHitCollector(HitCollector* _results, int32_t _start): // wraps _results; collect() adds _start to every doc number
+ results(_results), // collector that receives the adjusted hits
+ start(_start) { // offset added to each document number
+ }
+
+ void MultiHitCollector::collect(const int32_t doc, const qreal score) { // forwards the hit with doc shifted by start; score is unchanged
+ results->collect(doc + start, score);
+ }
+
+CL_NS_END