summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/search/TermScorer.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/TermScorer.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/search/TermScorer.cpp120
1 files changed, 120 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/TermScorer.cpp b/3rdparty/clucene/src/CLucene/search/TermScorer.cpp
new file mode 100644
index 000000000..ddd7f74ed
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/TermScorer.cpp
@@ -0,0 +1,120 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "TermScorer.h"
+
+#include "CLucene/index/Terms.h"
+#include "TermQuery.h"
+
+CL_NS_USE(index)
+CL_NS_DEF(search)
+
+ // TermScorer takes the TermDocs and deletes it when the TermScorer is cleaned up.
+ // The constructor caches the weight's value, zeroes the doc/freq buffers and
+ // precomputes tf(i)*weightValue for every i below LUCENE_SCORE_CACHE_SIZE.
+ TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td,
+     Similarity* similarity,uint8_t* _norms):
+     Scorer(similarity),
+     termDocs(td),
+     norms(_norms),
+     weight(w),
+     weightValue(w->getValue()),
+     _doc(0),
+     pointer(0),
+     pointerMax(0)
+ {
+     // both buffers start out empty (32 entries each)
+     memset(docs, 0, 32*sizeof(int32_t));
+     memset(freqs, 0, 32*sizeof(int32_t));
+
+     // fill the score cache so score() can skip the tf() call for small frequencies
+     int32_t freq = 0;
+     while (freq < LUCENE_SCORE_CACHE_SIZE) {
+         scoreCache[freq] = getSimilarity()->tf(freq) * weightValue;
+         ++freq;
+     }
+ }
+
+ // Releases the TermDocs that was handed to the constructor.
+ TermScorer::~TermScorer()
+ {
+     _CLDELETE(termDocs);
+ }
+ // Advances to the next buffered document, refilling the buffer from termDocs
+ // when it runs out. Returns false (and sets _doc to the sentinel value) once
+ // termDocs has nothing more to deliver.
+ bool TermScorer::next()
+ {
+     ++pointer;
+
+     // fast path: the next entry is still in the buffer
+     if (pointer < pointerMax) {
+         _doc = docs[pointer];
+         return true;
+     }
+
+     // buffer exhausted: try to read up to 32 more entries
+     pointerMax = termDocs->read(docs, freqs, 32);
+     if (pointerMax == 0) {
+         termDocs->close();                   // close stream
+         _doc = LUCENE_INT32_MAX_SHOULDBE;    // set to sentinel value
+         return false;
+     }
+
+     pointer = 0;
+     _doc = docs[pointer];
+     return true;
+ }
+
+ // Moves to the first document whose number is >= target. The buffered
+ // entries are checked first; if none qualifies, the request is forwarded to
+ // termDocs. Returns false (and sets _doc to the sentinel value) when
+ // termDocs->skipTo() reports failure.
+ bool TermScorer::skipTo(int32_t target)
+ {
+     // look through what is left of the buffer
+     ++pointer;
+     while (pointer < pointerMax) {
+         if (docs[pointer] >= target) {
+             _doc = docs[pointer];
+             return true;
+         }
+         ++pointer;
+     }
+
+     // nothing suitable buffered: seek in the underlying stream
+     if (!termDocs->skipTo(target)) {
+         _doc = LUCENE_INT32_MAX_SHOULDBE;
+         return false;
+     }
+
+     // the buffer now holds exactly the entry termDocs is positioned on
+     pointerMax = 1;
+     pointer = 0;
+     _doc = termDocs->doc();
+     docs[pointer] = _doc;
+     freqs[pointer] = termDocs->freq();
+     return true;
+ }
+
+ // Fills tfExplanation with the term-frequency factor for document `doc`.
+ //
+ // The frequency is looked up first in the remaining buffered entries (which
+ // are consumed in the process) and, if not found there, by iterating
+ // termDocs. termDocs is closed before returning, so the scorer cannot be
+ // advanced further afterwards.
+ //
+ // doc            document number to look up
+ // tfExplanation  receives the value tf(freq) and a description of the form
+ //                "tf(termFreq(<term>)=<freq>)"
+ void TermScorer::explain(int32_t doc, Explanation* tfExplanation) {
+     TermQuery* query = (TermQuery*)weight->getQuery();
+     int32_t tf = 0;
+
+     // scan the rest of the buffer; pointer ends up at pointerMax
+     while (pointer < pointerMax) {
+         if (docs[pointer] == doc)
+             tf = freqs[pointer];
+         pointer++;
+     }
+
+     // not buffered: walk the underlying stream
+     if (tf == 0) {
+         while (termDocs->next()) {
+             if (termDocs->doc() == doc) {
+                 tf = termDocs->freq();
+                 break; // found it; the stream is closed right below anyway
+             }
+         }
+     }
+     termDocs->close();
+     tfExplanation->setValue(getSimilarity()->tf(tf));
+
+     TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1];
+     TCHAR* termToString = query->getTerm(false)->toString();
+     _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf);
+     // _snprintf/_snwprintf-style implementations do not write a terminator
+     // when the output is truncated; the extra slot in buf guarantees one.
+     buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN] = 0;
+     _CLDELETE_CARRAY(termToString);
+     tfExplanation->setDescription(buf);
+ }
+
+ // Returns a newly allocated string of the form "scorer(<weight>)".
+ // The array is created with _CL_NEWARRAY and handed to the caller.
+ TCHAR* TermScorer::toString()
+ {
+     TCHAR* weightText = weight->toString();
+
+     // "scorer(" and ")" contribute 8 characters, plus 1 for the terminator
+     int32_t capacity = 9 + _tcslen(weightText);
+     TCHAR* result = _CL_NEWARRAY(TCHAR, capacity);
+     _sntprintf(result, capacity, _T("scorer(%s)"), weightText);
+
+     _CLDELETE_ARRAY(weightText);
+     return result;
+ }
+
+ // Computes the score of the current buffer entry: tf(freq) * weightValue,
+ // multiplied by the decoded norm stored for _doc.
+ qreal TermScorer::score()
+ {
+     const int32_t freq = freqs[pointer];
+
+     qreal raw;
+     if (freq < LUCENE_SCORE_CACHE_SIZE) {
+         raw = scoreCache[freq];                          // cache hit
+     } else {
+         raw = getSimilarity()->tf(freq) * weightValue;   // cache miss
+     }
+
+     // normalize for field
+     return raw * Similarity::decodeNorm(norms[_doc]);
+ }
+
+CL_NS_END