diff options
author | Qt by Nokia <qt-info@nokia.com> | 2011-04-27 12:05:43 +0200 |
---|---|---|
committer | axis <qt-info@nokia.com> | 2011-04-27 12:05:43 +0200 |
commit | 50123887ba0f33cf47520bee7c419d68742af2d1 (patch) | |
tree | 0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/search/TermScorer.cpp |
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you
want to look at revision history older than this, please refer to the
Qt Git wiki for how to use Git history grafting. At the time of
writing, this wiki is located here:
http://qt.gitorious.org/qt/pages/GitIntroductionWithQt
If you have already performed the grafting and you don't see any
history beyond this commit, try running "git log" with the "--follow"
argument.
Branched from the monolithic repo, Qt master branch, at commit
896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/TermScorer.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/search/TermScorer.cpp | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/TermScorer.cpp b/3rdparty/clucene/src/CLucene/search/TermScorer.cpp new file mode 100644 index 000000000..ddd7f74ed --- /dev/null +++ b/3rdparty/clucene/src/CLucene/search/TermScorer.cpp @@ -0,0 +1,120 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "TermScorer.h" + +#include "CLucene/index/Terms.h" +#include "TermQuery.h" + +CL_NS_USE(index) +CL_NS_DEF(search) + + //TermScorer takes TermDocs and delets it when TermScorer is cleaned up + TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td, + Similarity* similarity,uint8_t* _norms): + Scorer(similarity), + termDocs(td), + norms(_norms), + weight(w), + weightValue(w->getValue()), + _doc(0), + pointer(0), + pointerMax(0) + { + memset(docs,0,32*sizeof(int32_t)); + memset(freqs,0,32*sizeof(int32_t)); + + for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++) + scoreCache[i] = getSimilarity()->tf(i) * weightValue; + } + + TermScorer::~TermScorer(){ + _CLDELETE(termDocs); + } + bool TermScorer::next(){ + pointer++; + if (pointer >= pointerMax) { + pointerMax = termDocs->read(docs, freqs, 32); // refill buffer + if (pointerMax != 0) { + pointer = 0; + } else { + termDocs->close(); // close stream + _doc = LUCENE_INT32_MAX_SHOULDBE; // set to sentinel value + return false; + } + } + _doc = docs[pointer]; + return true; + } + + bool TermScorer::skipTo(int32_t target) { + // first scan in cache + for (pointer++; pointer < pointerMax; pointer++) { + if (docs[pointer] >= target) { + _doc = docs[pointer]; + return true; + } + } + + // not found in cache, seek underlying stream + bool result = termDocs->skipTo(target); + if (result) { + pointerMax = 1; + pointer = 0; + docs[pointer] = _doc = termDocs->doc(); + freqs[pointer] = termDocs->freq(); + } else { + _doc = LUCENE_INT32_MAX_SHOULDBE; + } + return result; + } + + void TermScorer::explain(int32_t doc, Explanation* tfExplanation) { + TermQuery* query = (TermQuery*)weight->getQuery(); + int32_t tf = 0; + while (pointer < pointerMax) { + if (docs[pointer] == doc) + tf = freqs[pointer]; + pointer++; + } + if (tf == 0) { + while (termDocs->next()) { + if (termDocs->doc() == doc) { + tf = termDocs->freq(); + } + } + } + termDocs->close(); + tfExplanation->setValue(getSimilarity()->tf(tf)); + + TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1]; + TCHAR* termToString = query->getTerm(false)->toString(); + _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf); + _CLDELETE_CARRAY(termToString); + tfExplanation->setDescription(buf); + } + + TCHAR* TermScorer::toString() { + TCHAR* wb = weight->toString(); + int32_t rl = _tcslen(wb) + 9; //9=_tcslen("scorer(" ")") + 1 + TCHAR* ret = _CL_NEWARRAY(TCHAR,rl); + _sntprintf(ret,rl,_T("scorer(%s)"), wb); + _CLDELETE_ARRAY(wb); + return ret; + } + + qreal TermScorer::score(){ + int32_t f = freqs[pointer]; + qreal raw = // compute tf(f)*weight + f < LUCENE_SCORE_CACHE_SIZE // check cache + ? scoreCache[f] // cache hit + : getSimilarity()->tf(f) * weightValue; // cache miss + + return raw * Similarity::decodeNorm(norms[_doc]); // normalize for field + } + +CL_NS_END |