diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp b/3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp new file mode 100644 index 000000000..b2da2316a --- /dev/null +++ b/3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp @@ -0,0 +1,225 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "PhraseScorer.h" + +#include "PhraseQueue.h" +#include "PhrasePositions.h" +#include "Scorer.h" +#include "Similarity.h" + +CL_NS_USE(index) +CL_NS_USE(util) +CL_NS_DEF(search) + + + PhraseScorer::PhraseScorer(Weight* weight, TermPositions** tps, + int32_t* positions, Similarity* similarity, uint8_t* norms): + Scorer(similarity) + { + //Func - Constructor + //Pre - tps != NULL and is an array of TermPositions + // tpsLength >= 0 + // n != NULL + //Post - The instance has been created + + CND_PRECONDITION(tps != NULL,"tps is NULL"); + + //norms are only used if phraseFreq returns more than 0.0 + //phraseFreq should only return more than 0.0 if norms != NULL + //CND_PRECONDITION(n != NULL,"n is NULL"); + + firstTime = true; + more = true; + this->norms = norms; + this->weight = weight; + this->value = weight->getValue(); + + //reset internal pointers + first = NULL; + last = NULL; + + //use pq to build a sorted list of PhrasePositions + int32_t i = 0; + while(tps[i] != NULL){ + PhrasePositions *pp = _CLNEW PhrasePositions(tps[i], positions[i]); + CND_CONDITION(pp != NULL,"Could not allocate memory for pp"); + + //Store PhrasePos into the PhrasePos pq + if (last != NULL) { // add next to end of list + last->_next = pp; + } else + first = pp; + last = pp; + + i++; + } + + pq = _CLNEW PhraseQueue(i); //i==tps.length + CND_CONDITION(pq != NULL,"Could not allocate memory for pq"); + } + + PhraseScorer::~PhraseScorer() { + //Func - Destructor + //Pre - true + //Post - The instance has been destroyed + + //The PhraseQueue pq (which is a PriorityQueue) pq is actually empty at present, the elements + //having been transferred by pqToList() to the linked list starting with + //first. The nodes of that linked list are deleted by the destructor of + //first, rather than the destructor of pq. + _CLDELETE(first); + _CLDELETE(pq); + } + + bool PhraseScorer::next(){ + if (firstTime) { + init(); + firstTime = false; + } else if (more) { + more = last->next(); // trigger further scanning + } + return doNext(); + } + + // next without initial increment + bool PhraseScorer::doNext() { + while (more) { + while (more && first->doc < last->doc) { // find doc w/ all the terms + more = first->skipTo(last->doc); // skip first upto last + firstToLast(); // and move it to the end + } + + if (more) { + // found a doc with all of the terms + freq = phraseFreq(); // check for phrase + if (freq == 0.0f) // no match + more = last->next(); // trigger further scanning + else + return true; // found a match + } + } + return false; // no more matches + } + + qreal PhraseScorer::score(){ + //System.out.println("scoring " + first.doc); + qreal raw = getSimilarity()->tf(freq) * value; // raw score + return raw * Similarity::decodeNorm(norms[first->doc]); // normalize + } + + bool PhraseScorer::skipTo(int32_t target) { + for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next) { + more = pp->skipTo(target); + } + if (more) + sort(); // re-sort + return doNext(); + } + + void PhraseScorer::init() { + for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next) + more = pp->next(); + if(more) + sort(); + } + + void PhraseScorer::sort() { + pq->clear(); + for (PhrasePositions* pp = first; pp != NULL; pp = pp->_next) + pq->put(pp); + pqToList(); + } + + + + void PhraseScorer::pqToList(){ + //Func - Transfers the PhrasePositions from the PhraseQueue pq to + // the PhrasePositions list with first as its first element + //Pre - pq != NULL + // first = NULL + // last = NULL + //Post - All PhrasePositions have been transfered to the list + // of PhrasePositions of which the first element is pointed to by first + // and the last element is pointed to by last + + CND_PRECONDITION(pq != NULL,"pq is NULL"); + + last = first = NULL; + + PhrasePositions* PhrasePos = NULL; + + //As long pq is not empty + while (pq->top() != NULL){ + //Pop a PhrasePositions instance + PhrasePos = pq->pop(); + + // add next to end of list + if (last != NULL) { + last->_next = PhrasePos; + } else { + first = PhrasePos; + } + + //Let last point to the new last PhrasePositions instance just added + last = PhrasePos; + //Reset the next of last to NULL + last->_next = NULL; + } + + //Check to see that pq is empty now + CND_CONDITION(pq->size()==0, "pq is not empty while it should be"); + } + + void PhraseScorer::firstToLast(){ + //Func - Moves first to the end of the list + //Pre - first is NULL or points to an PhrasePositions Instance + // last is NULL or points to an PhrasePositions Instance + // first and last both are NULL or both are not NULL + //Post - The first element has become the last element in the list + + CND_PRECONDITION(((first==NULL && last==NULL) ||(first !=NULL && last != NULL)), + "Either first or last is NULL but not both"); + + //Check if first and last are valid pointers + if(first && last){ + last->_next = first; + last = first; + first = first->_next; + last->_next = NULL; + } + } + + + void PhraseScorer::explain(int32_t _doc, Explanation* tfExplanation) { + while (next() && doc() < _doc){ + } + + qreal phraseFreq = (doc() == _doc) ? freq : 0.0f; + tfExplanation->setValue(getSimilarity()->tf(phraseFreq)); + + StringBuffer buf; + buf.append(_T("tf(phraseFreq=")); + buf.appendFloat(phraseFreq,2); + buf.append(_T(")")); + tfExplanation->setDescription(buf.getBuffer()); + } + + TCHAR* PhraseScorer::toString() { + StringBuffer buf; + buf.append(_T("scorer(")); + + TCHAR* tmp = weight->toString(); + buf.append(tmp); + _CLDELETE_CARRAY(tmp); + + buf.append(_T(")")); + + return buf.toString(); + } + +CL_NS_END |