diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/SloppyPhraseScorer.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/search/SloppyPhraseScorer.cpp | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/SloppyPhraseScorer.cpp b/3rdparty/clucene/src/CLucene/search/SloppyPhraseScorer.cpp new file mode 100644 index 000000000..b7683b018 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/search/SloppyPhraseScorer.cpp @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "SloppyPhraseScorer.h" + +#include "PhraseScorer.h" +#include "CLucene/index/Terms.h" + +CL_NS_USE(index) +CL_NS_DEF(search) + + SloppyPhraseScorer::SloppyPhraseScorer(Weight* weight, CL_NS(index)::TermPositions** tps, + int32_t* positions, Similarity* similarity, + int32_t slop, uint8_t* norms): + PhraseScorer(weight,tps,positions,similarity,norms){ + //Func - Constructor + //Pre - tps != NULL + // tpsLength >= 0 + // n != NULL + //Post - Instance has been created + + CND_PRECONDITION(tps != NULL, "tps is NULL"); + //CND_PRECONDITION(n != NULL, _T("n is NULL")) = checked in PhraseScorer; + + this->slop = slop; + } + + qreal SloppyPhraseScorer::phraseFreq() { + //Func - Returns the freqency of the phrase + //Pre - first != NULL + // last != NULL + // pq != NULL + //Post - The frequency of the phrase has been returned + + CND_PRECONDITION(first != NULL,"first is NULL"); + CND_PRECONDITION(last != NULL,"last is NULL"); + CND_PRECONDITION(pq != NULL,"pq is NULL"); + + //Clear the PhraseQueue pq; + pq->clear(); + + int32_t end = 0; + + //declare iterator + PhrasePositions* pp = NULL; + + // build pq from list + + //Sort the list of PhrasePositions using pq + for (pp = first; pp != NULL; pp = pp->_next) { + //Read the first TermPosition of the current PhrasePositions pp + pp->firstPosition(); + //Check if the position of the pp is bigger than end + if (pp->position > end){ + end = pp->position; + } + //Store the current PhrasePositions pp into the PhraseQueue pp + pq->put(pp); + } + + qreal freq = 0.0f; + + bool done = false; + + do { + //Pop a PhrasePositions pp from the PhraseQueue pp + pp = pq->pop(); + //Get start position + int32_t start = pp->position; + //Get next position + int32_t next = pq->top()->position; + + for (int32_t pos = start; pos <= next; pos = pp->position) { + //advance pp to min window + start = pos; + + if (!pp->nextPosition()) { + //ran out of a term -- done + done = true; + break; + } + } + + //Calculate matchLength + int32_t matchLength = end - start; + //Check if matchLength is smaller than slop + if (matchLength <= slop){ + // penalize longer matches + freq += 1.0 / (matchLength + 1); + } + + if (pp->position > end){ + end = pp->position; + } + + //restore pq + pq->put(pp); + }while (!done); + + return freq; + } +CL_NS_END |