summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/search/Similarity.cpp
diff options
context:
space:
mode:
authorQt by Nokia <qt-info@nokia.com>2011-04-27 12:05:43 +0200
committeraxis <qt-info@nokia.com>2011-04-27 12:05:43 +0200
commit50123887ba0f33cf47520bee7c419d68742af2d1 (patch)
tree0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/search/Similarity.cpp
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/Similarity.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/search/Similarity.cpp233
1 files changed, 233 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/Similarity.cpp b/3rdparty/clucene/src/CLucene/search/Similarity.cpp
new file mode 100644
index 000000000..d33a036e4
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/Similarity.cpp
@@ -0,0 +1,233 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "Similarity.h"
+
+#include "CLucene/index/Term.h"
+#include "SearchHeader.h"
+
+CL_NS_USE(index)
+CL_NS_DEF(search)
+
+#ifdef _CL_HAVE_NO_FLOAT_BYTE
+ #if defined(_LUCENE_PRAGMA_WARNINGS)
+ #pragma message ("==================Using fallback float<->byte encodings!!!==================")
+ #else
+ #warning "==================Using fallback float<->byte encodings!!!=================="
+ #endif
+
+ //if the autoconf figured out that we can't do the conversions properly, then
+ //we fall back on the old, inaccurate way of doing things.
+ qreal NORM_TABLE[] = {
+ 0.0,5.820766E-10,6.9849193E-10,8.1490725E-10,9.313226E-10,1.1641532E-9,1.3969839E-9,
+ 1.6298145E-9,1.8626451E-9,2.3283064E-9,2.7939677E-9,3.259629E-9,3.7252903E-9,
+ 4.656613E-9,5.5879354E-9,6.519258E-9,7.4505806E-9,9.313226E-9,1.1175871E-8,1.3038516E-8,
+ 1.4901161E-8,1.8626451E-8,2.2351742E-8,2.6077032E-8,2.9802322E-8,3.7252903E-8,4.4703484E-8,
+ 5.2154064E-8,5.9604645E-8,7.4505806E-8,8.940697E-8,1.0430813E-7,1.1920929E-7,1.4901161E-7,
+ 1.7881393E-7,2.0861626E-7,2.3841858E-7,2.9802322E-7,3.5762787E-7,4.172325E-7,4.7683716E-7,
+ 5.9604645E-7,7.1525574E-7,8.34465E-7,9.536743E-7,1.1920929E-6,1.4305115E-6,1.66893E-6,
+ 1.9073486E-6,2.3841858E-6,2.861023E-6,3.33786E-6,3.8146973E-6,4.7683716E-6,5.722046E-6,
+ 6.67572E-6,7.6293945E-6,9.536743E-6,1.1444092E-5,1.335144E-5,1.5258789E-5,1.9073486E-5,
+ 2.2888184E-5,2.670288E-5,3.0517578E-5,3.8146973E-5,4.5776367E-5,5.340576E-5,6.1035156E-5,
+ 7.6293945E-5,9.1552734E-5,1.0681152E-4,1.2207031E-4,1.5258789E-4,1.8310547E-4,2.1362305E-4,
+ 2.4414062E-4,3.0517578E-4,3.6621094E-4,4.272461E-4,4.8828125E-4,6.1035156E-4,7.324219E-4,
+ 8.544922E-4,9.765625E-4,0.0012207031,0.0014648438,0.0017089844,0.001953125,0.0024414062,
+ 0.0029296875,0.0034179688,0.00390625,0.0048828125,0.005859375,0.0068359375,
+ 0.0078125,0.009765625,0.01171875,0.013671875,0.015625,0.01953125,0.0234375,
+ 0.02734375,0.03125,0.0390625,0.046875,0.0546875,0.0625,0.078125,0.09375,0.109375,
+ 0.125,0.15625,0.1875,0.21875,0.25,0.3125,0.375,0.4375,0.5,0.625,0.75,
+ 0.875,1.0,1.25,1.5,1.75,2,2.5,3,3.5,4.0,5.0,6.0,7.0,8.0,10.0,12.0,14.0,16.0,20.0,24.0,28.0,32.0,40.0,48.0,56.0,
+ 64.0,80.0,96.0,112.0,128.0,160.0,192.0,224.0,256.0,320.0,384.0,448.0,512.0,640.0,768.0,896.0,1024.0,1280.0,1536.0,1792.0,
+ 2048.0,2560.0,3072.0,3584.0,4096.0,5120.0,6144.0,7168.0,8192.0,10240.0,12288.0,14336.0,16384.0,20480.0,24576.0,
+ 28672.0,32768.0,40960.0,49152.0,57344.0,65536.0,81920.0,98304.0,114688.0,131072.0,163840.0,196608.0,
+ 229376.0,262144.0,327680.0,393216.0,458752.0,524288.0,655360.0,786432.0,917504.0,1048576.0,1310720.0,
+ 1572864.0,1835008.0,2097152.0,2621440.0,3145728.0,3670016.0,4194304.0,5242880.0,6291456.0,7340032.0,
+ 8388608.0,10485760.0,12582912.0,14680064.0,16777216.0,20971520.0,25165824.0,29360128.0,33554432.0,
+ 41943040.0,50331648.0,58720256.0,67108864.0,83886080.0,100663296.0,117440512.0,134217728.0,
+ 167772160.0,201326592.0,234881024.0,268435456.0,335544320.0,402653184.0,469762048.0,536870912.0,
+ 671088640.0,805306368.0,939524096.0,1073741824.0,1342177280.0,1610612736.0,1879048192.0,
+ 2147483648.0,2684354560.0,3221225472.0,3758096384.0,4294967296.0,5368709120.0,6442450944.0,7516192768.0
+ };
+
+ qreal Similarity::byteToFloat(uint8_t b) {
+ return NORM_TABLE[b];
+ }
+
+ uint8_t Similarity::floatToByte(qreal f) {
+ return Similarity::encodeNorm(f);
+ }
+
+#else
+
+ /** Cache of decoded bytes. */
+ qreal NORM_TABLE[256];
+ bool NORM_TABLE_initd=false;
+
+ //float to bits conversion utilities...
+ union clvalue {
+ int32_t i;
+ float f; //must use a float type, else types dont match up
+ };
+
+ int32_t floatToIntBits(qreal value)
+ {
+ clvalue u;
+ int32_t e, f;
+ u.f = (float)value;
+ e = u.i & 0x7f800000;
+ f = u.i & 0x007fffff;
+
+ if (e == 0x7f800000 && f != 0)
+ u.i = 0x7fc00000;
+
+ return u.i;
+ }
+
+ qreal intBitsToFloat(int32_t bits)
+ {
+ clvalue u;
+ u.i = bits;
+ return u.f;
+ }
+
+
+ qreal Similarity::byteToFloat(uint8_t b) {
+ if (b == 0) // zero is a special case
+ return 0.0f;
+ int32_t mantissa = b & 7;
+ int32_t exponent = (b >> 3) & 31;
+ int32_t bits = ((exponent+(63-15)) << 24) | (mantissa << 21);
+ return intBitsToFloat(bits);
+ }
+
+ uint8_t Similarity::floatToByte(qreal f) {
+ if (f < 0.0f) // round negatives up to zero
+ f = 0.0f;
+
+ if (f == 0.0f) // zero is a special case
+ return 0;
+
+ int32_t bits = floatToIntBits(f); // parse qreal into parts
+ int32_t mantissa = (bits & 0xffffff) >> 21;
+ int32_t exponent = (((bits >> 24) & 0x7f) - 63) + 15;
+
+ if (exponent > 31) { // overflow: use max value
+ exponent = 31;
+ mantissa = 7;
+ }
+
+ if (exponent < 0) { // underflow: use min value
+ exponent = 0;
+ mantissa = 1;
+ }
+
+ return (uint8_t)((exponent << 3) | mantissa); // pack into a uint8_t
+ }
+#endif
+
+ /** The Similarity implementation used by default. */
+ Similarity* _defaultImpl=NULL;
+
+ void Similarity::setDefault(Similarity* similarity) {
+ _defaultImpl = similarity;
+ }
+
+ Similarity* Similarity::getDefault() {
+ if ( _defaultImpl == NULL ){
+ _defaultImpl = _CLNEW DefaultSimilarity();
+ }
+ return _defaultImpl;
+ }
+
+ qreal Similarity::decodeNorm(uint8_t b) {
+#ifndef _CL_HAVE_NO_FLOAT_BYTE
+ if ( !NORM_TABLE_initd ){
+ for (int i = 0; i < 256; i++)
+ NORM_TABLE[i] = byteToFloat(i);
+ NORM_TABLE_initd=true;
+ }
+#endif
+ return NORM_TABLE[b];
+ }
+
+ uint8_t Similarity::encodeNorm(qreal f) {
+#ifdef _CL_HAVE_NO_FLOAT_BYTE
+ int32_t i=0;
+ if ( f <= 0 )
+ return 0;
+
+ while ( i<256 && f > NORM_TABLE[i] ){
+ i++;
+ }
+ if ( i == 0 )
+ return 0;
+ else if ( i == 255 && f>NORM_TABLE[255] )
+ return 255;
+ else
+ return i;
+#else
+ return floatToByte(f);
+#endif
+ }
+
+
+ qreal Similarity::idf(Term* term, Searcher* searcher) {
+ return idf(searcher->docFreq(term), searcher->maxDoc());
+ }
+
+
+ qreal Similarity::idf(CL_NS(util)::CLVector<Term*>* terms, Searcher* searcher) {
+ qreal _idf = 0.0f;
+ for (CL_NS(util)::CLVector<Term*>::iterator i = terms->begin(); i != terms->end(); i++ ) {
+ _idf += idf((Term*)*i, searcher);
+ }
+ return _idf;
+ }
+
+ Similarity::~Similarity(){
+ }
+
+
+
+
+ DefaultSimilarity::DefaultSimilarity(){
+ }
+ DefaultSimilarity::~DefaultSimilarity(){
+ }
+
+ qreal DefaultSimilarity::lengthNorm(const TCHAR* fieldName, int32_t numTerms) {
+ if ( numTerms == 0 ) //prevent div by zero
+ return 0;
+ qreal ret = (qreal)(1.0 / sqrt((qreal)numTerms));
+ return ret;
+ }
+
+ qreal DefaultSimilarity::queryNorm(qreal sumOfSquaredWeights) {
+ if ( sumOfSquaredWeights == 0 ) //prevent div by zero
+ return 0.0f;
+ qreal ret = (qreal)(1.0 / sqrt(sumOfSquaredWeights));
+ return ret;
+ }
+
+ qreal DefaultSimilarity::tf(qreal freq) {
+ return sqrt(freq);
+ }
+
+ qreal DefaultSimilarity::sloppyFreq(int32_t distance) {
+ return 1.0f / (distance + 1);
+ }
+
+ qreal DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) {
+ return (qreal)(log(numDocs/(qreal)(docFreq+1)) + 1.0);
+ }
+
+ qreal DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) {
+ if ( maxOverlap == 0 )
+ return 0.0f;
+ return overlap / (qreal)maxOverlap;
+ }
+CL_NS_END