diff options
author | Qt by Nokia <qt-info@nokia.com> | 2011-04-27 12:05:43 +0200 |
---|---|---|
committer | axis <qt-info@nokia.com> | 2011-04-27 12:05:43 +0200 |
commit | 50123887ba0f33cf47520bee7c419d68742af2d1 (patch) | |
tree | 0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp |
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you
want to look at revision history older than this, please refer to the
Qt Git wiki for how to use Git history grafting. At the time of
writing, this wiki is located here:
http://qt.gitorious.org/qt/pages/GitIntroductionWithQt
If you have already performed the grafting and you don't see any
history beyond this commit, try running "git log" with the "--follow"
argument.
Branched from the monolithic repo, Qt master branch, at commit
896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp b/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp new file mode 100644 index 000000000..50951e9ba --- /dev/null +++ b/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp @@ -0,0 +1,216 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "SegmentHeader.h" + +#include "CLucene/store/IndexInput.h" +#include "Term.h" + +CL_NS_DEF(index) + + SegmentTermDocs::SegmentTermDocs(const SegmentReader* _parent){ + //Func - Constructor + //Pre - Paren != NULL + //Post - The instance has been created + + CND_PRECONDITION(_parent != NULL,"Parent is NULL"); + + parent = _parent; + deletedDocs = parent->deletedDocs; + + _doc = 0; + _freq = 0; + count = 0; + df = 0; + + skipInterval=0; + numSkips=0; + skipCount=0; + skipStream=NULL; + skipDoc=0; + freqPointer=0; + proxPointer=0; + skipPointer=0; + haveSkipped=false; + + freqStream = parent->freqStream->clone(); + skipInterval = parent->tis->getSkipInterval(); + } + + SegmentTermDocs::~SegmentTermDocs() { + //Func - Destructor + //Pre - true + //Post - The instance has been destroyed + + close(); + } + + TermPositions* SegmentTermDocs::__asTermPositions(){ + return NULL; + } + + void SegmentTermDocs::seek(Term* term) { + TermInfo* ti = parent->tis->get(term); + seek(ti); + _CLDELETE(ti); + } + + void SegmentTermDocs::seek(TermEnum* termEnum){ + TermInfo* ti=NULL; + + // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs + if ( termEnum->getObjectName() == SegmentTermEnum::getClassName() ){ + SegmentTermEnum* te = (SegmentTermEnum*)termEnum; + te->fieldInfos = parent->fieldInfos; + ti = te->getTermInfo(); + }else{ + ti = parent->tis->get(termEnum->term(false)); + } + + seek(ti); + _CLDELETE(ti); + } + void SegmentTermDocs::seek(const TermInfo* ti) { + count = 0; + if (ti == NULL) { + df = 0; + } else { + df = ti->docFreq; + _doc = 0; + skipDoc = 0; + skipCount = 0; + numSkips = df / skipInterval; + freqPointer = ti->freqPointer; + proxPointer = ti->proxPointer; + skipPointer = freqPointer + ti->skipOffset; + freqStream->seek(freqPointer); + haveSkipped = false; + } + } + + void SegmentTermDocs::close() { + + //Check if freqStream still exists + if (freqStream != NULL){ + freqStream->close(); //todo: items like these can probably be delete, because deleting the object also closes it...do everywhere + _CLDELETE( freqStream ); + } + if (skipStream != NULL){ + skipStream->close(); + _CLDELETE( skipStream ); + } + } + + int32_t SegmentTermDocs::doc()const { + return _doc; + } + int32_t SegmentTermDocs::freq()const { + return _freq; + } + + +bool SegmentTermDocs::next() +{ + while (true) { + if (count == df) + return false; + + uint32_t docCode = freqStream->readVInt(); + _doc += docCode >> 1; //unsigned shift + if ((docCode & 1) != 0) // if low bit is set + _freq = 1; // _freq is one + else + _freq = freqStream->readVInt(); // else read _freq + count++; + + if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) + break; + skippingDoc(); + } + return true; +} + + +int32_t SegmentTermDocs::read(int32_t* docs, int32_t* freqs, int32_t length) +{ + int32_t i = 0; + // TODO: one optimization would be to get the pointer buffer for ram or mmap + // dirs and iterate over them instead of using readByte() intensive functions. + while (i < length && count < df) { + uint32_t docCode = freqStream->readVInt(); + _doc += docCode >> 1; + if ((docCode & 1) != 0) // if low bit is set + _freq = 1; // _freq is one + else + _freq = freqStream->readVInt(); // else read _freq + count++; + + if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) { + docs[i] = _doc; + freqs[i] = _freq; + i++; + } + } + return i; +} + + bool SegmentTermDocs::skipTo(const int32_t target){ + if (df >= skipInterval) { // optimized case + if (skipStream == NULL) + skipStream = freqStream->clone(); // lazily clone + + if (!haveSkipped) { // lazily seek skip stream + skipStream->seek(skipPointer); + haveSkipped = true; + } + + // scan skip data + int32_t lastSkipDoc = skipDoc; + int64_t lastFreqPointer = freqStream->getFilePointer(); + int64_t lastProxPointer = -1; + int32_t numSkipped = -1 - (count % skipInterval); + + while (target > skipDoc) { + lastSkipDoc = skipDoc; + lastFreqPointer = freqPointer; + lastProxPointer = proxPointer; + + if (skipDoc != 0 && skipDoc >= _doc) + numSkipped += skipInterval; + + if(skipCount >= numSkips) + break; + + skipDoc += skipStream->readVInt(); + freqPointer += skipStream->readVInt(); + proxPointer += skipStream->readVInt(); + + skipCount++; + } + + // if we found something to skip, then skip it + if (lastFreqPointer > freqStream->getFilePointer()) { + freqStream->seek(lastFreqPointer); + skipProx(lastProxPointer); + + _doc = lastSkipDoc; + count += numSkipped; + } + + } + + // done skipping, now just scan + + do { + if (!next()) + return false; + } while (target > _doc); + return true; + } + + +CL_NS_END |