summary | refs | log | tree | commit | diff | stats
path: root/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp
diff options
context:
space:
mode:
author: Qt by Nokia <qt-info@nokia.com> 2011-04-27 12:05:43 +0200
committer: axis <qt-info@nokia.com> 2011-04-27 12:05:43 +0200
commit: 50123887ba0f33cf47520bee7c419d68742af2d1 (patch)
tree: 0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp')
-rw-r--r-- 3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp 216
1 files changed, 216 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp b/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp
new file mode 100644
index 000000000..50951e9ba
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/SegmentTermDocs.cpp
@@ -0,0 +1,216 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "SegmentHeader.h"
+
+#include "CLucene/store/IndexInput.h"
+#include "Term.h"
+
+CL_NS_DEF(index)
+
+ SegmentTermDocs::SegmentTermDocs(const SegmentReader* _parent){
+     //Func - Constructor: binds this iterator to the segment reader _parent
+     //Pre  - _parent != NULL
+     //Post - The instance has been created with all cursor state zeroed
+     //       (df == 0, so next() reports no documents until seek() is called),
+     //       freqStream holds a clone of parent->freqStream and skipInterval
+     //       holds the value reported by parent->tis
+
+     CND_PRECONDITION(_parent != NULL,"Parent is NULL");
+
+     parent = _parent;
+     deletedDocs = parent->deletedDocs;
+
+     // Position within the postings of the current term
+     df = 0;
+     count = 0;
+     _doc = 0;
+     _freq = 0;
+
+     // Skip bookkeeping; skipStream is only cloned on demand inside skipTo()
+     skipStream = NULL;
+     haveSkipped = false;
+     skipInterval = 0;
+     numSkips = 0;
+     skipCount = 0;
+     skipDoc = 0;
+
+     // Stream offsets belonging to the current term
+     freqPointer = 0;
+     proxPointer = 0;
+     skipPointer = 0;
+
+     // Work on a clone of the parent's stream rather than the parent's own object
+     freqStream = parent->freqStream->clone();
+     skipInterval = parent->tis->getSkipInterval();
+ }
+
+ SegmentTermDocs::~SegmentTermDocs()
+ {
+     //Func - Destructor: hands all cleanup to close()
+     //Pre  - true
+     //Post - freqStream and skipStream, where present, have been closed and deleted
+     close();
+ }
+
+ TermPositions* SegmentTermDocs::__asTermPositions()
+ {
+     //Func - Returns this object viewed as a TermPositions
+     //Post - Always NULL: this implementation offers no TermPositions view
+     return NULL;
+ }
+
+ void SegmentTermDocs::seek(Term* term)
+ {
+     //Func - Positions this iterator on the postings of term
+     //Post - The TermInfo obtained from parent->tis has been applied through
+     //       seek(const TermInfo*) and then released with _CLDELETE. The
+     //       lookup result is forwarded unchecked; a NULL result leaves df == 0.
+     TermInfo* termInfo = parent->tis->get(term);
+     seek(termInfo);
+     _CLDELETE(termInfo);
+ }
+
+ void SegmentTermDocs::seek(TermEnum* termEnum){
+     //Func - Positions this iterator on the postings of the term the
+     //       enumerator currently points at
+     //Pre  - termEnum != NULL
+     //Post - seek(const TermInfo*) has been applied with the TermInfo for
+     //       that term; the TermInfo object obtained here has been deleted
+     TermInfo* ti=NULL;
+
+     // Fast path: the TermInfo held by the enumerator is only valid for this
+     // object when the enumerator was created over the same segment. That is
+     // verified by COMPARING the fieldInfos pointers. The previous code
+     // assigned parent->fieldInfos into the enumerator instead, which modified
+     // the caller's enumerator and accepted TermInfo data (file pointers) that
+     // may belong to a different segment.
+     if ( termEnum->getObjectName() == SegmentTermEnum::getClassName() &&
+          ((SegmentTermEnum*)termEnum)->fieldInfos == parent->fieldInfos ){
+         SegmentTermEnum* te = (SegmentTermEnum*)termEnum;
+         ti = te->getTermInfo();
+     }else{
+         // Foreign or non-segment enumerator: look the term up in this
+         // segment's own term infos.
+         ti = parent->tis->get(termEnum->term(false));
+     }
+
+     seek(ti);
+     _CLDELETE(ti);
+ }
+ void SegmentTermDocs::seek(const TermInfo* ti)
+ {
+     //Func - Resets the iterator onto the postings described by ti
+     //Pre  - ti may be NULL
+     //Post - count == 0. For ti == NULL only df is reset (to 0), so next()
+     //       and read() yield nothing. Otherwise df, the stream offsets and
+     //       the skip bookkeeping are taken from ti and freqStream is
+     //       positioned at ti->freqPointer.
+     count = 0;
+
+     if (ti == NULL) {
+         df = 0;
+         return;
+     }
+
+     df = ti->docFreq;
+     _doc = 0;
+     skipDoc = 0;
+     skipCount = 0;
+     numSkips = df / skipInterval;
+
+     freqPointer = ti->freqPointer;
+     proxPointer = ti->proxPointer;
+     // skipOffset is applied relative to the start of this term's freq data
+     skipPointer = freqPointer + ti->skipOffset;
+
+     freqStream->seek(freqPointer);
+     // makes skipTo() re-seek the skip stream to skipPointer on its next use
+     haveSkipped = false;
+ }
+
+ void SegmentTermDocs::close()
+ {
+     //Func - Closes and deletes the streams held by this instance
+     //Post - freqStream and skipStream have each been closed and handed to
+     //       _CLDELETE if they were non-NULL
+
+     if (freqStream) {
+         //todo: an explicit close() is probably redundant, because deleting the object also closes it
+         freqStream->close();
+         _CLDELETE( freqStream );
+     }
+
+     // skipStream only exists once skipTo() has cloned it
+     if (skipStream) {
+         skipStream->close();
+         _CLDELETE( skipStream );
+     }
+ }
+
+ int32_t SegmentTermDocs::doc() const
+ {
+     // Document number most recently set by next(), read() or skipTo()
+     return _doc;
+ }
+ int32_t SegmentTermDocs::freq() const
+ {
+     // Frequency decoded together with the most recently read document entry
+     return _freq;
+ }
+
+
+bool SegmentTermDocs::next()
+{
+    //Func - Advances to the next posting that is not marked deleted
+    //Post - Returns true with _doc/_freq describing that posting, or false
+    //       once all df entries of the current term have been consumed.
+    //       skippingDoc() is invoked for every deleted entry passed over.
+    for (;;) {
+        if (count == df)
+            return false;
+
+        // Each entry starts with a VInt: the upper bits are the delta to the
+        // previous doc number, the low bit flags an implicit frequency of 1.
+        const uint32_t docCode = freqStream->readVInt();
+        _doc += docCode >> 1; //unsigned shift
+        if ((docCode & 1) == 0)
+            _freq = freqStream->readVInt(); // explicit frequency follows
+        else
+            _freq = 1;
+        count++;
+
+        const bool accepted =
+            deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc));
+        if (accepted)
+            return true;
+
+        skippingDoc();
+    }
+}
+
+
+int32_t SegmentTermDocs::read(int32_t* docs, int32_t* freqs, int32_t length)
+{
+    //Func - Bulk variant of next(): decodes postings into the caller's arrays
+    //Pre  - docs and freqs each have room for length entries
+    //Post - Returns the number of entries stored (<= length). Entries marked
+    //       in deletedDocs are consumed but not stored.
+    // TODO: one optimization would be to get the pointer buffer for ram or mmap
+    // dirs and iterate over them instead of using readByte() intensive functions.
+    int32_t stored = 0;
+
+    while (stored < length && count < df) {
+        const uint32_t docCode = freqStream->readVInt();
+        _doc += docCode >> 1;
+        if ((docCode & 1) == 0)
+            _freq = freqStream->readVInt(); // explicit frequency follows
+        else
+            _freq = 1;                      // low bit set: frequency is one
+        count++;
+
+        // Drop the entry when a deleted-docs set exists and does not clear it
+        if (deletedDocs != NULL && (_doc < 0 || deletedDocs->get(_doc)))
+            continue;
+
+        docs[stored] = _doc;
+        freqs[stored] = _freq;
+        ++stored;
+    }
+
+    return stored;
+}
+
+ bool SegmentTermDocs::skipTo(const int32_t target){ // moves forward until _doc >= target; returns false if next() runs out of postings first
+ if (df >= skipInterval) { // optimized case: skip data is only consulted when df reaches skipInterval
+ if (skipStream == NULL)
+ skipStream = freqStream->clone(); // lazily clone
+
+ if (!haveSkipped) { // lazily seek skip stream: first use since seek() cleared haveSkipped
+ skipStream->seek(skipPointer);
+ haveSkipped = true;
+ }
+
+ // scan skip data, remembering the state as it was before the most recently read skip entry
+ int32_t lastSkipDoc = skipDoc;
+ int64_t lastFreqPointer = freqStream->getFilePointer(); // default: current position, i.e. no jump
+ int64_t lastProxPointer = -1;
+ int32_t numSkipped = -1 - (count % skipInterval);
+
+ while (target > skipDoc) {
+ lastSkipDoc = skipDoc;
+ lastFreqPointer = freqPointer;
+ lastProxPointer = proxPointer;
+
+ if (skipDoc != 0 && skipDoc >= _doc)
+ numSkipped += skipInterval;
+
+ if(skipCount >= numSkips) // every skip entry of this term has been consumed
+ break;
+
+ skipDoc += skipStream->readVInt(); // a skip entry is three VInt deltas, read in this order
+ freqPointer += skipStream->readVInt();
+ proxPointer += skipStream->readVInt();
+
+ skipCount++;
+ }
+
+ // if we found something to skip, then skip it (freqStream is only ever moved forward here)
+ if (lastFreqPointer > freqStream->getFilePointer()) {
+ freqStream->seek(lastFreqPointer);
+ skipProx(lastProxPointer);
+
+ _doc = lastSkipDoc;
+ count += numSkipped;
+ }
+
+ }
+
+ // done skipping, now just scan
+ // note: next() runs at least once, so the position advances even when _doc >= target on entry
+ do {
+ if (!next())
+ return false;
+ } while (target > _doc);
+ return true;
+ }
+
+
+CL_NS_END