diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/TermInfosWriter.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/index/TermInfosWriter.cpp | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/TermInfosWriter.cpp b/3rdparty/clucene/src/CLucene/index/TermInfosWriter.cpp new file mode 100644 index 000000000..c5b5340c3 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/index/TermInfosWriter.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. +*/ +#include "CLucene/StdHeader.h" +#include "TermInfosWriter.h" + +#include "CLucene/store/Directory.h" +#include "CLucene/util/Misc.h" +#include "FieldInfos.h" +#include "Term.h" +#include "TermInfo.h" +#include "IndexWriter.h" + +CL_NS_USE(util) +CL_NS_USE(store) +CL_NS_DEF(index) + +TermInfosWriter::TermInfosWriter(Directory* directory, const QString& segment, + FieldInfos* fis, int32_t interval) + : fieldInfos(fis) +{ + //Func - Constructor + //Pre - directory contains a valid reference to a Directory + // segment != NULL + // fis contains a valid reference to a reference FieldInfos + //Post - The instance has been created + + CND_PRECONDITION(!segment.isEmpty(), "segment is NULL"); + //Initialize instance + initialise(directory, segment, interval, false); + + other = _CLNEW TermInfosWriter(directory, segment, fieldInfos, interval, true); + + CND_CONDITION(other != NULL, "other is NULL"); + + other->other = this; +} + +TermInfosWriter::TermInfosWriter(Directory* directory, const QString& segment, + FieldInfos* fis, int32_t interval, bool isIndex) + : fieldInfos(fis) +{ + //Func - Constructor + //Pre - directory contains a valid reference to a Directory + // segment != NULL + // fis contains a valid reference to a reference FieldInfos + // isIndex is true or false + //Post - The instance has been created + + CND_PRECONDITION(!segment.isEmpty(), "segment is NULL"); + initialise(directory, segment, interval, isIndex); +} + +void TermInfosWriter::initialise(Directory* directory, const QString& segment, + int32_t interval, bool IsIndex) +{ + //Func - Helps constructors to initialize Instance + //Pre - directory contains a valid reference to a Directory + // segment != NULL + // fis contains a valid reference to a reference FieldInfos + //Post - The instance has been initialized + + lastTerm = _CLNEW Term; + + CND_CONDITION(lastTerm != NULL, "Could not allocate memory for lastTerm"); + + lastTi = _CLNEW TermInfo(); + + CND_CONDITION(lastTi != NULL, "Could not allocate memory for lastTi"); + + lastIndexPointer = 0; + size = 0; + isIndex = IsIndex; + indexInterval = interval; + skipInterval = LUCENE_DEFAULT_TERMDOCS_SKIP_INTERVAL; + + QString buf = Misc::segmentname(segment, QLatin1String(isIndex ? ".tii" : ".tis")); + output = directory->createOutput(buf); + + output->writeInt(FORMAT); // write format + output->writeLong(0); // leave space for size + output->writeInt(indexInterval);// write indexInterval + output->writeInt(skipInterval); // write skipInterval + + //Set other to NULL by Default + other = NULL; +} + +TermInfosWriter::~TermInfosWriter() +{ + //Func - Destructor + //Pre - true + //Post - de instance has been destroyed + + close(); +} + +void TermInfosWriter::add(Term* term, const TermInfo* ti) +{ + //Func - Writes a Term and TermInfo to the outputstream + //Pre - Term must be lexicographically greater than all previous Terms added. + // Pointers of TermInfo ti (freqPointer and proxPointer) must be + // positive and greater than all previous. + + CND_PRECONDITION(isIndex || (!isIndex && term->compareTo(lastTerm) > 0),"term out of order"); + CND_PRECONDITION(ti->freqPointer >= lastTi->freqPointer,"freqPointer out of order"); + CND_PRECONDITION(ti->proxPointer >= lastTi->proxPointer,"proxPointer out of order"); + + if (!isIndex && size % indexInterval == 0) { + //add an index term + other->add(lastTerm, lastTi); + } + + //write term + writeTerm(term); + // write doc freq + output->writeVInt(ti->docFreq); + //write pointers + output->writeVLong(ti->freqPointer - lastTi->freqPointer); + output->writeVLong(ti->proxPointer - lastTi->proxPointer); + if (ti->docFreq >= skipInterval) { + output->writeVInt(ti->skipOffset); + } + + if (isIndex){ + output->writeVLong(other->output->getFilePointer() - lastIndexPointer); + lastIndexPointer = other->output->getFilePointer(); // write pointer + } + + lastTi->set(ti); + size++; +} + +void TermInfosWriter::close() { + //Func - Closes the TermInfosWriter + //Pre - true + //Post - The TermInfosWriter has been closed + + if (output){ + //write size at start + output->seek(4); // write size after format + output->writeLong(size); + output->close(); + _CLDELETE(output); + + if (!isIndex){ + if(other){ + other->close(); + _CLDELETE( other ); + } + } + _CLDECDELETE(lastTerm); + + _CLDELETE(lastTi); + } +} + +void TermInfosWriter::writeTerm(Term* term) +{ + int32_t start = Misc::stringDifference(lastTerm->text(),lastTerm->textLength(), + term->text(),term->textLength()); + int32_t length = term->textLength() - start; + + output->writeVInt(start); // write shared prefix length + output->writeVInt(length); // write delta length + output->writeChars(term->text(), start, length); // write delta chars + + int32_t fieldnum = fieldInfos->fieldNumber(term->field()); + CND_PRECONDITION(fieldnum>=-1&&fieldnum<fieldInfos->size(),"Fieldnum is out of range"); + output->writeVInt(fieldnum); // write field num + + if ( lastTerm->__cl_refcount == 1 ){ + lastTerm->set(term,term->text()); + }else{ + _CLDECDELETE(lastTerm); + lastTerm = _CL_POINTER(term); + } +} + +CL_NS_END |