summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/SegmentReader.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/index/SegmentReader.cpp816
1 files changed, 816 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp b/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp
new file mode 100644
index 000000000..ba061714b
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp
@@ -0,0 +1,816 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+*/
+#include "CLucene/StdHeader.h"
+#include "SegmentHeader.h"
+
+#include "FieldInfos.h"
+#include "FieldsReader.h"
+#include "IndexReader.h"
+#include "TermInfosReader.h"
+#include "Terms.h"
+#include "CLucene/search/Similarity.h"
+
+CL_NS_USE(util)
+CL_NS_USE(store)
+CL_NS_USE(document)
+CL_NS_USE(search)
+CL_NS_DEF(index)
+
+SegmentReader::Norm::Norm(IndexInput* instrm, int32_t n, SegmentReader* r,
+                          const QString& seg)
+    : number(n)
+    , reader(r)
+    , segment(seg)
+    , in(instrm)
+    , bytes(NULL)
+    , dirty(false)
+{
+    //Func - Constructor
+    //Pre  - instrm is the input stream of the norm file of field number n,
+    //       r is the reader this norm belongs to, seg is the segment name
+    //Post - All members are set by the initializer list: no norm bytes are
+    //       cached yet (bytes == NULL) and the norm is not dirty
+}
+
+SegmentReader::Norm::~Norm()
+{
+    //Func - Destructor
+    //Pre  - true
+    //Post - The input stream 'in' and the cached 'bytes' array have been
+    //       released
+
+    // 'in' is a pointer to an IndexInput; deleting it is expected to close
+    // the stream through the IndexInput destructor.
+    _CLDELETE(in);
+
+    // Release the cached norm values, if any were loaded.
+    _CLDELETE_ARRAY(bytes);
+}
+
+void SegmentReader::Norm::reWrite()
+{
+    //Func - Writes the cached norm bytes of this field back to the directory
+    //Pre  - bytes holds at least reader->maxDoc() norm values
+    //Post - The bytes have been written to "<segment>.tmp" and that file has
+    //       been renamed to "<segment>.f<number>" (no compound file) or
+    //       "<segment>.s<number>" (compound file in use); dirty is cleared
+
+    const QString tmpName(segment + QLatin1String(".tmp"));
+
+    // NOTE: norms are re-written in regular directory, not cfs
+    IndexOutput* out = reader->getDirectory()->createOutput(tmpName);
+    try {
+        out->writeBytes(bytes, reader->maxDoc());
+    } _CLFINALLY (
+        out->close();
+        _CLDELETE(out)
+    );
+
+    // Build the final name by plain concatenation. QString::arg() returns a
+    // new string and leaves its receiver untouched, so appending ".f%1" and
+    // then calling arg() without using the result would leave a literal "%1"
+    // in the file name.
+    QString fileName(segment);
+    // use a different extension if we have compound format
+    fileName.append(QLatin1String(reader->cfsReader == NULL ? ".f" : ".s"));
+    fileName.append(QString::number(number));
+
+    reader->getDirectory()->renameFile(tmpName, fileName);
+    this->dirty = false;
+}
+
+SegmentReader::SegmentReader(SegmentInfo* si)
+    : IndexReader(si->getDir()), _norms(false, false)
+{
+    //Func - Constructor
+    //Pre  - si points to a valid SegmentInfo
+    //Post - The reader is bound to the directory of si and the segment
+    //       files have been opened by initialize()
+    initialize(si);
+}
+
+SegmentReader::SegmentReader(SegmentInfos* sis, SegmentInfo* si)
+    : IndexReader(si->getDir(), sis, false), _norms(false, false)
+{
+    //Func - Constructor
+    //Pre  - si points to a valid SegmentInfo; sis is handed on to the
+    //       IndexReader base class together with the flag 'false'
+    //Post - The reader is bound to the directory of si and the segment
+    //       files have been opened by initialize()
+    initialize(si);
+}
+
+void SegmentReader::initialize(SegmentInfo* si)
+{
+    //Func - Opens all files belonging to the segment described by si
+    //Pre  - si points to a valid SegmentInfo instance
+    //Post - Field infos, frequency/proximity streams, stored fields reader,
+    //       term infos reader, deletions (if any), norms and term vectors
+    //       (if any) have been opened
+
+    // Start without deletions, without cached fake norms and with nothing
+    // pending for commit.
+    deletedDocs = NULL;
+    ones = NULL;
+    deletedDocsDirty = false;
+
+    normsDirty = false;
+    undeleteAll = false;
+
+    // Remember the segment name; every file name is derived from it.
+    segment = si->name;
+
+    // The streams are kept open for the lifetime of the reader so that the
+    // data stays reachable even if an index update removes the files.
+    freqStream = NULL;
+    proxStream = NULL;
+
+    // Scratch string that receives each generated file name in turn.
+    QString fileName;
+
+    // Read from the compound file when one exists, otherwise directly from
+    // the reader's directory.
+    Directory* cfsDir = getDirectory();
+    SegmentName(fileName, CL_MAX_PATH, QLatin1String(".cfs"));
+    if (!cfsDir->fileExists(fileName)) {
+        cfsReader = NULL;
+    } else {
+        cfsReader = _CLNEW CompoundFileReader(cfsDir, fileName);
+        cfsDir = cfsReader;
+    }
+
+    // Field infos (.fnm)
+    SegmentName(fileName, CL_MAX_PATH, QLatin1String(".fnm"));
+    fieldInfos = _CLNEW FieldInfos(cfsDir, fileName);
+    CND_CONDITION(fieldInfos != NULL,
+        "No memory could be allocated for fieldInfos");
+
+    // Frequency stream (.frq)
+    SegmentName(fileName, CL_MAX_PATH, QLatin1String(".frq"));
+    freqStream = cfsDir->openInput(fileName);
+    CND_CONDITION(freqStream != NULL,
+        "IndexInput freqStream could not open the frequency file");
+
+    // Proximity stream (.prx)
+    SegmentName(fileName, CL_MAX_PATH, QLatin1String(".prx"));
+    proxStream = cfsDir->openInput(fileName);
+    CND_CONDITION(proxStream != NULL,
+        "IndexInput proxStream could not open proximity file");
+
+    // Stored fields reader
+    fieldsReader = _CLNEW FieldsReader(cfsDir, segment, fieldInfos);
+    CND_CONDITION(fieldsReader != NULL,
+        "No memory could be allocated for fieldsReader");
+
+    // Term dictionary reader
+    tis = _CLNEW TermInfosReader(cfsDir, segment, fieldInfos);
+    CND_CONDITION(tis != NULL,"No memory could be allocated for tis");
+
+    // Deletions are stored in the regular directory, never inside the
+    // compound file, so the .del file is looked up through getDirectory().
+    if (hasDeletions(si)) {
+        SegmentName(fileName, CL_MAX_PATH, QLatin1String(".del"));
+        deletedDocs = _CLNEW BitSet(getDirectory(), fileName);
+    }
+
+    // One norm file per indexed field; each holds one byte per document
+    // that is multiplied into the score for hits on that field.
+    openNorms(cfsDir);
+
+    // Term vector files are opened only when some field stores vectors.
+    termVectorsReaderOrig = NULL;
+    if (fieldInfos->hasVectors()) {
+        termVectorsReaderOrig =
+            _CLNEW TermVectorsReader(cfsDir, segment, fieldInfos);
+    }
+}
+
+SegmentReader::~SegmentReader()
+{
+    //Func - Destructor
+    //Pre  - true
+    //Post - All streams have been closed and all owned objects released
+
+    // Closing here means callers do not have to close the reader manually.
+    doClose();
+
+    // Readers and streams owned by this segment reader
+    _CLDELETE(fieldInfos);
+    _CLDELETE(fieldsReader);
+    _CLDELETE(tis);
+    _CLDELETE(freqStream);
+    _CLDELETE(proxStream);
+
+    // Deletion bits and the lazily created fake norms
+    _CLDELETE(deletedDocs);
+    _CLDELETE_ARRAY(ones);
+
+    // Term vectors and the compound file reader (the latter is released
+    // with _CLDECDELETE rather than _CLDELETE)
+    _CLDELETE(termVectorsReaderOrig);
+    _CLDECDELETE(cfsReader);
+}
+
+void SegmentReader::doCommit()
+{
+    //Func - Writes pending deletions and norm changes to the directory
+    //Pre  - true
+    //Post - Dirty deletions have been written to "<segment>.del", the .del
+    //       file has been removed after an undelete-all, dirty norms have
+    //       been rewritten and all pending-change flags are cleared
+
+    const QString delName(segment + QLatin1String(".del"));
+
+    // Deletions are written to a temporary file first and then renamed.
+    if (deletedDocsDirty) {
+        const QString tmpName(segment + QLatin1String(".tmp"));
+        deletedDocs->write(getDirectory(), tmpName);
+        getDirectory()->renameFile(tmpName, delName);
+    }
+
+    // After an undelete-all a leftover deletions file must go away.
+    if (undeleteAll && getDirectory()->fileExists(delName)) {
+        getDirectory()->deleteFile(delName, true);
+    }
+
+    // Rewrite only those norms that were actually modified.
+    if (normsDirty) {
+        for (NormsType::iterator it = _norms.begin(); it != _norms.end(); ++it) {
+            Norm* current = it->second;
+            if (current->dirty)
+                current->reWrite();
+        }
+    }
+
+    deletedDocsDirty = false;
+    normsDirty = false;
+    undeleteAll = false;
+}
+
+void SegmentReader::doClose()
+{
+    //Func - Closes every stream and reader opened for this segment
+    //Pre  - fieldsReader != NULL and tis != NULL
+    //Post - Stored fields, term infos, frequency, proximity, norms, term
+    //       vectors and the compound file reader have been closed
+
+    CND_PRECONDITION(fieldsReader != NULL, "fieldsReader is NULL");
+    CND_PRECONDITION(tis != NULL, "tis is NULL");
+
+    // Stored fields and term dictionary
+    fieldsReader->close();
+    tis->close();
+
+    // Frequency and proximity streams may be absent
+    if (freqStream != NULL)
+        freqStream->close();
+    if (proxStream != NULL)
+        proxStream->close();
+
+    // Norm streams
+    closeNorms();
+
+    // Optional parts: term vectors and compound file
+    if (termVectorsReaderOrig != NULL) {
+        termVectorsReaderOrig->close();
+    }
+    if (cfsReader != NULL) {
+        cfsReader->close();
+    }
+}
+
+bool SegmentReader::hasDeletions() const
+{
+    //Func - Tells whether this reader currently holds a deletions bit set
+    //Post - true has been returned when deletedDocs is not NULL
+    const bool holdsDeletions = (deletedDocs != NULL);
+    return holdsDeletions;
+}
+
+//static
+bool SegmentReader::usesCompoundFile(SegmentInfo* si)
+{
+    //Func - Checks whether the segment of si has a compound (.cfs) file
+    //Pre  - si points to a valid SegmentInfo
+    //Post - true has been returned when "<name>.cfs" exists in si's directory
+    const QString cfsName(si->name + QLatin1String(".cfs"));
+    return si->getDir()->fileExists(cfsName);
+}
+
+//static
+bool SegmentReader::hasSeparateNorms(SegmentInfo* si)
+{
+    //Func - Checks whether the directory of si holds separate norm files,
+    //       i.e. files named "<name>.s" followed by a decimal digit
+    //Pre  - si points to a valid SegmentInfo
+    //Post - true has been returned when at least one such file is listed
+
+    const QString prefix(si->name + QLatin1String(".s"));
+    // QString lengths and indices are int; keeping the prefix length as int
+    // avoids mixing signed and unsigned values in the checks below.
+    const int prefixLength = prefix.length();
+
+    const QStringList names = si->getDir()->list();
+    foreach (const QString& name, names) {
+        // The name must have at least one character after the prefix.
+        if (name.length() <= prefixLength || !name.startsWith(prefix))
+            continue;
+
+        // Only the ASCII digits 0-9 count as a norm file number.
+        const QChar next = name.at(prefixLength);
+        if (next >= QLatin1Char('0') && next <= QLatin1Char('9'))
+            return true;
+    }
+    return false;
+}
+
+bool SegmentReader::hasDeletions(const SegmentInfo* si)
+{
+    //Func - Static method
+    //       Checks whether the segment described by si has deletions
+    //Pre  - si points to a valid SegmentInfo instance
+    //Post - true has been returned if the deletions file of the segment
+    //       exists, otherwise false
+
+    // Build "<name>.del" (x == -1 means no number is appended) and test
+    // for its existence in the segment's directory.
+    QString delName;
+    Misc::segmentname(delName, CL_MAX_PATH, si->name, QLatin1String(".del"), -1);
+    const bool present = si->getDir()->fileExists(delName);
+    return present;
+}
+
+//synchronized
+void SegmentReader::doDelete(const int32_t docNum)
+{
+    //Func - Marks document docNum as deleted
+    //Pre  - 0 <= docNum < maxDoc()
+    //Post - The bit for docNum is set in deletedDocs and the deletions are
+    //       flagged as dirty so that doCommit() writes them
+
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+    CND_PRECONDITION(docNum >= 0, "docNum is a negative number");
+    CND_PRECONDITION(docNum < maxDoc(),
+        "docNum is bigger than the total number of documents");
+
+    // Create the bit set lazily, sized for every document of the segment.
+    if (!deletedDocs) {
+        deletedDocs = _CLNEW BitSet(maxDoc());
+        CND_CONDITION(deletedDocs != NULL,
+            "No memory could be allocated for deletedDocs");
+    }
+
+    // A new deletion supersedes any pending undelete-all.
+    deletedDocsDirty = true;
+    undeleteAll = false;
+
+    deletedDocs->set(docNum);
+}
+
+void SegmentReader::doUndeleteAll()
+{
+    //Func - Discards all deletions of this segment
+    //Post - deletedDocs has been released; nothing is left to write, but
+    //       undeleteAll is set so that doCommit() removes the .del file
+    _CLDELETE(deletedDocs);
+
+    deletedDocsDirty = false;
+    undeleteAll = true;
+}
+
+void SegmentReader::files(QStringList& retarray)
+{
+    //Func - Collects the names of all existing files of this segment
+    //Pre  - segment is not empty
+    //Post - The name of every segment file that exists in the directory has
+    //       been appended to retarray: first the fixed extensions below, then
+    //       the norm file of every indexed field that stores norms
+
+    // Same check as SegmentName(); segment is a QString, not a pointer.
+    CND_PRECONDITION(!segment.isEmpty(), "segment is NULL");
+
+    // A table replaces the former _ADD_SEGMENT macro, which stayed defined
+    // for the rest of the translation unit.
+    static const char* const extensions[] = {
+        ".cfs", ".fnm", ".fdx", ".fdt", ".tii", ".tis", ".frq",
+        ".prx", ".del", ".tvx", ".tvd", ".tvf", ".tvp"
+    };
+    const size_t extensionCount = sizeof(extensions) / sizeof(extensions[0]);
+
+    for (size_t e = 0; e < extensionCount; ++e) {
+        const QString fileName = SegmentName(QLatin1String(extensions[e]));
+        if (getDirectory()->fileExists(fileName))
+            retarray.push_back(fileName);
+    }
+
+    //iterate through the field infos
+    for (int32_t i = 0; i < fieldInfos->size(); ++i) {
+        //Get the field info for the i-th field
+        FieldInfo* fi = fieldInfos->fieldInfo(i);
+        //Only indexed fields that store norms have a norm file
+        if (fi->isIndexed && !fi->omitNorms) {
+            // ".f<i>" without a compound file, ".s<i>" with one
+            const QString name = (cfsReader == NULL)
+                ? SegmentName(QLatin1String(".f"), i)
+                : SegmentName(QLatin1String(".s"), i);
+
+            if (getDirectory()->fileExists(name))
+                retarray.push_back(name);
+        }
+    }
+}
+
+TermEnum* SegmentReader::terms() const
+{
+    //Func - Returns an enumeration of all terms of the segment
+    //Pre  - tis != NULL
+    //Post - The enumeration produced by the TermInfosReader has been returned
+
+    CND_PRECONDITION(tis != NULL, "tis is NULL");
+
+    TermEnum* allTerms = tis->terms();
+    return allTerms;
+}
+
+TermEnum* SegmentReader::terms(const Term* t) const
+{
+    //Func - Returns an enumeration of terms starting at or after term t
+    //Pre  - t != NULL and tis != NULL
+    //Post - The enumeration produced by the TermInfosReader for t has been
+    //       returned
+
+    CND_PRECONDITION(t != NULL, "t is NULL");
+    CND_PRECONDITION(tis != NULL, "tis is NULL");
+
+    TermEnum* fromTerm = tis->terms(t);
+    return fromTerm;
+}
+
+bool SegmentReader::document(int32_t n, Document* doc)
+{
+    //Func - Loads the stored fields of document n into doc
+    //Pre  - n >= 0
+    //Post - If document n is marked deleted a CL_ERR_InvalidState error has
+    //       been thrown, otherwise the result of the fields reader has been
+    //       returned
+
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+    CND_PRECONDITION(n >= 0, "n is a negative number");
+
+    // Deleted documents must not be handed out.
+    if (isDeleted(n)) {
+        _CLTHROWA(CL_ERR_InvalidState, "attempt to access a deleted document" );
+    }
+
+    const bool loaded = fieldsReader->doc(n, doc);
+    return loaded;
+}
+
+bool SegmentReader::isDeleted(const int32_t n)
+{
+    //Func - Checks whether document n has been marked deleted
+    //Pre  - n >= 0
+    //Post - true has been returned if a deletions bit set exists and the
+    //       bit of document n is set, otherwise false
+
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+    CND_PRECONDITION(n >= 0, "n is a negative number");
+
+    // Without a bit set nothing can be deleted.
+    if (deletedDocs == NULL)
+        return false;
+
+    return deletedDocs->get(n);
+}
+
+TermDocs* SegmentReader::termDocs() const
+{
+    //Func - Creates an unpositioned TermDocs enumerator for this segment
+    //Pre  - true
+    //Post - A newly allocated SegmentTermDocs bound to this reader has been
+    //       returned
+
+    TermDocs* enumerator = _CLNEW SegmentTermDocs(this);
+    return enumerator;
+}
+
+TermPositions* SegmentReader::termPositions() const
+{
+    //Func - Creates an unpositioned TermPositions enumerator for this segment
+    //Pre  - true
+    //Post - A newly allocated SegmentTermPositions bound to this reader has
+    //       been returned
+
+    TermPositions* enumerator = _CLNEW SegmentTermPositions(this);
+    return enumerator;
+}
+
+int32_t SegmentReader::docFreq(const Term* t) const
+{
+    //Func - Returns the number of documents which contain the term t
+    //Pre  - t holds a valid reference to a Term
+    //Post - The docFreq of the TermInfo found for t has been returned, or 0
+    //       when the term infos reader has no entry for t
+
+    TermInfo* info = tis->get(t);
+
+    // Unknown term: no document contains it.
+    if (info == NULL)
+        return 0;
+
+    // Copy the value out before releasing the TermInfo returned by get().
+    const int32_t frequency = info->docFreq;
+    _CLDELETE( info );
+    return frequency;
+}
+
+int32_t SegmentReader::numDocs()
+{
+    //Func - Returns the number of documents that are not marked deleted
+    //Pre  - true
+    //Post - maxDoc() minus the number of set deletion bits has been returned
+
+    // Every document of the segment, deleted ones included
+    int32_t live = maxDoc();
+
+    // Take off the deleted documents when a deletions bit set exists
+    if (deletedDocs != NULL) {
+        live -= deletedDocs->count();
+    }
+
+    return live;
+}
+
+int32_t SegmentReader::maxDoc() const
+{
+    //Func - Returns the number of all documents in the segment, including
+    //       the ones that have been marked deleted
+    //Pre  - true
+    //Post - The size reported by the stored fields reader has been returned
+
+    const int32_t total = fieldsReader->size();
+    return total;
+}
+
+void SegmentReader::getFieldNames(FieldOption fldOption,
+                                  StringArrayWithDeletor& retarray)
+{
+    //Func - Collects the names of all fields that match fldOption
+    //Pre  - fldOption is a combination of IndexReader field option flags
+    //Post - A duplicate of the name of every matching field has been
+    //       appended to retarray
+
+    size_t count = fieldInfos->size();
+    for (size_t idx = 0; idx < count; idx++) {
+        FieldInfo* fi = fieldInfos->fieldInfo(idx);
+
+        // A field is selected as soon as one requested option applies to it.
+        bool selected = false;
+        if (fldOption & IndexReader::ALL) {
+            selected = true;
+        } else if (!fi->isIndexed && (fldOption & IndexReader::UNINDEXED)) {
+            selected = true;
+        } else if (fi->isIndexed && (fldOption & IndexReader::INDEXED)) {
+            selected = true;
+        } else if (fi->isIndexed && fi->storeTermVector == false
+            && (fldOption & IndexReader::INDEXED_NO_TERMVECTOR)) {
+            selected = true;
+        } else if ((fldOption & IndexReader::TERMVECTOR)
+            && fi->storeTermVector == true
+            && fi->storePositionWithTermVector == false
+            && fi->storeOffsetWithTermVector == false) {
+            // plain term vector: neither positions nor offsets
+            selected = true;
+        } else if (fi->isIndexed && fi->storeTermVector
+            && (fldOption & IndexReader::INDEXED_WITH_TERMVECTOR)) {
+            selected = true;
+        } else if (fi->storePositionWithTermVector
+            && fi->storeOffsetWithTermVector == false
+            && (fldOption & IndexReader::TERMVECTOR_WITH_POSITION)) {
+            // positions only
+            selected = true;
+        } else if (fi->storeOffsetWithTermVector
+            && fi->storePositionWithTermVector == false
+            && (fldOption & IndexReader::TERMVECTOR_WITH_OFFSET)) {
+            // offsets only
+            selected = true;
+        } else if ((fi->storeOffsetWithTermVector && fi->storePositionWithTermVector)
+            && (fldOption & IndexReader::TERMVECTOR_WITH_POSITION_OFFSET)) {
+            // positions and offsets
+            selected = true;
+        }
+
+        if (!selected)
+            continue;
+
+        retarray.push_back(STRDUP_TtoT(fi->name));
+    }
+}
+
+bool SegmentReader::hasNorms(const TCHAR* field) const
+{
+    //Func - Tells whether a norm entry is registered for the named field
+    //Post - true has been returned when field is a key of _norms
+    const bool registered = (_norms.find(field) != _norms.end());
+    return registered;
+}
+
+
+void SegmentReader::norms(const TCHAR* field, uint8_t* bytes)
+{
+    //Func - Copies the norms of the named field into bytes
+    //Pre  - field != NULL
+    //       bytes != NULL and has room for at least maxDoc() bytes
+    //Post - bytes holds maxDoc() norm values taken from, in this order of
+    //       preference: the fake norms when the field has no norm entry,
+    //       the cached norm bytes, or the norm file on disk
+
+    CND_PRECONDITION(field != NULL, "field is NULL");
+    // The message used to say "field is NULL" for this check as well.
+    CND_PRECONDITION(bytes != NULL, "bytes is NULL");
+
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+    Norm* norm = _norms.get(field);
+    if ( norm == NULL ){
+        // no norms stored for this field: hand out the default values
+        memcpy(bytes, fakeNorms(), maxDoc());
+        return;
+    }
+
+    if (norm->bytes != NULL) { // can copy from cache
+        memcpy(bytes, norm->bytes, maxDoc());
+        return;
+    }
+
+    // Work on a clone so that the position of the stream kept in the Norm
+    // is not disturbed.
+    IndexInput* _normStream = norm->in->clone();
+    CND_PRECONDITION(_normStream != NULL, "normStream==NULL");
+
+    // read from disk
+    try {
+        _normStream->seek(0);
+        _normStream->readBytes(bytes, maxDoc());
+    } _CLFINALLY (
+        //Have the normstream closed
+        _normStream->close();
+        //Destroy the normstream
+        _CLDELETE( _normStream );
+    );
+}
+
+uint8_t* SegmentReader::createFakeNorms(int32_t size)
+{
+    //Func - Allocates an array of size norm bytes that all encode 1.0
+    //Post - A new array filled with DefaultSimilarity::encodeNorm(1.0f) has
+    //       been returned
+    uint8_t* filled = _CL_NEWARRAY(uint8_t,size);
+    memset(filled, DefaultSimilarity::encodeNorm(1.0f), size);
+    return filled;
+}
+
+uint8_t* SegmentReader::fakeNorms()
+{
+    //Func - Returns the shared fake norms, creating them on first use
+    //Post - ones points to an array of maxDoc() default norm bytes, which
+    //       has been returned
+    if (ones != NULL)
+        return ones;
+
+    ones = createFakeNorms(maxDoc());
+    return ones;
+}
+
+// can return null if norms aren't stored
+uint8_t* SegmentReader::getNorms(const TCHAR* field)
+{
+    //Func - Returns the cached norms of the named field, loading them first
+    //       when they have not been read yet
+    //Post - NULL has been returned when the field has no norm entry (not
+    //       indexed, or norms not stored); otherwise the cached array
+
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+    Norm* norm = _norms.get(field);
+    if (norm == NULL)
+        return NULL;
+
+    // Already cached by an earlier call
+    if (norm->bytes != NULL)
+        return norm->bytes;
+
+    // First access: read the values and keep them in the Norm entry
+    uint8_t* loaded = _CL_NEWARRAY(uint8_t, maxDoc());
+    norms(field, loaded);
+    norm->bytes = loaded;
+    return loaded;
+}
+
+uint8_t* SegmentReader::norms(const TCHAR* field)
+{
+    //Func - Returns the array that holds the norms of the named field
+    //Pre  - field != NULL and names the field whose norms are wanted
+    //Post - The cached norms of the field have been returned; when the
+    //       field has no stored norms the shared fake norms are returned
+    //       instead, so the result comes from one of those two sources
+
+    CND_PRECONDITION(field != NULL, "field is NULL");
+
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+    uint8_t* stored = getNorms(field);
+    return (stored != NULL) ? stored : fakeNorms();
+}
+
+void SegmentReader::doSetNorm(int32_t doc, const TCHAR* field, uint8_t value)
+{
+    //Func - Sets the norm of document doc for the named field
+    //Post - Nothing has happened when the field has no norm entry;
+    //       otherwise the norm is flagged dirty and the cached value of doc
+    //       has been replaced by value
+
+    Norm* norm = _norms.get(field);
+    if (norm != NULL) {
+        // Flag the change so that doCommit() rewrites this norm.
+        norm->dirty = true;
+        normsDirty = true;
+
+        // norms() loads and caches the values if necessary.
+        uint8_t* values = norms(field);
+        values[doc] = value;
+    }
+}
+
+QString SegmentReader::SegmentName(const QString& ext, const int32_t x)
+{
+    //Func - Builds a file name from the segment name, ext and x
+    //Pre  - ext is not empty and holds the extension
+    //       x contains a number
+    //Post - The name produced by the buffer-filling overload has been
+    //       returned: segment + ext when x == -1, otherwise
+    //       segment + ext + x
+
+    CND_PRECONDITION(!ext.isEmpty(), "ext is NULL");
+
+    QString fileName;
+    SegmentName(fileName, CL_MAX_PATH, ext, x);
+    return fileName;
+}
+
+void SegmentReader::SegmentName(QString& buffer, int32_t bufferLen,
+                                const QString& ext, const int32_t x)
+{
+    //Func - Creates a file name in buffer from the segment name, ext and x
+    //Pre  - segment is not empty
+    //       ext holds the extension, x contains a number
+    //Post - When x == -1 buffer contains segment + ext, otherwise it
+    //       contains segment + ext + x (the work is done by
+    //       Misc::segmentname)
+
+    CND_PRECONDITION(!segment.isEmpty(), "Segment is NULL");
+
+    Misc::segmentname(buffer, bufferLen, segment, ext, x);
+}
+
+void SegmentReader::openNorms(Directory* cfsDir)
+{
+    //Func - Opens the norm file of every indexed field that stores norms
+    //Pre  - cfsDir is the directory to read ".f<n>" norm files from
+    //Post - For each such field a Norm instance with an open input stream
+    //       has been registered in _norms under the field name
+
+    for (int32_t i = 0; i < fieldInfos->size(); i++) {
+        FieldInfo* fi = fieldInfos->fieldInfo(i);
+
+        // Fields that are not indexed or omit norms have no norm file.
+        if (!fi->isIndexed || fi->omitNorms)
+            continue;
+
+        // Prefer a separate ".s<n>" norm file in the regular directory ...
+        Directory* source = getDirectory();
+        QString normFile;
+        SegmentName(normFile, CL_MAX_PATH, QLatin1String(".s"), fi->number);
+
+        // ... and fall back to ".f<n>" read through cfsDir.
+        if (!source->fileExists(normFile)) {
+            SegmentName(normFile, CL_MAX_PATH, QLatin1String(".f"), fi->number);
+            source = cfsDir;
+        }
+
+        _norms.put(fi->name, _CLNEW Norm(source->openInput(normFile),
+            fi->number, this, segment));
+    }
+}
+
+void SegmentReader::closeNorms()
+{
+    //Func - Destroys every Norm stored in _norms
+    //Pre  - true
+    //Post - All Norm instances have been deleted and _norms is empty
+
+    SCOPED_LOCK_MUTEX(_norms.THIS_LOCK)
+
+    // Delete each Norm through its map entry.
+    for (NormsType::iterator it = _norms.begin(); it != _norms.end(); ++it) {
+        _CLDELETE(it->second);
+    }
+
+    // The entries now refer to deleted objects, so drop them to make sure
+    // they are never used again.
+    _norms.clear();
+}
+
+TermVectorsReader* SegmentReader::getTermVectorsReader()
+{
+    //Func - Returns the TermVectorsReader stored in termVectorsLocal,
+    //       creating it as a clone of termVectorsReaderOrig on first use
+    //Pre  - true
+    //Post - The stored reader has been returned, or NULL when none is
+    //       stored and the segment has no term vectors
+    //       (termVectorsReaderOrig == NULL)
+
+    TermVectorsReader* tvReader = termVectorsLocal.get();
+
+    // initialize() leaves termVectorsReaderOrig NULL for segments without
+    // term vectors; cloning must be skipped then instead of dereferencing
+    // the NULL pointer.
+    if (tvReader == NULL && termVectorsReaderOrig != NULL) {
+        tvReader = termVectorsReaderOrig->clone();
+        termVectorsLocal.set(tvReader);
+    }
+    return tvReader;
+}
+
+TermFreqVector* SegmentReader::getTermFreqVector(int32_t docNumber,
+                                                 const TCHAR* field)
+{
+    //Func - Returns the term frequency vector of document docNumber for the
+    //       given field
+    //Pre  - docNumber identifies a document; field may be NULL
+    //Post - NULL has been returned when the segment has no term vectors,
+    //       when field is given but unknown or stored without a term
+    //       vector, or when no TermVectorsReader is available; otherwise
+    //       the result of the TermVectorsReader
+
+    // Checked up front for every call: with a NULL field the check used to
+    // be skipped, so getTermVectorsReader() was reached on a segment that
+    // has no term vectors.
+    if (termVectorsReaderOrig == NULL)
+        return NULL;
+
+    if (field) {
+        FieldInfo* fi = fieldInfos->fieldInfo(field);
+        // Check if this field is invalid or has no stored term vector
+        if (fi == NULL || !fi->storeTermVector)
+            return NULL;
+    }
+
+    TermVectorsReader* termVectorsReader = getTermVectorsReader();
+    if (termVectorsReader == NULL)
+        return NULL;
+
+    return termVectorsReader->get(docNumber, field);
+}
+
+bool SegmentReader::getTermFreqVectors(int32_t docNumber,
+                                       Array<TermFreqVector*>& result)
+{
+    //Func - Fills result with the term frequency vectors of document
+    //       docNumber
+    //Post - false has been returned when the segment has no term vectors
+    //       or no TermVectorsReader is available; otherwise the result of
+    //       the TermVectorsReader
+
+    // No term vector files were opened for this segment
+    if (termVectorsReaderOrig == NULL) {
+        return false;
+    }
+
+    TermVectorsReader* vectors = getTermVectorsReader();
+    if (vectors == NULL) {
+        return false;
+    }
+
+    return vectors->get(docNumber, result);
+}
+
+CL_NS_END