diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/SegmentReader.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/index/SegmentReader.cpp | 816 |
1 files changed, 816 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp b/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp new file mode 100644 index 000000000..ba061714b --- /dev/null +++ b/3rdparty/clucene/src/CLucene/index/SegmentReader.cpp @@ -0,0 +1,816 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +*/ +#include "CLucene/StdHeader.h" +#include "SegmentHeader.h" + +#include "FieldInfos.h" +#include "FieldsReader.h" +#include "IndexReader.h" +#include "TermInfosReader.h" +#include "Terms.h" +#include "CLucene/search/Similarity.h" + +CL_NS_USE(util) +CL_NS_USE(store) +CL_NS_USE(document) +CL_NS_USE(search) +CL_NS_DEF(index) + +SegmentReader::Norm::Norm(IndexInput* instrm, int32_t n, SegmentReader* r, + const QString& seg) + : number(n) + , reader(r) + , segment(seg) + , in(instrm) + , bytes(NULL) + , dirty(false) +{ + //Func - Constructor + //Pre - instrm is a valid reference to an IndexInput + //Post - A Norm instance has been created with an empty bytes array + bytes = NULL; + dirty = false; +} + +SegmentReader::Norm::~Norm() +{ + //Func - Destructor + //Pre - true + //Post - The IndexInput in has been deleted (and closed by its destructor) + // and the array too. + + //Close and destroy the inputstream in-> The inputstream will be closed + // by its destructor. Note that the IndexInput 'in' actually is a pointer!!!!! + _CLDELETE(in); + + //Delete the bytes array + _CLDELETE_ARRAY(bytes); + +} + +void SegmentReader::Norm::reWrite() +{ + QString buf(segment + QLatin1String(".tmp")); + + // NOTE: norms are re-written in regular directory, not cfs + IndexOutput* out = reader->getDirectory()->createOutput(buf); + try { + out->writeBytes(bytes, reader->maxDoc()); + } _CLFINALLY ( + out->close(); + _CLDELETE(out) + ); + + QString fileName(segment); + if (reader->cfsReader == NULL) + fileName.append(QLatin1String(".f%1")).arg(number); + else // use a different file name if we have compound format + fileName.append(QLatin1String(".s%1")).arg(number); + + reader->getDirectory()->renameFile(buf, fileName); + this->dirty = false; +} + +SegmentReader::SegmentReader(SegmentInfo* si) + : IndexReader(si->getDir()) + , _norms(false, false) +{ + initialize(si); +} + +SegmentReader::SegmentReader(SegmentInfos* sis, SegmentInfo* si) + : IndexReader(si->getDir(), sis, false) + , _norms(false, false) +{ + initialize(si); +} + +void SegmentReader::initialize(SegmentInfo* si) +{ + //Pre - si-> is a valid reference to SegmentInfo instance + // identified by si-> + //Post - All files of the segment have been read + + deletedDocs = NULL; + ones = NULL; + //There are no documents yet marked as deleted + deletedDocsDirty = false; + + normsDirty=false; + undeleteAll=false; + + //Duplicate the name of the segment from SegmentInfo to segment + segment = si->name; + // make sure that all index files have been read or are kept open + // so that if an index update removes them we'll still have them + freqStream = NULL; + proxStream = NULL; + + //instantiate a buffer large enough to hold a directory path + QString buf; + + // Use compound file directory for some files, if it exists + Directory* cfsDir = getDirectory(); + SegmentName(buf, CL_MAX_PATH, QLatin1String(".cfs")); + if (cfsDir->fileExists(buf)) { + cfsReader = _CLNEW CompoundFileReader(cfsDir, buf); + cfsDir = cfsReader; + }else + cfsReader = NULL; + + // Create the name of the field info file with suffix .fnm in buf + SegmentName(buf, CL_MAX_PATH, QLatin1String(".fnm")); + fieldInfos = _CLNEW FieldInfos(cfsDir, buf ); + + // Condition check to see if fieldInfos points to a valid instance + CND_CONDITION(fieldInfos != NULL, + "No memory could be allocated for fieldInfos"); + + // Create the name of the frequence file with suffix .frq in buf + SegmentName(buf ,CL_MAX_PATH, QLatin1String(".frq")); + + // Open an IndexInput freqStream to the frequency file + freqStream = cfsDir->openInput( buf ); + + // Condition check to see if freqStream points to a valid instance and was + // able to open the frequency file + CND_CONDITION(freqStream != NULL, + "IndexInput freqStream could not open the frequency file"); + + // Create the name of the prox file with suffix .prx in buf + SegmentName(buf, CL_MAX_PATH, QLatin1String(".prx")); + + // Open an IndexInput proxStream to the prox file + proxStream = cfsDir->openInput( buf ); + + // Condition check to see if proxStream points to a valid instance and was + // able to open the prox file + CND_CONDITION(proxStream != NULL, + "IndexInput proxStream could not open proximity file"); + + // Instantiate a FieldsReader for reading the Field Info File + fieldsReader = _CLNEW FieldsReader(cfsDir, segment, fieldInfos); + + // Condition check to see if fieldsReader points to a valid instance + CND_CONDITION(fieldsReader != NULL, + "No memory could be allocated for fieldsReader"); + + //Instantiate a TermInfosReader for reading the Term Dictionary .tis file + tis = _CLNEW TermInfosReader(cfsDir, segment, fieldInfos); + + //Condition check to see if tis points to a valid instance + CND_CONDITION(tis != NULL,"No memory could be allocated for tis"); + + // Check if the segment has deletion according to the SegmentInfo instance + // si-> NOTE: the bitvector is stored using the regular directory, not cfs + if (hasDeletions(si)) { + //Create a deletion file with suffix .del + SegmentName(buf, CL_MAX_PATH, QLatin1String(".del")); + // Instantiate a BitVector that manages which documents have been deleted + deletedDocs = _CLNEW BitSet(getDirectory(), buf); + } + + // Open the norm file. There's a norm file for each indexed field with a + // byte for each document. The .f[0-9]* file contains, for each document, + // a byte that encodes a value that is multiplied into the score for hits + // on that field + openNorms(cfsDir); + + termVectorsReaderOrig = NULL; + if (fieldInfos->hasVectors()) // open term vector files only as needed + termVectorsReaderOrig = _CLNEW TermVectorsReader(cfsDir, segment, fieldInfos); +} + +SegmentReader::~SegmentReader() +{ + //Func - Destructor. + //Pre - doClose has been invoked! + //Post - the instance has been destroyed + + doClose(); //this means that index reader doesn't need to be closed manually + + _CLDELETE(fieldInfos); + _CLDELETE(fieldsReader); + _CLDELETE(tis); + _CLDELETE(freqStream); + _CLDELETE(proxStream); + _CLDELETE(deletedDocs); + _CLDELETE_ARRAY(ones); + _CLDELETE(termVectorsReaderOrig); + _CLDECDELETE(cfsReader); +} + +void SegmentReader::doCommit() +{ + QString bufdel(segment + QLatin1String(".del")); + + if (deletedDocsDirty) { // re-write deleted + QString buftmp(segment + QLatin1String(".tmp")); + deletedDocs->write(getDirectory(), buftmp); + getDirectory()->renameFile(buftmp, bufdel); + } + + if(undeleteAll && getDirectory()->fileExists(bufdel)) + getDirectory()->deleteFile(bufdel, true); + + if (normsDirty) { // re-write norms + NormsType::iterator itr = _norms.begin(); + while (itr != _norms.end()) { + Norm* norm = itr->second; + if (norm->dirty) { + norm->reWrite(); + } + ++itr; + } + } + deletedDocsDirty = false; + normsDirty = false; + undeleteAll = false; +} + +void SegmentReader::doClose() +{ + //Func - Closes all streams to the files of a single segment + //Pre - fieldsReader != NULL + // tis != NULL + //Post - All streams to files have been closed + + CND_PRECONDITION(fieldsReader != NULL, "fieldsReader is NULL"); + CND_PRECONDITION(tis != NULL, "tis is NULL"); + + //Close the fieldsReader + fieldsReader->close(); + //Close the TermInfosReader + tis->close(); + + //Close the frequency stream + if (freqStream != NULL){ + freqStream->close(); + } + //Close the prox stream + if (proxStream != NULL){ + proxStream->close(); + } + + //Close the norm file + closeNorms(); + + if (termVectorsReaderOrig != NULL) + termVectorsReaderOrig->close(); + + if (cfsReader != NULL) + cfsReader->close(); +} + +bool SegmentReader::hasDeletions() const +{ + return deletedDocs != NULL; +} + +//static +bool SegmentReader::usesCompoundFile(SegmentInfo* si) +{ + return si->getDir()->fileExists(si->name + QLatin1String(".cfs")); +} + +//static +bool SegmentReader::hasSeparateNorms(SegmentInfo* si) +{ + QString pattern(si->name); + pattern.append(QLatin1String(".s")); + size_t patternLength = pattern.length(); + + QStringList names = si->getDir()->list(); + foreach (const QString& name, names) { + int length = name.length(); + if (length > patternLength && name.left(patternLength) == pattern) { + if (name.at(patternLength) >= QLatin1Char('0') + && name.at(patternLength) <= QLatin1Char('9')) { + return true; + } + } + } + return false; +} + +bool SegmentReader::hasDeletions(const SegmentInfo* si) +{ + //Func - Static method + // Checks if a segment managed by SegmentInfo si-> has deletions + //Pre - si-> holds a valid reference to an SegmentInfo instance + //Post - if the segement contains deleteions true is returned otherwise flas + + //Check if the deletion file exists and return the result + QString f; + Misc::segmentname(f, CL_MAX_PATH, si->name, QLatin1String(".del"), -1); + return si->getDir()->fileExists(f); +} + +//synchronized +void SegmentReader::doDelete(const int32_t docNum) +{ + //Func - Marks document docNum as deleted + //Pre - docNum >=0 and DocNum < maxDoc() + // docNum contains the number of the document that must be + // marked deleted + //Post - The document identified by docNum has been marked deleted + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + CND_PRECONDITION(docNum >= 0, "docNum is a negative number"); + CND_PRECONDITION(docNum < maxDoc(), + "docNum is bigger than the total number of documents"); + + //Check if deletedDocs exists + if (deletedDocs == NULL) { + deletedDocs = _CLNEW BitSet(maxDoc()); + + //Condition check to see if deletedDocs points to a valid instance + CND_CONDITION(deletedDocs != NULL, + "No memory could be allocated for deletedDocs"); + } + //Flag that there are documents marked deleted + deletedDocsDirty = true; + undeleteAll = false; + //Mark document identified by docNum as deleted + deletedDocs->set(docNum); + +} + +void SegmentReader::doUndeleteAll() +{ + _CLDELETE(deletedDocs); + deletedDocsDirty = false; + undeleteAll = true; +} + +void SegmentReader::files(QStringList& retarray) +{ + //Func - Returns all file names managed by this SegmentReader + //Pre - segment != NULL + //Post - All filenames managed by this SegmentRead have been returned + + CND_PRECONDITION(segment != NULL, "segment is NULL"); + + QString temp; + #define _ADD_SEGMENT(ext) \ + temp = SegmentName(ext); \ + if (getDirectory()->fileExists(temp)) \ + retarray.push_back(temp); + + //Add the name of the Field Info file + _ADD_SEGMENT(QLatin1String(".cfs")); + _ADD_SEGMENT(QLatin1String(".fnm")); + _ADD_SEGMENT(QLatin1String(".fdx")); + _ADD_SEGMENT(QLatin1String(".fdt")); + _ADD_SEGMENT(QLatin1String(".tii")); + _ADD_SEGMENT(QLatin1String(".tis")); + _ADD_SEGMENT(QLatin1String(".frq")); + _ADD_SEGMENT(QLatin1String(".prx")); + _ADD_SEGMENT(QLatin1String(".del")); + _ADD_SEGMENT(QLatin1String(".tvx")); + _ADD_SEGMENT(QLatin1String(".tvd")); + _ADD_SEGMENT(QLatin1String(".tvf")); + _ADD_SEGMENT(QLatin1String(".tvp")); + + //iterate through the field infos + for (int32_t i = 0; i < fieldInfos->size(); ++i) { + //Get the field info for the i-th field + FieldInfo* fi = fieldInfos->fieldInfo(i); + //Check if the field has been indexed + if (fi->isIndexed && !fi->omitNorms) { + QString name; + if (cfsReader == NULL) + name = SegmentName(QLatin1String(".f"), i); + else + name = SegmentName(QLatin1String(".s"), i); + + //The field has been indexed so add its norm file + if (getDirectory()->fileExists(name)) + retarray.push_back(name); + } + } +} + +TermEnum* SegmentReader::terms() const +{ + //Func - Returns an enumeration of all the Terms and TermInfos in the set. + //Pre - tis != NULL + //Post - An enumeration of all the Terms and TermInfos in the set has been returned + + CND_PRECONDITION(tis != NULL, "tis is NULL"); + + return tis->terms(); +} + +TermEnum* SegmentReader::terms(const Term* t) const +{ + //Func - Returns an enumeration of terms starting at or after the named term t + //Pre - t != NULL + // tis != NULL + //Post - An enumeration of terms starting at or after the named term t + + CND_PRECONDITION(t != NULL, "t is NULL"); + CND_PRECONDITION(tis != NULL, "tis is NULL"); + + return tis->terms(t); +} + +bool SegmentReader::document(int32_t n, Document* doc) +{ + //Func - Returns a document identified by n + //Pre - n >=0 and identifies the document n + //Post - if the document has been deleted then an exception has been thrown + // otherwise a reference to the found document has been returned + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + CND_PRECONDITION(n >= 0, "n is a negative number"); + + //Check if the n-th document has been marked deleted + if (isDeleted(n)) + _CLTHROWA(CL_ERR_InvalidState, "attempt to access a deleted document" ); + + //Retrieve the n-th document + return fieldsReader->doc(n, doc); +} + +bool SegmentReader::isDeleted(const int32_t n) +{ + //Func - Checks if the n-th document has been marked deleted + //Pre - n >=0 and identifies the document n + //Post - true has been returned if document n has been deleted otherwise fralse + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + CND_PRECONDITION(n >= 0, "n is a negative number"); + + //Is document n deleted + return (deletedDocs != NULL && deletedDocs->get(n)); +} + +TermDocs* SegmentReader::termDocs() const +{ + //Func - Returns an unpositioned TermDocs enumerator. + //Pre - true + //Post - An unpositioned TermDocs enumerator has been returned + + return _CLNEW SegmentTermDocs(this); +} + +TermPositions* SegmentReader::termPositions() const +{ + //Func - Returns an unpositioned TermPositions enumerator. + //Pre - true + //Post - An unpositioned TermPositions enumerator has been returned + + return _CLNEW SegmentTermPositions(this); +} + +int32_t SegmentReader::docFreq(const Term* t) const +{ + //Func - Returns the number of documents which contain the term t + //Pre - t holds a valid reference to a Term + //Post - The number of documents which contain term t has been returned + + //Get the TermInfo ti for Term t in the set + TermInfo* ti = tis->get(t); + //Check if an TermInfo has been returned + if (ti) { + //Get the frequency of the term + int32_t ret = ti->docFreq; + //TermInfo ti is not needed anymore so delete it + _CLDELETE( ti ); + //return the number of documents which containt term t + return ret; + } + + //No TermInfo returned so return 0 + return 0; +} + +int32_t SegmentReader::numDocs() +{ + //Func - Returns the actual number of documents in the segment + //Pre - true + //Post - The actual number of documents in the segments + + //Get the number of all the documents in the segment including the ones that have + //been marked deleted + int32_t n = maxDoc(); + + //Check if there any deleted docs + if (deletedDocs != NULL) + //Substract the number of deleted docs from the number returned by maxDoc + n -= deletedDocs->count(); + + //return the actual number of documents in the segment + return n; +} + +int32_t SegmentReader::maxDoc() const +{ + //Func - Returns the number of all the documents in the segment including + // the ones that have been marked deleted + //Pre - true + //Post - The total number of documents in the segment has been returned + + return fieldsReader->size(); +} + +void SegmentReader::getFieldNames(FieldOption fldOption, + StringArrayWithDeletor& retarray) +{ + size_t len = fieldInfos->size(); + for (size_t i = 0; i < len; i++) { + bool v = false; + FieldInfo* fi = fieldInfos->fieldInfo(i); + if (fldOption & IndexReader::ALL) { + v = true; + } else { + if (!fi->isIndexed && (fldOption & IndexReader::UNINDEXED)) { + v = true; + } + + if (fi->isIndexed && (fldOption & IndexReader::INDEXED)) { + v = true; + } + + if (fi->isIndexed && fi->storeTermVector == false + && (fldOption & IndexReader::INDEXED_NO_TERMVECTOR)) { + v = true; + } + + if ((fldOption & IndexReader::TERMVECTOR) + && fi->storeTermVector == true + && fi->storePositionWithTermVector == false + && fi->storeOffsetWithTermVector == false) { + v = true; + } + + if (fi->isIndexed && fi->storeTermVector + && (fldOption & IndexReader::INDEXED_WITH_TERMVECTOR)) { + v = true; + } + + if (fi->storePositionWithTermVector + && fi->storeOffsetWithTermVector == false + && (fldOption & IndexReader::TERMVECTOR_WITH_POSITION)) { + v = true; + } + + if (fi->storeOffsetWithTermVector + && fi->storePositionWithTermVector == false + && (fldOption & IndexReader::TERMVECTOR_WITH_OFFSET)) { + v = true; + } + + if ((fi->storeOffsetWithTermVector && fi->storePositionWithTermVector) + && (fldOption & IndexReader::TERMVECTOR_WITH_POSITION_OFFSET)) { + v = true; + } + } + + if (v) + retarray.push_back(STRDUP_TtoT(fi->name)); + } +} + +bool SegmentReader::hasNorms(const TCHAR* field) const +{ + return _norms.find(field) != _norms.end(); +} + + +void SegmentReader::norms(const TCHAR* field, uint8_t* bytes) +{ + //Func - Reads the Norms for field from disk starting at offset in the inputstream + //Pre - field != NULL + // bytes != NULL is an array of bytes which is to be used to read the norms into. + // it is advisable to have bytes initalized by zeroes! + //Post - The if an inputstream to the norm file could be retrieved the bytes have been read + // You are never sure whether or not the norms have been read into bytes properly!!!!!!!!!!!!!!!!! + + CND_PRECONDITION(field != NULL, "field is NULL"); + CND_PRECONDITION(bytes != NULL, "field is NULL"); + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + Norm* norm = _norms.get(field); + if ( norm == NULL ){ + memcpy(bytes, fakeNorms(), maxDoc()); + return; + } + + if (norm->bytes != NULL) { // can copy from cache + memcpy(bytes, norm->bytes, maxDoc()); + return; + } + + IndexInput* _normStream = norm->in->clone(); + CND_PRECONDITION(_normStream != NULL, "normStream==NULL") + + // read from disk + try { + _normStream->seek(0); + _normStream->readBytes(bytes, maxDoc()); + } _CLFINALLY ( + //Have the normstream closed + _normStream->close(); + //Destroy the normstream + _CLDELETE( _normStream ); + ); +} + +uint8_t* SegmentReader::createFakeNorms(int32_t size) +{ + uint8_t* ones = _CL_NEWARRAY(uint8_t,size); + memset(ones, DefaultSimilarity::encodeNorm(1.0f), size); + return ones; +} + +uint8_t* SegmentReader::fakeNorms() +{ + if (ones == NULL) + ones = createFakeNorms(maxDoc()); + return ones; +} + +// can return null if norms aren't stored +uint8_t* SegmentReader::getNorms(const TCHAR* field) +{ + SCOPED_LOCK_MUTEX(THIS_LOCK) + Norm* norm = _norms.get(field); + if (norm == NULL) + return NULL; // not indexed, or norms not stored + + if (norm->bytes == NULL) { // value not yet read + uint8_t* bytes = _CL_NEWARRAY(uint8_t, maxDoc()); + norms(field, bytes); + norm->bytes = bytes; // cache it + } + return norm->bytes; +} + +uint8_t* SegmentReader::norms(const TCHAR* field) +{ + //Func - Returns the bytes array that holds the norms of a named field + //Pre - field != NULL and contains the name of the field for which the norms + // must be retrieved + //Post - If there was norm for the named field then a bytes array has been allocated + // and returned containing the norms for that field. If the named field is unknown NULL is returned. + + CND_PRECONDITION(field != NULL, "field is NULL"); + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + uint8_t* bytes = getNorms(field); + if (bytes == NULL) + bytes = fakeNorms(); + return bytes; +} + +void SegmentReader::doSetNorm(int32_t doc, const TCHAR* field, uint8_t value) +{ + Norm* norm = _norms.get(field); + if (norm == NULL) // not an indexed field + return; + + norm->dirty = true; // mark it dirty + normsDirty = true; + + uint8_t* bits = norms(field); + bits[doc] = value; // set the value +} + +QString SegmentReader::SegmentName(const QString& ext, const int32_t x) +{ + //Func - Returns an allocated buffer in which it creates a filename by + // concatenating segment with ext and x + //Pre ext != NULL and holds the extension + // x contains a number + //Post - A buffer has been instantiated an when x = -1 buffer contains the concatenation of + // segment and ext otherwise buffer contains the contentation of segment, ext and x + + CND_PRECONDITION(!ext.isEmpty(), "ext is NULL"); + + QString buf; + SegmentName(buf, CL_MAX_PATH, ext, x); + return buf; +} + +void SegmentReader::SegmentName(QString& buffer, int32_t bufferLen, + const QString& ext, const int32_t x) +{ + //Func - Creates a filename in buffer by concatenating segment with ext and x + //Pre - buffer != NULL + // ext != NULL + // x contains a number + //Post - When x = -1 buffer contains the concatenation of segment and ext otherwise + // buffer contains the contentation of segment, ext and x + + CND_PRECONDITION(!segment.isEmpty(), "Segment is NULL"); + + Misc::segmentname(buffer, bufferLen, segment, ext, x); +} + +void SegmentReader::openNorms(Directory* cfsDir) +{ + //Func - Open all norms files for all fields + // Creates for each field a norm Instance with an open inputstream to + // a corresponding norm file ready to be read + //Pre - true + //Post - For each field a norm instance has been created with an open inputstream to + // a corresponding norm file ready to be read + + //Iterate through all the fields + for (int32_t i = 0; i < fieldInfos->size(); i++) { + //Get the FieldInfo for the i-th field + FieldInfo* fi = fieldInfos->fieldInfo(i); + //Check if the field is indexed + if (fi->isIndexed && !fi->omitNorms ) { + //Allocate a buffer + QString fileName; + + // look first if there are separate norms in compound format + SegmentName(fileName, CL_MAX_PATH, QLatin1String(".s"), fi->number); + Directory* d = getDirectory(); + if(!d->fileExists(fileName)){ + SegmentName(fileName, CL_MAX_PATH, QLatin1String(".f"), fi->number); + d = cfsDir; + } + + _norms.put(fi->name, _CLNEW Norm(d->openInput(fileName), + fi->number, this, segment)); + } + } +} + +void SegmentReader::closeNorms() +{ + //Func - Close all the norms stored in norms + //Pre - true + //Post - All the norms have been destroyed + + SCOPED_LOCK_MUTEX(_norms.THIS_LOCK) + + //Create an interator initialized at the beginning of norms + NormsType::iterator itr = _norms.begin(); + //Iterate through all the norms + while (itr != _norms.end()) { + // Get, delete the norm + _CLDELETE(itr->second); + // Move the interator to the next norm in the norms collection. + // Note ++ is an overloaded operator + ++itr; + } + _norms.clear(); //bvk: they're deleted, so clear them so that they are not re-used +} + +TermVectorsReader* SegmentReader::getTermVectorsReader() +{ + TermVectorsReader* tvReader = termVectorsLocal.get(); + if (tvReader == NULL) { + tvReader = termVectorsReaderOrig->clone(); + termVectorsLocal.set(tvReader); + } + return tvReader; +} + +TermFreqVector* SegmentReader::getTermFreqVector(int32_t docNumber, + const TCHAR* field) +{ + if (field) { + FieldInfo* fi = fieldInfos->fieldInfo(field); + // Check if this field is invalid or has no stored term vector + if (fi == NULL || !fi->storeTermVector || termVectorsReaderOrig == NULL) + return NULL; + } + + TermVectorsReader* termVectorsReader = getTermVectorsReader(); + if (termVectorsReader == NULL) + return NULL; + + return termVectorsReader->get(docNumber, field); +} + +bool SegmentReader::getTermFreqVectors(int32_t docNumber, + Array<TermFreqVector*>& result) +{ + if (termVectorsReaderOrig == NULL) + return false; + + TermVectorsReader* termVectorsReader = getTermVectorsReader(); + if (termVectorsReader == NULL) + return false; + + return termVectorsReader->get(docNumber, result); +} + +CL_NS_END |