diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/IndexReader.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/index/IndexReader.cpp | 668 |
1 files changed, 668 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/IndexReader.cpp b/3rdparty/clucene/src/CLucene/index/IndexReader.cpp new file mode 100644 index 000000000..91c735632 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/index/IndexReader.cpp @@ -0,0 +1,668 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. +*/ +# include <QtCore/QFile> +# include <QtCore/QStringList> + +#include "CLucene/StdHeader.h" +#include "IndexReader.h" +#include "IndexWriter.h" + +#include "CLucene/store/Directory.h" +#include "CLucene/store/FSDirectory.h" +#include "CLucene/store/Lock.h" +#include "CLucene/document/Document.h" +#include "CLucene/search/Similarity.h" +#include "SegmentInfos.h" +#include "MultiReader.h" +#include "Terms.h" + +CL_NS_USE(util) +CL_NS_USE(store) +CL_NS_DEF(index) + +IndexReader::IndexReader(Directory* dir) + : stale(false) + , hasChanges(false) + , closeDirectory(false) + , directoryOwner(false) + , segmentInfos(NULL) + , directory(_CL_POINTER(dir)) + , writeLock(NULL) +{ +} + +IndexReader::IndexReader(Directory* dir, SegmentInfos* infos, bool close) + : stale(false) + , hasChanges(false) + , closeDirectory(close) + , directoryOwner(true) + , segmentInfos(infos) + , directory(_CL_POINTER(dir)) + , writeLock(NULL) +{ +} + +IndexReader::~IndexReader() +{ + if (writeLock != NULL) { + writeLock->release(); + _CLDELETE(writeLock); + } + _CLDELETE(segmentInfos); + _CLDECDELETE(directory); +} + +IndexReader* IndexReader::open(const QString& path) +{ + //Func - Static method. + // Returns an IndexReader reading the index in an FSDirectory in the named path. + //Pre - path != NULL and contains the path of the index for which an IndexReader must be + // instantiated + // closeDir indicates if the directory needs to be closed + //Post - An IndexReader has been returned that reads tnhe index located at path + + CND_PRECONDITION(!path.isEmpty(), "path is NULL"); + + Directory* dir = FSDirectory::getDirectory(path, false); + IndexReader* reader = open(dir, true); + //because fsdirectory will now have a refcount of 1 more than + //if the reader had been opened with a directory object, + //we need to do a refdec + _CLDECDELETE(dir); + return reader; +} + +IndexReader* IndexReader::open(Directory* directory, bool closeDirectory) +{ + //Func - Static method. + // Returns an IndexReader reading the index in an FSDirectory in the named path. + //Pre - directory represents a directory + // closeDir indicates if the directory needs to be closed + //Post - An IndexReader has been returned that reads the index located at directory + + // in- & inter-process sync + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) + + //Instantiate an IndexReader::LockWith which can produce an IndexReader + LuceneLock* lock = directory->makeLock(QLatin1String("commit.lock")); + IndexReader::LockWith with(lock, directory); + + IndexReader* ret = NULL; + try { + //Create an IndexReader reading the index + ret = with.runAndReturn(); + } _CLFINALLY ( + _CLDELETE(lock); + ); + + CND_CONDITION(ret != NULL, "ret is NULL"); + ret->closeDirectory = closeDirectory; + + return ret; +} + +CL_NS(document)::Document* IndexReader::document(const int32_t n) +{ + CL_NS(document)::Document* ret = _CLNEW CL_NS(document)::Document; + if (!document(n, ret)) + _CLDELETE(ret); + return ret; +} + +IndexReader* IndexReader::LockWith::doBody() +{ + //Func - Reads the segmentinfo file and depending on the number of segments found + // it returns a SegmentsReader or a SegmentReader + //Pre - directory != NULL + //Post - Depending on the number of Segments present in directory this method + // returns an empty SegmentsReader when there are no segments, a SegmentReader when + // directory contains 1 segment and a nonempty SegmentsReader when directory + // contains multiple segements + + CND_PRECONDITION(directory != NULL, "directory is NULL"); + + //Instantiate SegmentInfos + SegmentInfos* infos = _CLNEW SegmentInfos; + try { + //Have SegmentInfos read the segments file in directory + infos->read(directory); + } catch(...) { + //make sure infos is cleaned up + _CLDELETE(infos); + throw; + } + + // If there is at least one segment (if infos.size() >= 1), the last + // SegmentReader object will close the directory when the SegmentReader + // object itself is closed (see SegmentReader::doClose). + // If there are no segments, there will be no "last SegmentReader object" + // to fulfill this responsibility, so we need to explicitly close the + // directory in the segmentsreader.close + + //Count the number segments in the directory + const uint32_t nSegs = infos->size(); + + if (nSegs == 1 ) { + // index is optimized + return _CLNEW SegmentReader(infos, infos->info(0)); + } else { + //Instantiate an array of pointers to SegmentReaders of size nSegs (The number of segments in the index) + IndexReader** readers = NULL; + + if (nSegs > 0){ + uint32_t infosize = infos->size(); + readers = _CL_NEWARRAY(IndexReader*,infosize+1); + for (uint32_t i = 0; i < infosize; ++i) { + //Instantiate a SegementReader responsible for reading the i-th segment and store it in + //the readers array + readers[i] = _CLNEW SegmentReader(infos->info(i)); + } + readers[infosize] = NULL; + } + + //return an instance of SegmentsReader which is a reader that manages all Segments + return _CLNEW MultiReader(directory, infos, readers); + }// end if +} + +uint64_t IndexReader::lastModified(const QString& directory) +{ + //Func - Static method + // Returns the time the index in the named directory was last modified. + //Pre - directory != NULL and contains the path name of the directory to check + //Post - The last modified time of the index has been returned + + CND_PRECONDITION(!directory.isEmpty(), "directory is NULL"); + + return FSDirectory::fileModified(directory, QLatin1String("segments")); +} + +int64_t IndexReader::getCurrentVersion(Directory* directory) +{ + // in- & inter-process sync + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) + + int64_t ret = 0; + bool locked = false; + LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + try { + locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT); + ret = SegmentInfos::readCurrentVersion(directory); + } _CLFINALLY ( + if (locked) + commitLock->release(); + _CLDELETE(commitLock); + ) + return ret; +} + +int64_t IndexReader::getCurrentVersion(const QString& directory) +{ + Directory* dir = FSDirectory::getDirectory(directory, false); + int64_t version = getCurrentVersion(dir); + dir->close(); + _CLDECDELETE(dir); + return version; +} + +int64_t IndexReader::getVersion() +{ + return segmentInfos->getVersion(); +} + +bool IndexReader::isCurrent() +{ + // in- & inter-process sync + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) + + bool ret = false; + bool locked = false; + LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + try { + locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT); + ret = SegmentInfos::readCurrentVersion(directory) + == segmentInfos->getVersion(); + } _CLFINALLY( + if (locked) + commitLock->release(); + _CLDELETE(commitLock); + ) + return ret; +} + +uint64_t IndexReader::lastModified(const Directory* directory) +{ + //Func - Static method + // Returns the time the index in this directory was last modified. + //Pre - directory contains a valid reference + //Post - The last modified time of the index has been returned + + return directory->fileModified(QLatin1String("segments")); +} + + +bool IndexReader::indexExists(const QString& directory) +{ + //Func - Static method + // Checks if an index exists in the named directory + //Pre - directory != NULL + //Post - Returns true if an index exists at the specified directory-> + // If the directory does not exist or if there is no index in it. + // false is returned. + + CND_PRECONDITION(!directory.isEmpty(), "directory is NULL"); + return QFile(directory + QLatin1String("/segments")).exists(); +} + + +void IndexReader::setNorm(int32_t doc, const TCHAR* field, uint8_t value) +{ + SCOPED_LOCK_MUTEX(THIS_LOCK) + if(directoryOwner) + aquireWriteLock(); + doSetNorm(doc, field, value); + hasChanges = true; +} + +void IndexReader::aquireWriteLock() +{ + if (stale) { + _CLTHROWA(CL_ERR_IO, + "IndexReader out of date and no longer valid for delete, " + "undelete, or setNorm operations"); + } + + if (writeLock == NULL) { + LuceneLock* writeLock = directory->makeLock(QLatin1String("write.lock")); + if (!writeLock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock + _CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock + this->writeLock = writeLock; + + // we have to check whether index has changed since this reader was opened. + // if so, this reader is no longer valid for deletion + if (SegmentInfos::readCurrentVersion(directory) > segmentInfos->getVersion()) { + stale = true; + this->writeLock->release(); + _CLDELETE(this->writeLock); + _CLTHROWA(CL_ERR_IO,"IndexReader out of date and no longer valid " + "for delete, undelete, or setNorm operations"); + } + } +} + + +void IndexReader::setNorm(int32_t doc, const TCHAR* field, qreal value) +{ + setNorm(doc, field, CL_NS(search)::Similarity::encodeNorm(value)); +} + +bool IndexReader::indexExists(const Directory* directory) +{ + //Func - Static method + // Checks if an index exists in the directory + //Pre - directory is a valid reference + //Post - Returns true if an index exists at the specified directory-> + // If the directory does not exist or if there is no index in it. + // false is returned. + + return directory->fileExists(QLatin1String("segments")); +} + +TermDocs* IndexReader::termDocs(Term* term) const +{ + //Func - Returns an enumeration of all the documents which contain + // term. For each document, the document number, the frequency of + // the term in that document is also provided, for use in search scoring. + // Thus, this method implements the mapping: + // + // Term => <docNum, freq>* + // The enumeration is ordered by document number. Each document number + // is greater than all that precede it in the enumeration. + //Pre - term != NULL + //Post - A reference to TermDocs containing an enumeration of all found documents + // has been returned + + CND_PRECONDITION(term != NULL, "term is NULL"); + + //Reference an instantiated TermDocs instance + TermDocs* _termDocs = termDocs(); + //Seek all documents containing term + _termDocs->seek(term); + //return the enumaration + return _termDocs; +} + +TermPositions* IndexReader::termPositions(Term* term) const +{ + //Func - Returns an enumeration of all the documents which contain term. For each + // document, in addition to the document number and frequency of the term in + // that document, a list of all of the ordinal positions of the term in the document + // is available. Thus, this method implements the mapping: + // + // Term => <docNum, freq,<pos 1, pos 2, ...pos freq-1>>* + // + // This positional information faciliates phrase and proximity searching. + // The enumeration is ordered by document number. Each document number is greater than + // all that precede it in the enumeration. + //Pre - term != NULL + //Post - A reference to TermPositions containing an enumeration of all found documents + // has been returned + + CND_PRECONDITION(term != NULL, "term is NULL"); + + //Reference an instantiated termPositions instance + TermPositions* _termPositions = termPositions(); + //Seek all documents containing term + _termPositions->seek(term); + //return the enumeration + return _termPositions; +} + +void IndexReader::deleteDocument(const int32_t docNum) +{ + //Func - Deletes the document numbered docNum. Once a document is deleted it will not appear + // in TermDocs or TermPostitions enumerations. Attempts to read its field with the document + // method will result in an error. The presence of this document may still be reflected in + // the docFreq statistic, though this will be corrected eventually as the index is further modified. + //Pre - docNum >= 0 + //Post - If successful the document identified by docNum has been deleted. If no writelock + // could be obtained an exception has been thrown stating that the index was locked or has no write access + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + CND_PRECONDITION(docNum >= 0, "docNum is negative"); + + if (directoryOwner) + aquireWriteLock(); + + //Have the document identified by docNum deleted + doDelete(docNum); + hasChanges = true; +} + +/** +* Commit changes resulting from delete, undeleteAll, or setNorm operations +* +* @throws IOException +*/ +void IndexReader::commit() +{ + SCOPED_LOCK_MUTEX(THIS_LOCK) + if(hasChanges){ + if(directoryOwner){ + { + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync + + LuceneLock* commitLock = directory->makeLock(QLatin1String("commit.lock")); + IndexReader::CommitLockWith cl(commitLock,this); + cl.run(); + _CLDELETE(commitLock); + + } + if (writeLock != NULL) { + writeLock->release(); // release write lock + _CLDELETE(writeLock); + } + }else + doCommit(); + } + hasChanges = false; +} + + +void IndexReader::undeleteAll() +{ + SCOPED_LOCK_MUTEX(THIS_LOCK) + if(directoryOwner) + aquireWriteLock(); + doUndeleteAll(); + hasChanges = true; +} + +int32_t IndexReader::deleteDocuments(Term* term) +{ + //Func - Deletes all documents containing term. This is useful if one uses a + // document field to hold a unique ID string for the document. Then to delete such + // a document, one merely constructs a term with the appropriate field and the unique + // ID string as its text and passes it to this method. + //Pre - term != NULL + //Post - All documents containing term have been deleted. The number of deleted documents + // has been returned + + CND_PRECONDITION(term != NULL, "term is NULL"); + + //Search for the documents contain term + TermDocs* docs = termDocs(term); + + //Check if documents have been found + if ( docs == NULL ){ + return 0; + } + + //initialize + int32_t Counter = 0; + try { + //iterate through the found documents + while (docs->next()) { + //Delete the document + deleteDocument(docs->doc()); + ++Counter; + } + }_CLFINALLY( + //Close the enumeration + docs->close(); + ); + + //Delete the enumeration of found documents + _CLDELETE( docs ); + + //Return the number of deleted documents + return Counter; +} + +TCHAR** IndexReader::getFieldNames() +{ + CL_NS(util)::StringArrayWithDeletor array; + getFieldNames(IndexReader::ALL, array); + + array.setDoDelete(false); + TCHAR** ret = _CL_NEWARRAY(TCHAR*,array.size()+1); + int j=0; + CL_NS(util)::StringArrayWithDeletor::iterator itr = array.begin(); + while ( itr != array.end() ){ + ret[j]=*itr; + ++j;++itr; + } + ret[j]=NULL; + return ret; +} + +TCHAR** IndexReader::getFieldNames(bool indexed) +{ + CL_NS(util)::StringArrayWithDeletor array; + getFieldNames(indexed?IndexReader::INDEXED:IndexReader::UNINDEXED, array); + + array.setDoDelete(false); + TCHAR** ret = _CL_NEWARRAY(TCHAR*,array.size()+1); + int j=0; + CL_NS(util)::StringArrayWithDeletor::iterator itr = array.begin(); + while ( itr != array.end() ){ + ret[j]=*itr; + ++j;++itr; + } + ret[j]=NULL; + return ret; +} + +void IndexReader::close() +{ + //Func - Closes files associated with this index and also saves any new deletions to disk. + // No other methods should be called after this has been called. + //Pre - true + //Post - All files associated with this index have been deleted and new deletions have been + // saved to disk + SCOPED_LOCK_MUTEX(THIS_LOCK) + + CloseCallbackMap::iterator iter; + for (iter = closeCallbacks.begin(); iter != closeCallbacks.end(); iter++) { + CloseCallback callback = *iter->first; + callback(this, iter->second); + } + + commit(); + doClose(); + + if(closeDirectory) { + directory->close(); + _CLDECDELETE(directory); + } +} + +bool IndexReader::isLocked(Directory* directory) +{ + //Func - Static method + // Checks if the index in the directory is currently locked. + //Pre - directory is a valid reference to a directory to check for a lock + //Post - Returns true if the index in the named directory is locked otherwise false + + //Check the existence of the file write.lock and return true when it does and false + //when it doesn't + LuceneLock* l1 = directory->makeLock(QLatin1String("write.lock")); + LuceneLock* l2 = directory->makeLock(QLatin1String("commit.lock")); + + bool ret = l1->isLocked() || l2->isLocked(); + + _CLDELETE(l1); + _CLDELETE(l2); + return ret; +} + +bool IndexReader::isLocked(const QString& directory) +{ + //Func - Static method + // Checks if the index in the named directory is currently locked. + //Pre - directory != NULL and contains the directory to check for a lock + //Post - Returns true if the index in the named directory is locked otherwise false + + CND_PRECONDITION(!directory.isEmpty(), "directory is NULL"); + + Directory* dir = FSDirectory::getDirectory(directory, false); + bool ret = isLocked(dir); + dir->close(); + _CLDECDELETE(dir); + + return ret; +} + +/** Returns true if there are norms stored for this field. */ +bool IndexReader::hasNorms(const TCHAR* field) +{ + // backward compatible implementation. + // SegmentReader has an efficient implementation. + return norms(field) != NULL; +} + +void IndexReader::unlock(const QString& path) +{ + FSDirectory* dir = FSDirectory::getDirectory(path, false); + unlock(dir); + dir->close(); + _CLDECDELETE(dir); +} + +void IndexReader::unlock(Directory* directory) +{ + //Func - Static method + // Forcibly unlocks the index in the named directory-> + // Caution: this should only be used by failure recovery code, + // when it is known that no other process nor thread is in fact + // currently accessing this index. + //Pre - directory is a valid reference to a directory + //Post - The directory has been forcibly unlocked + LuceneLock* lock; + + lock = directory->makeLock(QLatin1String("write.lock")); + lock->release(); + _CLDELETE(lock); + + lock = directory->makeLock(QLatin1String("commit.lock")); + lock->release(); + _CLDELETE(lock); +} + +bool IndexReader::isLuceneFile(const QString& filename) +{ + if (filename.isNull() || filename.isEmpty()) + return false; + + size_t len = filename.length(); + if (len < 6) //need at least x.frx + return false; + + if (filename == QLatin1String("segments")) + return true; + + if (filename == QLatin1String("segments.new")) + return true; + + if (filename == QLatin1String("deletable")) + return true; + + QStringList extList; + extList << QLatin1String(".cfs") + << QLatin1String(".fnm") << QLatin1String(".fdx") << QLatin1String(".fdt") + << QLatin1String(".tii") << QLatin1String(".tis") << QLatin1String(".frq") + << QLatin1String(".prx") << QLatin1String(".del") << QLatin1String(".tvx") + << QLatin1String(".tvd") << QLatin1String(".tvf") << QLatin1String(".tvp"); + + QString suffix = filename.right(4); + if (extList.contains(suffix, Qt::CaseInsensitive)) + return true; + + if (suffix.leftRef(2) == QLatin1String(".f")) { + suffix = suffix.remove(0, 2); + if (suffix.length() > 0) { + for (int i = 0; i < suffix.length(); ++i) { + if (!suffix.at(i).isDigit()) + return false; + } + return true; + } + } + return false; +} + +void IndexReader::addCloseCallback(CloseCallback callback, void* parameter) +{ + closeCallbacks.put(callback, parameter); +} + +// #pragma mark -- IndexReader::LockWith + +IndexReader::LockWith::LockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir) + : CL_NS(store)::LuceneLockWith<IndexReader*>(lock, IndexWriter::COMMIT_LOCK_TIMEOUT) +{ + this->directory = dir; +} + +// #pragma mark -- IndexReader::CommitLockWith + +IndexReader::CommitLockWith::CommitLockWith(CL_NS(store)::LuceneLock* lock, IndexReader* r) + : CL_NS(store)::LuceneLockWith<void>(lock,IndexWriter::COMMIT_LOCK_TIMEOUT) + , reader(r) +{ +} + +void IndexReader::CommitLockWith::doBody() +{ + reader->doCommit(); + reader->segmentInfos->write(reader->getDirectory()); +} + +CL_NS_END |