summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/IndexReader.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/IndexReader.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/index/IndexReader.cpp668
1 files changed, 668 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/IndexReader.cpp b/3rdparty/clucene/src/CLucene/index/IndexReader.cpp
new file mode 100644
index 000000000..91c735632
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/IndexReader.cpp
@@ -0,0 +1,668 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+# include <QtCore/QFile>
+# include <QtCore/QStringList>
+
+#include "CLucene/StdHeader.h"
+#include "IndexReader.h"
+#include "IndexWriter.h"
+
+#include "CLucene/store/Directory.h"
+#include "CLucene/store/FSDirectory.h"
+#include "CLucene/store/Lock.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/search/Similarity.h"
+#include "SegmentInfos.h"
+#include "MultiReader.h"
+#include "Terms.h"
+
+CL_NS_USE(util)
+CL_NS_USE(store)
+CL_NS_DEF(index)
+
+IndexReader::IndexReader(Directory* dir)
+ : stale(false)
+ , hasChanges(false)
+ , closeDirectory(false)
+ , directoryOwner(false)
+ , segmentInfos(NULL)
+ , directory(_CL_POINTER(dir))
+ , writeLock(NULL)
+{
+}
+
+IndexReader::IndexReader(Directory* dir, SegmentInfos* infos, bool close)
+ : stale(false)
+ , hasChanges(false)
+ , closeDirectory(close)
+ , directoryOwner(true)
+ , segmentInfos(infos)
+ , directory(_CL_POINTER(dir))
+ , writeLock(NULL)
+{
+}
+
+IndexReader::~IndexReader()
+{
+ if (writeLock != NULL) {
+ writeLock->release();
+ _CLDELETE(writeLock);
+ }
+ _CLDELETE(segmentInfos);
+ _CLDECDELETE(directory);
+}
+
+IndexReader* IndexReader::open(const QString& path)
+{
+ //Func - Static method.
+ // Returns an IndexReader reading the index in an FSDirectory in the named path.
+ //Pre - path != NULL and contains the path of the index for which an IndexReader must be
+ // instantiated
+ // closeDir indicates if the directory needs to be closed
+ //Post - An IndexReader has been returned that reads tnhe index located at path
+
+ CND_PRECONDITION(!path.isEmpty(), "path is NULL");
+
+ Directory* dir = FSDirectory::getDirectory(path, false);
+ IndexReader* reader = open(dir, true);
+ //because fsdirectory will now have a refcount of 1 more than
+ //if the reader had been opened with a directory object,
+ //we need to do a refdec
+ _CLDECDELETE(dir);
+ return reader;
+}
+
+IndexReader* IndexReader::open(Directory* directory, bool closeDirectory)
+{
+ //Func - Static method.
+ // Returns an IndexReader reading the index in an FSDirectory in the named path.
+ //Pre - directory represents a directory
+ // closeDir indicates if the directory needs to be closed
+ //Post - An IndexReader has been returned that reads the index located at directory
+
+ // in- & inter-process sync
+ SCOPED_LOCK_MUTEX(directory->THIS_LOCK)
+
+ //Instantiate an IndexReader::LockWith which can produce an IndexReader
+ LuceneLock* lock = directory->makeLock(QLatin1String("commit.lock"));
+ IndexReader::LockWith with(lock, directory);
+
+ IndexReader* ret = NULL;
+ try {
+ //Create an IndexReader reading the index
+ ret = with.runAndReturn();
+ } _CLFINALLY (
+ _CLDELETE(lock);
+ );
+
+ CND_CONDITION(ret != NULL, "ret is NULL");
+ ret->closeDirectory = closeDirectory;
+
+ return ret;
+}
+
+CL_NS(document)::Document* IndexReader::document(const int32_t n)
+{
+ CL_NS(document)::Document* ret = _CLNEW CL_NS(document)::Document;
+ if (!document(n, ret))
+ _CLDELETE(ret);
+ return ret;
+}
+
+IndexReader* IndexReader::LockWith::doBody()
+{
+ //Func - Reads the segmentinfo file and depending on the number of segments found
+ // it returns a SegmentsReader or a SegmentReader
+ //Pre - directory != NULL
+ //Post - Depending on the number of Segments present in directory this method
+ // returns an empty SegmentsReader when there are no segments, a SegmentReader when
+ // directory contains 1 segment and a nonempty SegmentsReader when directory
+ // contains multiple segements
+
+ CND_PRECONDITION(directory != NULL, "directory is NULL");
+
+ //Instantiate SegmentInfos
+ SegmentInfos* infos = _CLNEW SegmentInfos;
+ try {
+ //Have SegmentInfos read the segments file in directory
+ infos->read(directory);
+ } catch(...) {
+ //make sure infos is cleaned up
+ _CLDELETE(infos);
+ throw;
+ }
+
+ // If there is at least one segment (if infos.size() >= 1), the last
+ // SegmentReader object will close the directory when the SegmentReader
+ // object itself is closed (see SegmentReader::doClose).
+ // If there are no segments, there will be no "last SegmentReader object"
+ // to fulfill this responsibility, so we need to explicitly close the
+ // directory in the segmentsreader.close
+
+ //Count the number segments in the directory
+ const uint32_t nSegs = infos->size();
+
+ if (nSegs == 1 ) {
+ // index is optimized
+ return _CLNEW SegmentReader(infos, infos->info(0));
+ } else {
+ //Instantiate an array of pointers to SegmentReaders of size nSegs (The number of segments in the index)
+ IndexReader** readers = NULL;
+
+ if (nSegs > 0){
+ uint32_t infosize = infos->size();
+ readers = _CL_NEWARRAY(IndexReader*,infosize+1);
+ for (uint32_t i = 0; i < infosize; ++i) {
+ //Instantiate a SegementReader responsible for reading the i-th segment and store it in
+ //the readers array
+ readers[i] = _CLNEW SegmentReader(infos->info(i));
+ }
+ readers[infosize] = NULL;
+ }
+
+ //return an instance of SegmentsReader which is a reader that manages all Segments
+ return _CLNEW MultiReader(directory, infos, readers);
+ }// end if
+}
+
+uint64_t IndexReader::lastModified(const QString& directory)
+{
+ //Func - Static method
+ // Returns the time the index in the named directory was last modified.
+ //Pre - directory != NULL and contains the path name of the directory to check
+ //Post - The last modified time of the index has been returned
+
+ CND_PRECONDITION(!directory.isEmpty(), "directory is NULL");
+
+ return FSDirectory::fileModified(directory, QLatin1String("segments"));
+}
+
+int64_t IndexReader::getCurrentVersion(Directory* directory)
+{
+ // in- & inter-process sync
+ SCOPED_LOCK_MUTEX(directory->THIS_LOCK)
+
+ int64_t ret = 0;
+ bool locked = false;
+ LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+ try {
+ locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
+ ret = SegmentInfos::readCurrentVersion(directory);
+ } _CLFINALLY (
+ if (locked)
+ commitLock->release();
+ _CLDELETE(commitLock);
+ )
+ return ret;
+}
+
+int64_t IndexReader::getCurrentVersion(const QString& directory)
+{
+ Directory* dir = FSDirectory::getDirectory(directory, false);
+ int64_t version = getCurrentVersion(dir);
+ dir->close();
+ _CLDECDELETE(dir);
+ return version;
+}
+
+int64_t IndexReader::getVersion()
+{
+ return segmentInfos->getVersion();
+}
+
+bool IndexReader::isCurrent()
+{
+ // in- & inter-process sync
+ SCOPED_LOCK_MUTEX(directory->THIS_LOCK)
+
+ bool ret = false;
+ bool locked = false;
+ LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+ try {
+ locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
+ ret = SegmentInfos::readCurrentVersion(directory)
+ == segmentInfos->getVersion();
+ } _CLFINALLY(
+ if (locked)
+ commitLock->release();
+ _CLDELETE(commitLock);
+ )
+ return ret;
+}
+
+uint64_t IndexReader::lastModified(const Directory* directory)
+{
+ //Func - Static method
+ // Returns the time the index in this directory was last modified.
+ //Pre - directory contains a valid reference
+ //Post - The last modified time of the index has been returned
+
+ return directory->fileModified(QLatin1String("segments"));
+}
+
+
+bool IndexReader::indexExists(const QString& directory)
+{
+ //Func - Static method
+ // Checks if an index exists in the named directory
+ //Pre - directory != NULL
+ //Post - Returns true if an index exists at the specified directory->
+ // If the directory does not exist or if there is no index in it.
+ // false is returned.
+
+ CND_PRECONDITION(!directory.isEmpty(), "directory is NULL");
+ return QFile(directory + QLatin1String("/segments")).exists();
+}
+
+
+void IndexReader::setNorm(int32_t doc, const TCHAR* field, uint8_t value)
+{
+ SCOPED_LOCK_MUTEX(THIS_LOCK)
+ if(directoryOwner)
+ aquireWriteLock();
+ doSetNorm(doc, field, value);
+ hasChanges = true;
+}
+
+void IndexReader::aquireWriteLock()
+{
+ if (stale) {
+ _CLTHROWA(CL_ERR_IO,
+ "IndexReader out of date and no longer valid for delete, "
+ "undelete, or setNorm operations");
+ }
+
+ if (writeLock == NULL) {
+ LuceneLock* writeLock = directory->makeLock(QLatin1String("write.lock"));
+ if (!writeLock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock
+ _CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock
+ this->writeLock = writeLock;
+
+ // we have to check whether index has changed since this reader was opened.
+ // if so, this reader is no longer valid for deletion
+ if (SegmentInfos::readCurrentVersion(directory) > segmentInfos->getVersion()) {
+ stale = true;
+ this->writeLock->release();
+ _CLDELETE(this->writeLock);
+ _CLTHROWA(CL_ERR_IO,"IndexReader out of date and no longer valid "
+ "for delete, undelete, or setNorm operations");
+ }
+ }
+}
+
+
+void IndexReader::setNorm(int32_t doc, const TCHAR* field, qreal value)
+{
+ setNorm(doc, field, CL_NS(search)::Similarity::encodeNorm(value));
+}
+
+bool IndexReader::indexExists(const Directory* directory)
+{
+ //Func - Static method
+ // Checks if an index exists in the directory
+ //Pre - directory is a valid reference
+ //Post - Returns true if an index exists at the specified directory->
+ // If the directory does not exist or if there is no index in it.
+ // false is returned.
+
+ return directory->fileExists(QLatin1String("segments"));
+}
+
+TermDocs* IndexReader::termDocs(Term* term) const
+{
+ //Func - Returns an enumeration of all the documents which contain
+ // term. For each document, the document number, the frequency of
+ // the term in that document is also provided, for use in search scoring.
+ // Thus, this method implements the mapping:
+ //
+ // Term => <docNum, freq>*
+ // The enumeration is ordered by document number. Each document number
+ // is greater than all that precede it in the enumeration.
+ //Pre - term != NULL
+ //Post - A reference to TermDocs containing an enumeration of all found documents
+ // has been returned
+
+ CND_PRECONDITION(term != NULL, "term is NULL");
+
+ //Reference an instantiated TermDocs instance
+ TermDocs* _termDocs = termDocs();
+ //Seek all documents containing term
+ _termDocs->seek(term);
+ //return the enumaration
+ return _termDocs;
+}
+
+TermPositions* IndexReader::termPositions(Term* term) const
+{
+ //Func - Returns an enumeration of all the documents which contain term. For each
+ // document, in addition to the document number and frequency of the term in
+ // that document, a list of all of the ordinal positions of the term in the document
+ // is available. Thus, this method implements the mapping:
+ //
+ // Term => <docNum, freq,<pos 1, pos 2, ...pos freq-1>>*
+ //
+ // This positional information faciliates phrase and proximity searching.
+ // The enumeration is ordered by document number. Each document number is greater than
+ // all that precede it in the enumeration.
+ //Pre - term != NULL
+ //Post - A reference to TermPositions containing an enumeration of all found documents
+ // has been returned
+
+ CND_PRECONDITION(term != NULL, "term is NULL");
+
+ //Reference an instantiated termPositions instance
+ TermPositions* _termPositions = termPositions();
+ //Seek all documents containing term
+ _termPositions->seek(term);
+ //return the enumeration
+ return _termPositions;
+}
+
+void IndexReader::deleteDocument(const int32_t docNum)
+{
+ //Func - Deletes the document numbered docNum. Once a document is deleted it will not appear
+ // in TermDocs or TermPostitions enumerations. Attempts to read its field with the document
+ // method will result in an error. The presence of this document may still be reflected in
+ // the docFreq statistic, though this will be corrected eventually as the index is further modified.
+ //Pre - docNum >= 0
+ //Post - If successful the document identified by docNum has been deleted. If no writelock
+ // could be obtained an exception has been thrown stating that the index was locked or has no write access
+
+ SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+ CND_PRECONDITION(docNum >= 0, "docNum is negative");
+
+ if (directoryOwner)
+ aquireWriteLock();
+
+ //Have the document identified by docNum deleted
+ doDelete(docNum);
+ hasChanges = true;
+}
+
+/**
+* Commit changes resulting from delete, undeleteAll, or setNorm operations
+*
+* @throws IOException
+*/
+void IndexReader::commit()
+{
+ SCOPED_LOCK_MUTEX(THIS_LOCK)
+ if(hasChanges){
+ if(directoryOwner){
+ {
+ SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
+
+ LuceneLock* commitLock = directory->makeLock(QLatin1String("commit.lock"));
+ IndexReader::CommitLockWith cl(commitLock,this);
+ cl.run();
+ _CLDELETE(commitLock);
+
+ }
+ if (writeLock != NULL) {
+ writeLock->release(); // release write lock
+ _CLDELETE(writeLock);
+ }
+ }else
+ doCommit();
+ }
+ hasChanges = false;
+}
+
+
+void IndexReader::undeleteAll()
+{
+ SCOPED_LOCK_MUTEX(THIS_LOCK)
+ if(directoryOwner)
+ aquireWriteLock();
+ doUndeleteAll();
+ hasChanges = true;
+}
+
+int32_t IndexReader::deleteDocuments(Term* term)
+{
+ //Func - Deletes all documents containing term. This is useful if one uses a
+ // document field to hold a unique ID string for the document. Then to delete such
+ // a document, one merely constructs a term with the appropriate field and the unique
+ // ID string as its text and passes it to this method.
+ //Pre - term != NULL
+ //Post - All documents containing term have been deleted. The number of deleted documents
+ // has been returned
+
+ CND_PRECONDITION(term != NULL, "term is NULL");
+
+ //Search for the documents contain term
+ TermDocs* docs = termDocs(term);
+
+ //Check if documents have been found
+ if ( docs == NULL ){
+ return 0;
+ }
+
+ //initialize
+ int32_t Counter = 0;
+ try {
+ //iterate through the found documents
+ while (docs->next()) {
+ //Delete the document
+ deleteDocument(docs->doc());
+ ++Counter;
+ }
+ }_CLFINALLY(
+ //Close the enumeration
+ docs->close();
+ );
+
+ //Delete the enumeration of found documents
+ _CLDELETE( docs );
+
+ //Return the number of deleted documents
+ return Counter;
+}
+
+TCHAR** IndexReader::getFieldNames()
+{
+ CL_NS(util)::StringArrayWithDeletor array;
+ getFieldNames(IndexReader::ALL, array);
+
+ array.setDoDelete(false);
+ TCHAR** ret = _CL_NEWARRAY(TCHAR*,array.size()+1);
+ int j=0;
+ CL_NS(util)::StringArrayWithDeletor::iterator itr = array.begin();
+ while ( itr != array.end() ){
+ ret[j]=*itr;
+ ++j;++itr;
+ }
+ ret[j]=NULL;
+ return ret;
+}
+
+TCHAR** IndexReader::getFieldNames(bool indexed)
+{
+ CL_NS(util)::StringArrayWithDeletor array;
+ getFieldNames(indexed?IndexReader::INDEXED:IndexReader::UNINDEXED, array);
+
+ array.setDoDelete(false);
+ TCHAR** ret = _CL_NEWARRAY(TCHAR*,array.size()+1);
+ int j=0;
+ CL_NS(util)::StringArrayWithDeletor::iterator itr = array.begin();
+ while ( itr != array.end() ){
+ ret[j]=*itr;
+ ++j;++itr;
+ }
+ ret[j]=NULL;
+ return ret;
+}
+
+void IndexReader::close()
+{
+ //Func - Closes files associated with this index and also saves any new deletions to disk.
+ // No other methods should be called after this has been called.
+ //Pre - true
+ //Post - All files associated with this index have been deleted and new deletions have been
+ // saved to disk
+ SCOPED_LOCK_MUTEX(THIS_LOCK)
+
+ CloseCallbackMap::iterator iter;
+ for (iter = closeCallbacks.begin(); iter != closeCallbacks.end(); iter++) {
+ CloseCallback callback = *iter->first;
+ callback(this, iter->second);
+ }
+
+ commit();
+ doClose();
+
+ if(closeDirectory) {
+ directory->close();
+ _CLDECDELETE(directory);
+ }
+}
+
+bool IndexReader::isLocked(Directory* directory)
+{
+ //Func - Static method
+ // Checks if the index in the directory is currently locked.
+ //Pre - directory is a valid reference to a directory to check for a lock
+ //Post - Returns true if the index in the named directory is locked otherwise false
+
+ //Check the existence of the file write.lock and return true when it does and false
+ //when it doesn't
+ LuceneLock* l1 = directory->makeLock(QLatin1String("write.lock"));
+ LuceneLock* l2 = directory->makeLock(QLatin1String("commit.lock"));
+
+ bool ret = l1->isLocked() || l2->isLocked();
+
+ _CLDELETE(l1);
+ _CLDELETE(l2);
+ return ret;
+}
+
+bool IndexReader::isLocked(const QString& directory)
+{
+ //Func - Static method
+ // Checks if the index in the named directory is currently locked.
+ //Pre - directory != NULL and contains the directory to check for a lock
+ //Post - Returns true if the index in the named directory is locked otherwise false
+
+ CND_PRECONDITION(!directory.isEmpty(), "directory is NULL");
+
+ Directory* dir = FSDirectory::getDirectory(directory, false);
+ bool ret = isLocked(dir);
+ dir->close();
+ _CLDECDELETE(dir);
+
+ return ret;
+}
+
+/** Returns true if there are norms stored for this field. */
+bool IndexReader::hasNorms(const TCHAR* field)
+{
+ // backward compatible implementation.
+ // SegmentReader has an efficient implementation.
+ return norms(field) != NULL;
+}
+
+void IndexReader::unlock(const QString& path)
+{
+ FSDirectory* dir = FSDirectory::getDirectory(path, false);
+ unlock(dir);
+ dir->close();
+ _CLDECDELETE(dir);
+}
+
+void IndexReader::unlock(Directory* directory)
+{
+ //Func - Static method
+ // Forcibly unlocks the index in the named directory->
+ // Caution: this should only be used by failure recovery code,
+ // when it is known that no other process nor thread is in fact
+ // currently accessing this index.
+ //Pre - directory is a valid reference to a directory
+ //Post - The directory has been forcibly unlocked
+ LuceneLock* lock;
+
+ lock = directory->makeLock(QLatin1String("write.lock"));
+ lock->release();
+ _CLDELETE(lock);
+
+ lock = directory->makeLock(QLatin1String("commit.lock"));
+ lock->release();
+ _CLDELETE(lock);
+}
+
+bool IndexReader::isLuceneFile(const QString& filename)
+{
+ if (filename.isNull() || filename.isEmpty())
+ return false;
+
+ size_t len = filename.length();
+ if (len < 6) //need at least x.frx
+ return false;
+
+ if (filename == QLatin1String("segments"))
+ return true;
+
+ if (filename == QLatin1String("segments.new"))
+ return true;
+
+ if (filename == QLatin1String("deletable"))
+ return true;
+
+ QStringList extList;
+ extList << QLatin1String(".cfs")
+ << QLatin1String(".fnm") << QLatin1String(".fdx") << QLatin1String(".fdt")
+ << QLatin1String(".tii") << QLatin1String(".tis") << QLatin1String(".frq")
+ << QLatin1String(".prx") << QLatin1String(".del") << QLatin1String(".tvx")
+ << QLatin1String(".tvd") << QLatin1String(".tvf") << QLatin1String(".tvp");
+
+ QString suffix = filename.right(4);
+ if (extList.contains(suffix, Qt::CaseInsensitive))
+ return true;
+
+ if (suffix.leftRef(2) == QLatin1String(".f")) {
+ suffix = suffix.remove(0, 2);
+ if (suffix.length() > 0) {
+ for (int i = 0; i < suffix.length(); ++i) {
+ if (!suffix.at(i).isDigit())
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+void IndexReader::addCloseCallback(CloseCallback callback, void* parameter)
+{
+ closeCallbacks.put(callback, parameter);
+}
+
+// #pragma mark -- IndexReader::LockWith
+
+IndexReader::LockWith::LockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir)
+ : CL_NS(store)::LuceneLockWith<IndexReader*>(lock, IndexWriter::COMMIT_LOCK_TIMEOUT)
+{
+ this->directory = dir;
+}
+
+// #pragma mark -- IndexReader::CommitLockWith
+
+IndexReader::CommitLockWith::CommitLockWith(CL_NS(store)::LuceneLock* lock, IndexReader* r)
+ : CL_NS(store)::LuceneLockWith<void>(lock,IndexWriter::COMMIT_LOCK_TIMEOUT)
+ , reader(r)
+{
+}
+
+void IndexReader::CommitLockWith::doBody()
+{
+ reader->doCommit();
+ reader->segmentInfos->write(reader->getDirectory());
+}
+
+CL_NS_END