1 files changed, 425 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/IndexWriter.h b/3rdparty/clucene/src/CLucene/index/IndexWriter.h
new file mode 100644
index 000000000..80476c864
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/IndexWriter.h
@@ -0,0 +1,425 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or 
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#ifndef _lucene_index_IndexWriter_
+#define _lucene_index_IndexWriter_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+#   pragma once
+#endif
+
+#include <QtCore/QString>
+#include <QtCore/QStringList>
+
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "CLucene/util/VoidList.h"
+#include "CLucene/search/Similarity.h"
+#include "CLucene/store/Lock.h"
+#include "CLucene/store/TransactionalRAMDirectory.h"
+
+#include "SegmentHeader.h"
+
+CL_NS_DEF(index)
+
+/**
+An IndexWriter creates and maintains an index.
+
+The third argument to the 
+<a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
+determines whether a new index is created, or whether an existing index is
+opened for the addition of new documents.
+
+In either case, documents are added with the <a
+href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method.  
+When finished adding documents, <a href="#close()"><b>close</b></a> should be called.
+
+<p>If an index will not have more documents added for a while and optimal search
+performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
+method should be called before the index is closed.
+
+<p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open
+another IndexWriter on the same directory will lead to an IOException. The IOException
+is also thrown if an IndexReader on the same directory is used to delete documents
+from the index.
+
+@see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion
+*/
+class IndexWriter : LUCENE_BASE
+{
+	class LockWith2 : public CL_NS(store)::LuceneLockWith<void> // helper built on LuceneLockWith; doBody() is defined out of line
+    {
+	public:
+		LockWith2(CL_NS(store)::LuceneLock* lock,
+                  int64_t lockWaitTimeout,
+                  IndexWriter* wr,
+                  CL_NS(util)::CLVector<SegmentReader*>* std,
+                  bool create);
+
+        ~LockWith2() {}
+
+		void doBody();
+
+    private:
+        bool create;
+        IndexWriter* writer;
+		CL_NS(util)::CLVector<SegmentReader*>* segmentsToDelete;
+	};
+	friend class LockWith2;
+
+	class LockWithCFS : public CL_NS(store)::LuceneLockWith<void> // helper built on LuceneLockWith; doBody() is defined out of line
+    {
+	public:
+		LockWithCFS(CL_NS(store)::LuceneLock* lock,
+                    int64_t lockWaitTimeout,
+                    CL_NS(store)::Directory* dir,
+                    IndexWriter* wr,
+                    const QString& segName,
+                    const QStringList& ftd);
+		
+        ~LockWithCFS() {}
+
+        void doBody();
+
+    private:
+		QString segName;        
+        IndexWriter* writer;
+		CL_NS(store)::Directory* directory;
+		QStringList filesToDelete;
+	};
+    friend class IndexWriter::LockWithCFS;
+
+    // indicates whether the writer is open - this way close can be called
+    // multiple times
+    bool isOpen;
+
+	// how to analyze text
+	CL_NS(analysis)::Analyzer* analyzer;
+
+	CL_NS(search)::Similarity* similarity; // how to normalize
+
+	/** Use compound file setting. Normally defaults to true, except when
+	* using a RAMDirectory. This minimizes the number of files used.  
+	* Setting this to false may improve indexing performance, but
+	* may also cause file handle problems.
+	*/
+	bool useCompoundFile;
+	bool closeDir; // NOTE(review): presumably set from the constructors' closeDir argument - confirm
+
+    // for temp segs
+	CL_NS(store)::TransactionalRAMDirectory* ramDirectory;
+
+	CL_NS(store)::LuceneLock* writeLock;
+
+	void _IndexWriter(const bool create); // NOTE(review): presumably the initialisation shared by both constructors - confirm
+
+	void _finalize();
+
+	// where this index resides
+	CL_NS(store)::Directory* directory;		
+		
+		
+	int32_t getSegmentsCounter() { return segmentInfos.counter; }
+	int32_t maxFieldLength;
+	int32_t mergeFactor;
+	int32_t minMergeDocs;
+	int32_t maxMergeDocs;
+	int32_t termIndexInterval;
+
+	int64_t writeLockTimeout;
+	int64_t commitLockTimeout;
+public:
+	DEFINE_MUTEX(THIS_LOCK)
+	
+	// Segment infos of this index (the "stack" that mergeSegments() works on).
+	SegmentInfos segmentInfos;
+  
+	// Release the write lock, if needed.
+	~IndexWriter();
+
+	/**
+	*  The Java implementation of Lucene silently truncates any tokenized
+	*  field if the number of tokens exceeds a certain threshold.  Although
+	*  that threshold is adjustable, it is easy for the client programmer
+	*  to be unaware that such a threshold exists, and to become its
+	*  unwitting victim.
+	*  CLucene implements a less insidious truncation policy.  Up to
+	*  DEFAULT_MAX_FIELD_LENGTH tokens, CLucene behaves just as JLucene
+	*  does.  If the number of tokens exceeds that threshold without any
+	*  indication of a truncation preference by the client programmer,
+	*  CLucene raises an exception, prompting the client programmer to
+	*  explicitly set a truncation policy by adjusting maxFieldLength.
+	*/
+	LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_FIELD_LENGTH = 10000);
+	LUCENE_STATIC_CONSTANT(int32_t, FIELD_TRUNC_POLICY__WARN = -1); // NOTE(review): presumably a maxFieldLength sentinel - confirm
+	int32_t getMaxFieldLength() const{ return maxFieldLength; }
+	void setMaxFieldLength(int32_t val){ maxFieldLength = val; }
+
+	/**
+	* Default value is 10. Change using {@link #setMaxBufferedDocs(int)}.
+	*/
+	LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_BUFFERED_DOCS = 10);
+	/** Determines the minimal number of documents required before the buffered
+	* in-memory documents are merged and a new Segment is created.
+	* Since Documents are merged in a {@link RAMDirectory},
+	* large value gives faster indexing.  At the same time, mergeFactor limits
+	* the number of files open in a FSDirectory.
+	*
+	* <p> The default value is DEFAULT_MAX_BUFFERED_DOCS.*/
+	void setMaxBufferedDocs(int32_t val){ minMergeDocs = val; } // writes the same field as setMinMergeDocs()
+	/**
+	* @see #setMaxBufferedDocs
+	*/
+	int32_t getMaxBufferedDocs(){ return minMergeDocs; }
+	
+	/**
+	* Default value for the write lock timeout (1,000 milliseconds).
+	*/
+	LUCENE_STATIC_CONSTANT(int64_t, WRITE_LOCK_TIMEOUT = 1000);
+	/**
+	* Sets the maximum time to wait for a write lock (in milliseconds).
+	*/
+	void setWriteLockTimeout(int64_t writeLockTimeout)
+    { this->writeLockTimeout = writeLockTimeout; }
+	/**
+	* @see #setWriteLockTimeout
+	*/
+	int64_t getWriteLockTimeout() { return writeLockTimeout; }
+	
+	/**
+	* Default value for the commit lock timeout (10,000 milliseconds).
+	*/
+	LUCENE_STATIC_CONSTANT(int64_t, COMMIT_LOCK_TIMEOUT = 10000);
+	/**
+	* Sets the maximum time to wait for a commit lock (in milliseconds).
+	*/
+	void setCommitLockTimeout(int64_t commitLockTimeout)
+    { this->commitLockTimeout = commitLockTimeout; }
+	/**
+	* @see #setCommitLockTimeout
+	*/
+	int64_t getCommitLockTimeout() { return commitLockTimeout; }
+
+	static const QLatin1String WRITE_LOCK_NAME; //"write.lock";
+	static const QLatin1String COMMIT_LOCK_NAME; //"commit.lock";
+	
+	/**
+	* Default value is 10. Change using {@link #setMergeFactor(int)}.
+	*/
+	LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MERGE_FACTOR = 10);
+	/** Determines how often segment indices are merged by addDocument().  With
+	*  smaller values, less RAM is used while indexing, and searches on
+	*  unoptimized indices are faster, but indexing speed is slower.  With larger
+	*  values more RAM is used while indexing and searches on unoptimized indices
+	*  are slower, but indexing is faster.  Thus larger values (> 10) are best
+	*  for batched index creation, and smaller values (< 10) for indices that are
+	*  interactively maintained.
+	*
+	* <p>This must never be less than 2.  The default value is 10.
+	*/
+	int32_t getMergeFactor() const{ return mergeFactor; }
+	void setMergeFactor(int32_t val){ mergeFactor = val; }
+
+	
+	/** Expert: The fraction of terms in the "dictionary" which should be stored
+	*   in RAM.  Smaller values use more memory, but make searching slightly
+	*   faster, while larger values use less memory and make searching slightly
+	*   slower.  Searching is typically not dominated by dictionary lookup, so
+	*   tweaking this is rarely useful.
+	*/
+	LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_TERM_INDEX_INTERVAL = 128);
+	/** Expert: Set the interval between indexed terms.  Large values cause less
+	* memory to be used by IndexReader, but slow random-access to terms.  Small
+	* values cause more memory to be used by an IndexReader, and speed
+	* random-access to terms.
+	*
+	* This parameter determines the amount of computation required per query
+	* term, regardless of the number of documents that contain that term.  In
+	* particular, it is the maximum number of other terms that must be
+	* scanned before a term is located and its frequency and position information
+	* may be processed.  In a large index with user-entered query terms, query
+	* processing time is likely to be dominated not by term lookup but rather
+	* by the processing of frequency and positional data.  In a small index
+	* or when many uncommon query terms are generated (e.g., by wildcard
+	* queries) term lookup may become a dominant cost.
+	*
+	* In particular, <code>numUniqueTerms/interval</code> terms are read into
+	* memory by an IndexReader, and, on average, <code>interval/2</code> terms
+	* must be scanned for each random term access.
+	*
+	* @see #DEFAULT_TERM_INDEX_INTERVAL
+	*/
+	void setTermIndexInterval(int32_t interval) { termIndexInterval = interval; }
+	/** Expert: Return the interval between indexed terms.
+	*
+	* @see #setTermIndexInterval(int)
+	*/
+	int32_t getTermIndexInterval() { return termIndexInterval; }
+  
+	/** Determines the minimal number of documents required before the buffered
+	* in-memory documents are merged and a new Segment is created.
+	* Since Documents are merged in a {@link RAMDirectory},
+	* large value gives faster indexing.  At the same time, mergeFactor limits
+	* the number of files open in a FSDirectory.
+	*
+	* <p> The default value is 10.*/
+	int32_t getMinMergeDocs() const{ return minMergeDocs; }
+	void setMinMergeDocs(int32_t val){ minMergeDocs = val; }
+
+	/** Determines the largest number of documents ever merged by addDocument().
+	* Small values (e.g., less than 10,000) are best for interactive indexing,
+	* as this limits the length of pauses while indexing to a few seconds.
+	* Larger values are best for batched indexing and speedier searches.
+	*
+	* <p>The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
+	*/
+	LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_MERGE_DOCS = LUCENE_INT32_MAX_SHOULDBE);
+	/**Determines the largest number of documents ever merged by addDocument().
+	*  Small values (e.g., less than 10,000) are best for interactive indexing,
+	*  as this limits the length of pauses while indexing to a few seconds.
+	*  Larger values are best for batched indexing and speedier searches.
+	*
+	*  <p>The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
+	*/
+	int32_t getMaxMergeDocs() const{ return maxMergeDocs; }
+	void setMaxMergeDocs(int32_t val){ maxMergeDocs = val; }
+
+	/**
+	* Constructs an IndexWriter for the index in <code>path</code>.
+	* Text will be analyzed with <code>a</code>.  If <code>create</code>
+	* is true, then a new, empty index will be created in
+	* <code>path</code>, replacing the index already there, if any.
+	*
+	* @param path the path to the index directory
+	* @param a the analyzer to use
+	* @param create <code>true</code> to create the index or overwrite
+	*  the existing one; <code>false</code> to append to the existing
+	*  index
+	* @throws IOException if the directory cannot be read/written to, or
+	*  if it does not exist, and <code>create</code> is
+	*  <code>false</code>
+	*/
+	IndexWriter(const QString& path, CL_NS(analysis)::Analyzer* a,
+        const bool create, const bool closeDir = true); // closeDir defaults to true here, false in the Directory overload
+	
+	
+	/**Constructs an IndexWriter for the index in <code>d</code>.  Text will be
+	*  analyzed with <code>a</code>.  If <code>create</code> is true, then a new,
+	*  empty index will be created in <code>d</code>, replacing the index already
+	*  there, if any.
+	*/
+	IndexWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a,
+        const bool create, const bool closeDir = false);
+
+    // Flushes all changes to an index, closes all associated files, and closes
+    // the directory that the index is stored in.
+	void close();
+
+	// Returns the number of documents currently in this index. synchronized
+	int32_t docCount();
+
+
+    // Adds a document to this index, using the provided analyzer instead of
+    // the value of getAnalyzer(). NOTE(review): the original note that terms
+    // beyond setMaxFieldLength(int) are discarded appears to conflict with the
+    // exception policy described at DEFAULT_MAX_FIELD_LENGTH - confirm.
+	void addDocument(CL_NS(document)::Document* doc,
+        CL_NS(analysis)::Analyzer* analyzer = NULL);
+  
+
+	// Merges all segments together into a single segment, optimizing an index
+	// for search. synchronized
+	void optimize();
+
+
+	/**Merges all segments from an array of indices into this index.
+	*  
+	*  <p>This may be used to parallelize batch indexing.  A large document
+	*  collection can be broken into sub-collections.  Each sub-collection can be
+	*  indexed in parallel, on a different thread, process or machine.  The
+	*  complete index can then be created by merging sub-collection indices
+	*  with this method.
+	*
+	*  <p>After this completes, the index is optimized.
+	*@synchronized
+	*/
+	void addIndexes(CL_NS(store)::Directory** dirs); // NOTE(review): no count argument - presumably NULL-terminated; confirm
+		
+	/** Merges the provided indexes into this index.
+	* <p>After this completes, the index is optimized. </p>
+	* <p>The provided IndexReaders are not closed.</p>
+	*/
+	void addIndexes(IndexReader** readers); // NOTE(review): no count argument - presumably NULL-terminated; confirm
+
+
+	/** Returns the directory this index resides in. */
+	CL_NS(store)::Directory* getDirectory() { return directory; }
+
+	/** Get the current setting of whether to use the compound file format.
+	*  Note that this just returns the value you set with setUseCompoundFile(boolean)
+	*  or the default. You cannot use this to query the status of an existing index.
+	*  @see #setUseCompoundFile(boolean)
+	*/
+	bool getUseCompoundFile() { return useCompoundFile; }
+
+	/** Setting to turn on usage of a compound file. When on, multiple files
+	*  for each segment are merged into a single file once the segment creation
+	*  is finished. This is done regardless of what directory is in use.
+	*/
+	void setUseCompoundFile(bool value) { useCompoundFile = value; }
+
+
+	/** Expert: Set the Similarity implementation used by this IndexWriter.
+	*
+	* @see Similarity#setDefault(Similarity)
+	*/
+	void setSimilarity(CL_NS(search)::Similarity* similarity)
+    { this->similarity = similarity; }
+
+	/** Expert: Return the Similarity implementation used by this IndexWriter.
+	*
+	* <p>This defaults to the current value of {@link Similarity#getDefault()}.
+	*/
+	CL_NS(search)::Similarity* getSimilarity() { return this->similarity; }
+
+	/** Returns the analyzer used by this index. */
+	CL_NS(analysis)::Analyzer* getAnalyzer() { return analyzer; }
+
+private:
+	/** Merges all RAM-resident segments. */
+	void flushRamSegments();
+
+	/** Incremental segment merger. */
+	void maybeMergeSegments();
+
+	// Pops segments off of segmentInfos stack down to minSegment, merges them,
+	// and pushes the merged index onto the top of the segmentInfos stack.
+	void mergeSegments(const uint32_t minSegment);
+	
+	// Merges the named range of segments, replacing them in the stack with a
+	// single segment.
+	void mergeSegments(const uint32_t minSegment, const uint32_t end);
+
+    // Some operating systems (e.g. Windows) don't permit a file to be deleted
+    // while it is opened for read (e.g. by another process or thread). So we
+    // assume that when a delete fails it is because the file is open in another
+    // process, and queue the file for subsequent deletion.
+	void deleteSegments(CL_NS(util)::CLVector<SegmentReader*>* segments);
+
+	void deleteFiles(const QStringList& files);
+	void readDeleteableFiles(QStringList& files);
+	void deleteFiles(const QStringList& files, QStringList& deletable);
+    void deleteFiles(const QStringList& files, CL_NS(store)::Directory* directory);
+	void writeDeleteableFiles(const QStringList& files);
+
+	// synchronized
+	QString newSegmentName();
+};
+
+CL_NS_END
+
+#endif