summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/SegmentMerger.h
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/SegmentMerger.h')
-rw-r--r--3rdparty/clucene/src/CLucene/index/SegmentMerger.h169
1 files changed, 169 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/SegmentMerger.h b/3rdparty/clucene/src/CLucene/index/SegmentMerger.h
new file mode 100644
index 000000000..230843b00
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/SegmentMerger.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#ifndef _lucene_index_SegmentMerger_
+#define _lucene_index_SegmentMerger_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include <QtCore/QString>
+#include <QtCore/QStringList>
+
+#include "CLucene/store/Directory.h"
+#include "CLucene/store/RAMDirectory.h"
+#include "CLucene/util/VoidList.h"
+#include "SegmentMergeInfo.h"
+#include "SegmentMergeQueue.h"
+#include "IndexWriter.h"
+#include "FieldInfos.h"
+#include "FieldsWriter.h"
+#include "TermInfosWriter.h"
+
+CL_NS_DEF(index)
+
+/**
+* The SegmentMerger class combines two or more Segments, each represented by an IndexReader
+* (see {@link #add}), into a single Segment. After adding the appropriate readers, call the
+* merge method to combine the segments.
+*<P>
+* If the compoundFile flag is set, then the segments will be merged into a compound file.
+*
+* IndexWriter is declared a friend so that it can call the private createCompoundFile().
+* @see #merge
+* @see #add
+*/
+class SegmentMerger : LUCENE_BASE
+{
+ bool useCompoundFile; //the compoundFile flag mentioned in the class documentation
+
+ CL_NS(store)::RAMIndexOutput* skipBuffer; //NOTE(review): presumably holds skip data between bufferSkip() and writeSkip() - confirm in the .cpp
+ int32_t lastSkipDoc;
+ int64_t lastSkipFreqPointer;
+ int64_t lastSkipProxPointer;
+
+ void resetSkip();
+ void bufferSkip(int32_t doc);
+ int64_t writeSkip();
+
+ //Directory of the segment
+ CL_NS(store)::Directory* directory;
+ //name of the new segment
+ QString segment;
+ //Set of IndexReaders
+ CL_NS(util)::CLVector<IndexReader*,
+ CL_NS(util)::Deletor::Object<IndexReader> > readers;
+ //Field infos: the FieldInfo instances of all fields
+ FieldInfos* fieldInfos;
+
+ //The queue that holds SegmentMergeInfo instances
+ SegmentMergeQueue* queue;
+ //IndexOutput to the new Frequency File
+ CL_NS(store)::IndexOutput* freqOutput;
+ //IndexOutput to the new Prox File
+ CL_NS(store)::IndexOutput* proxOutput;
+ //Writes Terminfos that have been merged
+ TermInfosWriter* termInfosWriter;
+ TermInfo termInfo; //(new) minimize consing
+
+ int32_t termIndexInterval;
+ int32_t skipInterval;
+
+public:
+ /**
+ * Creates a merger that will produce a new segment with the given name.
+ * @param writer The IndexWriter on whose behalf the merge runs
+ * (NOTE(review): presumably supplies the target directory and settings - confirm in the .cpp)
+ * @param name The name of the new segment
+ */
+ SegmentMerger( IndexWriter* writer, const QString& name );
+
+ //Destructor
+ ~SegmentMerger();
+
+ /**
+ * Add an IndexReader to the collection of readers that are to be merged
+ * @param reader The IndexReader to add
+ */
+ void add(IndexReader* reader);
+
+ /**
+ * Returns one of the readers that are to be merged.
+ * @param i The index of the reader to return
+ * @return The ith reader to be merged
+ */
+ IndexReader* segmentReader(const int32_t i);
+
+ /**
+ * Merges the readers specified by the {@link #add} method into the segment's
+ * directory (NOTE(review): presumably taken from the constructor's IndexWriter)
+ * @return The number of documents that were merged
+ * @throws IOException
+ */
+ int32_t merge();
+ /**
+ * Close all IndexReaders that have been added.
+ * Should not be called before merge().
+ * @throws IOException
+ */
+ void closeReaders();
+private:
+ void addIndexed(IndexReader* reader, FieldInfos* fieldInfos,
+ CL_NS(util)::StringArrayWithDeletor& names,
+ bool storeTermVectors, bool storePositionWithTermVector,
+ bool storeOffsetWithTermVector); //NOTE(review): undocumented here; presumably registers the named fields in fieldInfos with these flags - confirm
+
+ /**
+ * Merge the fields of all segments
+ * @return The number of documents in all of the readers
+ * @throws IOException
+ */
+ int32_t mergeFields();
+
+ /**
+ * Merge the TermVectors from each of the segments into the new one.
+ * @throws IOException
+ */
+ void mergeVectors();
+
+ /** Merge the terms of all segments */
+ void mergeTerms();
+
+ /** Merges all TermInfos into a single segment */
+ void mergeTermInfos();
+
+ /** Merge one term found in one or more segments. The array <code>smis</code>
+ * contains segments that are positioned at the same term. No element count
+ * is passed; NOTE(review): presumably the array is NULL-terminated - confirm.
+ *
+ * @param smis array of segments
+ * (there is no count parameter: this declaration takes only <code>smis</code>)
+ */
+ void mergeTermInfo( SegmentMergeInfo** smis);
+
+ /** Process postings from multiple segments all positioned on the
+ * same term. Writes out merged entries into freqOutput and
+ * the proxOutput streams.
+ *
+ * @param smis array of segments
+ * (there is no count parameter: this declaration takes only <code>smis</code>)
+ * @return number of documents across all segments where this term was found
+ */
+ int32_t appendPostings(SegmentMergeInfo** smis);
+
+ //Merges the norms for all fields
+ void mergeNorms();
+
+ void createCompoundFile(const QString& filename, QStringList& files);
+ friend class IndexWriter; //allow IndexWriter to use createCompoundFile
+};
+
+CL_NS_END
+
+#endif