summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/clucene/src/CLucene/index/SegmentMerger.h
blob: 230843b00ec82ef86d58d3f637181f60e9b48814 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or 
 * the GNU Lesser General Public License, as specified in the COPYING file.
 *
 * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
*/
#ifndef _lucene_index_SegmentMerger_
#define _lucene_index_SegmentMerger_

#if defined(_LUCENE_PRAGMA_ONCE)
#   pragma once
#endif

#include <QtCore/QString>
#include <QtCore/QStringList>

#include "CLucene/store/Directory.h"
#include "CLucene/store/RAMDirectory.h"
#include "CLucene/util/VoidList.h"
#include "SegmentMergeInfo.h"
#include "SegmentMergeQueue.h"
#include "IndexWriter.h"
#include "FieldInfos.h"
#include "FieldsWriter.h"
#include "TermInfosWriter.h"

CL_NS_DEF(index)

/**
* The SegmentMerger class combines two or more segments, each represented by an
* IndexReader (see {@link #add}), into a single segment. After adding the
* appropriate readers, call the merge method to combine the segments.
*<P>
* If the useCompoundFile flag is set, then the segments will be merged into a
* compound file.
*
* NOTE(review): this header only declares the class; ownership and lifetime of
* the raw pointer members are decided in the implementation file and should be
* confirmed there.
*
* @see #merge
* @see #add
*/
class SegmentMerger : LUCENE_BASE
{
	//true if the new segment should be written as a compound file
	bool useCompoundFile;
	
	//State used while writing skip data.
	//NOTE(review): presumably skipBuffer collects skip entries in memory and the
	//lastSkip* fields remember the previously buffered entry - confirm in the
	//implementation.
	CL_NS(store)::RAMIndexOutput* skipBuffer;
	int32_t lastSkipDoc;
	int64_t lastSkipFreqPointer;
	int64_t lastSkipProxPointer;
	
	//Skip helpers; their bodies are not visible in this header.
	//NOTE(review): presumably resetSkip() clears the skip state above,
	//bufferSkip(doc) records a skip entry for document doc, and writeSkip()
	//flushes the buffered entries and returns a file pointer - confirm.
	void resetSkip();
	void bufferSkip(int32_t doc);
	int64_t writeSkip();

	//Directory of the segment
	CL_NS(store)::Directory* directory;     
	//Name of the new segment
	QString segment;
	//Set of IndexReaders that were added through add()
	//NOTE(review): the Deletor::Object policy suggests the vector may delete
	//the readers it holds - confirm against CLVector and the constructor.
	CL_NS(util)::CLVector<IndexReader*,
        CL_NS(util)::Deletor::Object<IndexReader> > readers;
	//Field infos holding the FieldInfo instances of all fields
	FieldInfos* fieldInfos;

	//The queue that holds SegmentMergeInfo instances
	SegmentMergeQueue* queue;
	//IndexOutput to the new frequency file
	CL_NS(store)::IndexOutput* freqOutput;
	//IndexOutput to the new prox file
	CL_NS(store)::IndexOutput* proxOutput;
	//Writes TermInfos that have been merged
	TermInfosWriter* termInfosWriter;
	TermInfo termInfo; //kept as a member so it can be reused (minimizes allocations)

	//NOTE(review): presumably both intervals are taken from the IndexWriter
	//given to the constructor - confirm in the implementation.
    int32_t termIndexInterval;
	int32_t skipInterval;

public:
	/**
	* Creates a merger that will produce the segment called <code>name</code>.
	*
	* @param writer The IndexWriter this merge is performed for.
	*        NOTE(review): presumably the target directory, the compound-file
	*        flag and the term index interval are read from it - confirm in the
	*        implementation.
	* @param name The name of the new segment
	*/
	SegmentMerger( IndexWriter* writer, const QString& name );

	//Destructor
	~SegmentMerger();
	
	/**
	* Add an IndexReader to the collection of readers that are to be merged
	* @param reader The reader to add
	*/
	void add(IndexReader* reader);
	
	/**
	* Returns one of the readers that were added with {@link #add}.
	* @param i The index of the reader to return
	* @return The ith reader to be merged
	*/
	IndexReader* segmentReader(const int32_t i);
	
	/**
	* Merges the readers specified by the {@link #add} method into the
	* new segment.
	* @return The number of documents that were merged
	* @throws IOException
	*/
	int32_t merge();
	/**
	* Close all IndexReaders that have been added.
	* Should not be called before merge().
	* @throws IOException
	*/
	void closeReaders();
private:
	/**
	* NOTE(review): presumably registers the field names in <code>names</code>,
	* taken from <code>reader</code>, in <code>fieldInfos</code> as indexed
	* fields with the given term vector flags - confirm in the implementation.
	*/
	void addIndexed(IndexReader* reader, FieldInfos* fieldInfos,
        CL_NS(util)::StringArrayWithDeletor& names, 
		bool storeTermVectors, bool storePositionWithTermVector,
		bool storeOffsetWithTermVector);

	/**
	* Merge the fields of all segments 
	* @return The number of documents in all of the readers
	* @throws IOException
	*/
	int32_t mergeFields();

	/**
	* Merge the TermVectors from each of the segments into the new one.
	* @throws IOException
	*/
  	void mergeVectors();

	/** Merge the terms of all segments */
	void mergeTerms();

	/** Merges all TermInfos into a single segment */
	void mergeTermInfos();

	/** Merge one term found in one or more segments. The array <code>smis</code>
	*  contains segments that are positioned at the same term.
	*
	*  NOTE(review): no element count is passed; how the number of occupied
	*  cells is determined is not visible here (presumably the array is
	*  null-terminated) - confirm in the implementation.
	*
	* @param smis array of segments
	*/
	void mergeTermInfo( SegmentMergeInfo** smis);
	    
	/** Process postings from multiple segments all positioned on the
	*  same term. Writes out merged entries into freqOutput and
	*  the proxOutput streams.
	*
	*  NOTE(review): no element count is passed; how the number of occupied
	*  cells is determined is not visible here (presumably the array is
	*  null-terminated) - confirm in the implementation.
	*
	* @param smis array of segments
	* @return number of documents across all segments where this term was found
	*/
	int32_t appendPostings(SegmentMergeInfo** smis);

	//Merges the norms for all fields 
	void mergeNorms();
	
	//NOTE(review): presumably bundles the given files into the compound file
	//named filename - confirm in the implementation.
	void createCompoundFile(const QString& filename, QStringList& files);
	friend class IndexWriter; //allow IndexWriter to use createCompoundFile
};

CL_NS_END

#endif