summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/MultiReader.h
blob: 1d76814e1961210a5803581a96ce6a3e708787a4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_index_MultiReader
#define _lucene_index_MultiReader

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "SegmentHeader.h"
#include "IndexReader.h"
#include "CLucene/document/Document.h"
#include "Terms.h"
#include "SegmentMergeQueue.h"

CL_NS_DEF(index)

/** TermDocs implementation that spans a set of sub-readers, exposing their
* postings through the single TermDocs interface declared below.
*
* NOTE(review): this comment used to read "An IndexReader which reads
* multiple indexes, appending their content", which describes MultiReader
* (declared further down in this header), not this class.
*/
class MultiTermDocs:public virtual TermDocs {
private:
protected:
	// Array of per-sub-reader TermDocs objects; presumably indexed like
	// subReaders and obtained through termDocs(int32_t) -- confirm in the
	// implementation file.
	TermDocs** readerTermDocs;

	// The sub-readers this object iterates over.
	IndexReader** subReaders;
  int32_t subReadersLength;	// presumably the number of entries in subReaders
	// Presumably the first document number of each sub-reader (MultiReader's
	// own `starts` member is documented that way); not owned, being const.
	const int32_t* starts;
	// Term most recently passed to seek(), presumably -- TODO confirm.
	Term* term;

	// NOTE(review): looks like the document-number offset of the sub-reader
	// currently being read (taken from `starts`) -- confirm.
	int32_t base;
	// Index of the sub-reader currently being read, judging by the comment
	// on `current` below.
	int32_t pointer;

	// NOTE(review): the original comment names segTermDocs, which is not
	// declared in this class; readerTermDocs[pointer] is presumably meant.
	TermDocs* current;              // == segTermDocs[pointer]
public:
	MultiTermDocs();
	// @param subReaders the readers to iterate over
	// @param s          presumably stored as `starts` -- TODO confirm
	MultiTermDocs(IndexReader** subReaders, const int32_t* s);
	virtual ~MultiTermDocs();

	// Accessors for the current posting: document number and term frequency.
	int32_t doc() const;
	int32_t freq() const;

	// Position this object on a term, given either by an enumeration or
	// directly.
    void seek(TermEnum* termEnum);
	void seek(Term* tterm);
	// Advance to the next posting; the bool result presumably reports
	// whether one was available.
	bool next();

	/** Optimized implementation. */
	// Bulk read into the caller-supplied docs/freqs arrays; `length` is
	// presumably their capacity and the result the number of entries filled.
	int32_t read(int32_t* docs, int32_t* freqs, int32_t length);

	/** As yet unoptimized implementation. */
	bool skipTo(const int32_t target);

	void close();

	// Cast helper to the TermPositions interface; overridden by
	// MultiTermPositions in this header.
	virtual TermPositions* __asTermPositions();
protected:
	// Hook for obtaining a TermDocs from one sub-reader; virtual so that
	// subclasses (see MultiTermPositions) can supply a different kind.
	virtual TermDocs* termDocs(const IndexReader* reader) const;
private:
	// Overload taking a sub-reader index instead of a reader pointer.
	TermDocs* termDocs(const int32_t i) const;

};


//MultiTermEnum represents the enumeration of all terms of all readers.
//The per-reader enumerations are kept in a SegmentMergeQueue (see `queue`).
class MultiTermEnum:public TermEnum {
private:
	//Queue holding the enumerations of the individual readers
	SegmentMergeQueue* queue;

	//Current term and its document frequency, as reported by term() and
	//docFreq() (presumably refreshed by next() -- confirm in the .cpp)
	Term* _term;
	int32_t _docFreq;
public:
	//Constructor
	//Opens all enumerations of all readers
	//  subReaders - the readers whose terms are enumerated
	//  starts     - presumably the first document number of each reader
	//  t          - presumably the term at which the enumeration starts
	//               -- TODO confirm how a NULL t is treated
	MultiTermEnum(IndexReader** subReaders, const int32_t* starts, const Term* t);

	//Destructor
	~MultiTermEnum();

	//Move the current term to the next in the set of enumerations
	bool next();

	//Returns a pointer to the current term of the set of enumerations
	Term* term();
	//NOTE(review): the effect of `pointer` is not visible in this header;
	//presumably it selects whether the returned Term has its reference
	//count incremented -- verify against the implementation
	Term* term(bool pointer);

	//Returns the document frequency of the current term in the set
	int32_t docFreq() const;

	//Closes the set of enumerations in the queue
	void close();

		
	//Class-name helpers: both yield the literal "MultiTermEnum"; the
	//instance version simply forwards to the static one
	const char* getObjectName(){ return MultiTermEnum::getClassName(); }
	static const char* getClassName(){ return "MultiTermEnum"; }
};


/** MultiTermDocs variant that additionally implements the TermPositions
* interface, i.e. adds nextPosition() on top of the inherited iteration.
*/
class MultiTermPositions:public MultiTermDocs,public TermPositions {
public:
	// Same parameters as the two-argument MultiTermDocs constructor.
	MultiTermPositions(IndexReader** subReaders, const int32_t* s);
	// Empty destructor. NOTE(review): the ';' after the body is redundant.
	~MultiTermPositions() {};
	// Presumably returns the next position of the term within the current
	// document -- confirm against the TermPositions contract.
	int32_t nextPosition();


	// Cast helpers between the two interfaces this class inherits.
	virtual TermDocs* __asTermDocs();
	virtual TermPositions* __asTermPositions();
protected:
	// Same signature as the virtual MultiTermDocs::termDocs(const IndexReader*),
	// so this overrides it; presumably it hands out position-capable objects
	// -- TODO confirm in the implementation.
	TermDocs* termDocs(const IndexReader* reader) const;
};


/** An IndexReader which reads multiple indexes, appending their content.
*/
class MultiReader:public IndexReader{
private:
	// Value returned by hasDeletions().
	bool _hasDeletions;
	// The aggregated readers and, presumably, how many there are.
	IndexReader** subReaders;
	int32_t subReadersLength;
	int32_t* starts;			  // 1st docno for each segment

	// Table from field name to a norms byte array. The Deletor arguments
	// (tcArray for keys, Array<uint8_t> for values) suggest the table owns
	// both -- confirm against CLHashtable's semantics.
	CL_NS(util)::CLHashtable<const TCHAR*,uint8_t*,
		CL_NS(util)::Compare::TChar,
			CL_NS(util)::Equals::TChar,
		CL_NS(util)::Deletor::tcArray,
		CL_NS(util)::Deletor::Array<uint8_t> > normsCache;
	// Presumably the values behind maxDoc() and numDocs() -- TODO confirm.
	int32_t _maxDoc;
	int32_t _numDocs;
	// Shared set-up helper; presumably called by both constructors.
	void initialize(IndexReader** subReaders);
  
	// Presumably maps document number n to the index of the sub-reader
	// containing it (using `starts`) -- confirm in the implementation.
	int32_t readerIndex(const int32_t n) const;
	
	bool hasNorms(const TCHAR* field);
	// NOTE(review): `ones`/fakeNorms() look like a lazily built buffer of
	// default norm values for fields without norms -- confirm.
	uint8_t* ones;
	uint8_t* fakeNorms();
protected:
	// Implementation hooks; presumably invoked by the IndexReader base class
	// for the corresponding public operations (setNorm, undeleteAll, ...).
	void doSetNorm(int32_t n, const TCHAR* field, uint8_t value);
	void doUndeleteAll();
	void doCommit();
	// synchronized
	// NOTE(review): the "synchronized" markers in this class are bare
	// comments; whether any locking is actually performed is not visible
	// in this header.
	void doClose();
	
	// synchronized
	void doDelete(const int32_t n);
public:
	/** Construct reading the named set of readers. */
	MultiReader(CL_NS(store)::Directory* directory, SegmentInfos* sis, IndexReader** subReaders);

	/**
	* <p>Construct a MultiReader aggregating the named set of (sub)readers.
	* Directory locking for delete, undeleteAll, and setNorm operations is
	* left to the subreaders. </p>
	* <p>Note that all subreaders are closed if this Multireader is closed.</p>
	* @param subReaders set of (sub)readers
	* @throws IOException
	*/
	MultiReader(IndexReader** subReaders);

	~MultiReader();

	/** Retrieve the term frequency vectors for the specified document.
	*  The result array receives a vector for each vectorized field in the
	*  document. Each vector contains term numbers and frequencies for all
	*  terms in a given vectorized field.
	*  NOTE(review): the earlier text said the method "returns null" if no
	*  such fields exist, but the return type is bool; presumably false is
	*  returned in that case -- confirm in the implementation.
	*/
	bool getTermFreqVectors(int32_t n, Array<TermFreqVector*>& result);
	/** Single-field counterpart of getTermFreqVectors(). */
	TermFreqVector* getTermFreqVector(int32_t n, const TCHAR* field);


	// synchronized
	int32_t numDocs();

	int32_t maxDoc() const;

	// Loads document n into the caller-supplied doc; the meaning of the
	// bool result is not visible here (presumably success).
	bool document(int32_t n, CL_NS(document)::Document* doc);

	bool isDeleted(const int32_t n);
	// Returns the _hasDeletions flag.
	bool hasDeletions() const{ return _hasDeletions; }

	// synchronized
	// Two overloads: one returns a norms array for the field, the other
	// writes into a caller-supplied buffer. Ownership of the returned
	// pointer is not stated here; see normsCache above.
	uint8_t* norms(const TCHAR* field);
	void norms(const TCHAR* field, uint8_t* result);

	// Term enumerations over this reader; the second overload takes a
	// term, presumably the one to start at.
	TermEnum* terms() const;
	TermEnum* terms(const Term* term) const;
	
	//Returns the document frequency of term t.
	//NOTE(review): the earlier wording ("the current term in the set") was
	//copied from MultiTermEnum; here the result is presumably summed over
	//the sub-readers, and the handling of the NULL default is not visible.
	int32_t docFreq(const Term* t=NULL) const;
	TermDocs* termDocs() const;
	TermPositions* termPositions() const;
	
		
	/**
	* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
	*/
	void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray);
};


CL_NS_END
#endif