3rdparty/clucene/src/CLucene/search/Sort.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_search_Sort_
#define _lucene_search_Sort_

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "CLucene/index/IndexReader.h"
#include "SearchHeader.h"

CL_NS_DEF(search)

 // Forward declarations: both classes are fully defined later in this header.
 class SortField;
 class Sort;

/**
 * Expert: Compares two ScoreDoc objects for sorting.
 *
 */
 class ScoreDocComparator:LUCENE_BASE {
 protected:
	 // Protected: instances can only be created through concrete subclasses.
	 ScoreDocComparator(){}
 public:
	 // Virtual so a comparator can be destroyed through a base-class pointer.
	 // Declared here without a body.
	 virtual ~ScoreDocComparator();
//    CL_NS(util)::Comparable** cachedValues;
//    ScoreDocComparator(CL_NS(util)::Comparable** cachedValues);

	/**
	 * Compares two ScoreDoc objects and returns a result indicating their
	 * sort order (three-way comparison).
	 * @param i First ScoreDoc
	 * @param j Second ScoreDoc
	 * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br>
	 *         <code>1</code> if <code>i</code> should come after <code>j</code><br>
	 *         <code>0</code> if they are equal
	 */
    virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;

	/**
	 * Returns the value used to sort the given document, as a
	 * CL_NS(util)::Comparable.  This is used by multisearchers to determine
	 * how to collate results from their searchers.
	 * NOTE(review): ownership of the returned pointer is not visible from
	 * this header - confirm against the implementations before deleting it.
	 * @see FieldDoc
	 * @param i Document
	 * @return Comparable holding the sort value of <code>i</code>
	 */
    virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;

	
	/**
	 * Returns the type of sort.  Should return <code>SortField::DOCSCORE</code>,
	 * <code>SortField::DOC</code>, <code>SortField::STRING</code>,
	 * <code>SortField::INT</code>, <code>SortField::FLOAT</code> or
	 * <code>SortField::CUSTOM</code>.  It is not valid to return
	 * <code>SortField::AUTO</code>.
	 * This is used by multisearchers to determine how to collate results from their searchers.
	 * @return One of the constants in SortField.
	 * @see SortField
	 */
    virtual int32_t sortType() = 0;

	/** Special comparator for sorting hits according to computed relevance (document score). */
	static ScoreDocComparator* RELEVANCE;

	/** Special comparator for sorting hits according to index order (document number). */
	static ScoreDocComparator* INDEXORDER;
 };

/**
 * Expert: returns a comparator for sorting ScoreDocs.
 *
 */
class SortComparatorSource:LUCENE_BASE {
public:
   // Virtual (and empty) so implementations can be destroyed through a
   // pointer to this interface.
   virtual ~SortComparatorSource(){
   }

   /**
   * Returns a string describing the name of the comparator;
   * this is used in the explanation.
   * NOTE(review): the return type is a non-const TCHAR*; who owns the
   * buffer is not visible from this header - confirm against implementations.
   */
   virtual TCHAR* getName() = 0;

   /**
   * Returns a hash code for this comparator source.
   * NOTE(review): the callers are not visible in this header; presumably used
   * to key cached comparators - confirm.
   */
   virtual size_t hashCode() = 0;

  /**
   * Creates a comparator for the field in the given index.
   * @param reader Index to create comparator for.
   * @param fieldname  Field to create comparator for.
   * @return Comparator of ScoreDoc objects.
   * @throws IOException If an error occurs reading the index.
   */
   virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = 0;
};


/**
 * Abstract base class for sorting hits returned by a Query.
 *
 * <p>This class should only be used if the other SortField
 * types (DOCSCORE, DOC, STRING, INT, FLOAT) do not provide an
 * adequate sorting.  It maintains an internal cache of values which
 * could be quite large.  The cache is an array of Comparable,
 * one for each document in the index.  There is a distinct
 * Comparable for each unique term in the field - if
 * some documents have the same term in the field, the cache
 * array will have entries which reference the same Comparable.
 *
 */
class SortComparator: public SortComparatorSource {
public:
	/**
	 * Implements SortComparatorSource::newComparator.  Declared non-pure here,
	 * so the body is provided outside this header.
	 * @param reader Index to create comparator for.
	 * @param fieldname Field to create comparator for.
	 * @return Comparator of ScoreDoc objects.
	 */
	virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname);
  
	// Constructor and virtual destructor are only declared here.
	SortComparator();
	virtual ~SortComparator();

  /**
   * Returns an object which, when sorted according to natural order,
   * will order the Term values in the correct order.
   * <p>For example, if the Terms contained integer values, this method
   * would return a Comparable wrapping the integer parsed from
   * <code>termtext</code>.  Note that this might not always be the most
   * efficient implementation - for this particular example, a better
   * implementation might be to make a ScoreDocLookupComparator that uses
   * an internal lookup table of int.
   * @param termtext The textual value of the term.
   * @return An object representing <code>termtext</code> that sorts 
   * according to the natural order of <code>termtext</code>.
   * @see Comparable
   * @see ScoreDocComparator
   */
   virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = 0;

};


/**
 * Stores information about how to sort documents by terms in an individual
 * field.  Fields must be indexed in order to sort by them.
 *
 */
class SortField:LUCENE_BASE {
private:
  // Name of the field to sort by, as returned by getField().
  // NOTE(review): whether this string is owned by the SortField is not
  // visible from this header - confirm in the implementation.
  const TCHAR* field;
  int32_t type;  // one of the type constants below; defaults to determining type dynamically
  //Locale* locale;    // defaults to "natural order" (no Locale)
  bool reverse;  // true to reverse the order; defaults to natural order
  // Comparator source returned by getFactory(); presumably only meaningful
  // for CUSTOM sorts - confirm in the implementation.
  SortComparatorSource* factory;

protected:
  // Copy constructor is protected: outside code copies through clone().
  SortField (const SortField& clone);
public:
   virtual ~SortField();

  /** Sort by document score (relevancy).  Sort values are Float and higher
   * values are at the front. 
   * PORTING: this is the same as SCORE in java, it had to be renamed because
   * SCORE is a system macro on some platforms (AIX).
   */
   LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=0);
   
  /** Sort by document number (index order).  Sort values are Integer and lower
   * values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, DOC=1);

  /** Guess type of sort based on field contents.  A regular expression is used
   * to look at the first term indexed for the field and determine if it
   * represents an integer number, a floating point number, or just arbitrary
   * string characters. */
   LUCENE_STATIC_CONSTANT(int32_t, AUTO=2);

  /** Sort using term values as Strings.  Sort values are String and lower
   * values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, STRING=3);

  /** Sort using term values as encoded Integers.  Sort values are Integer and
   * lower values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, INT=4);

  /** Sort using term values as encoded Floats.  Sort values are Float and
   * lower values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, FLOAT=5);

  /** Sort using a custom Comparator.  Sort values are any Comparable and
   * sorting is done according to natural order. */
   LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=9);

  // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
  // as the above static int values.  Any new values must not have the same value
  // as FieldCache.STRING_INDEX.

  /** Represents sorting by document score (relevancy). */
  static SortField* FIELD_SCORE;

  /** Represents sorting by document number (index order). */
  static SortField* FIELD_DOC;

  /** Creates a sort on the given field.
   * NOTE(review): no type is passed; presumably the type is AUTO and the
   * order is not reversed - confirm in the implementation.
   * @param field Name of field to sort by.
   */
  SortField (const TCHAR* field);
  //SortField (const TCHAR* field, bool reverse);
  //todo: we cannot make reverse use default field of =false.
  //because bool and int are the same type in c, overloading is not possible

  /** Creates a sort on the given field with an explicit type and direction.
   * @param field Name of field to sort by.
   * @param type One of the type constants declared above.
   * @param reverse True if natural order should be reversed.
   */
  SortField (const TCHAR* field, int32_t type, bool reverse); 

  /*
   SortField (TCHAR* field, Locale* locale) {
   SortField (TCHAR* field, Locale* locale, bool reverse);*/

  /** Creates a sort on the given field using a caller-supplied comparator source.
   * NOTE(review): whether the SortField takes ownership of
   * <code>comparator</code> is not visible from this header - confirm.
   * @param field Name of field to sort by.
   * @param comparator Source of the ScoreDocComparator used for sorting.
   * @param reverse True if natural order should be reversed.
   */
  SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false);

  /** Returns the name of the field.  Could return <code>null</code>
   * if the sort is by DOCSCORE or DOC.
   * @return Name of field, possibly <code>null</code>.
   */
  const TCHAR* getField() const { return field; }
  
  /** Returns a copy of this SortField.
   * NOTE(review): presumably heap-allocated through the protected copy
   * constructor, with the caller owning the result - confirm.
   */
  SortField* clone() const;

  /** Returns the type of contents in the field.
   * @return One of the constants DOCSCORE, DOC, AUTO, STRING, INT, FLOAT or CUSTOM.
   */
  int32_t getType() const { return type; }

  /** Returns the Locale by which term values are interpreted.
   * May return <code>null</code> if no Locale was specified.
   * @return Locale, or <code>null</code>.
   */
  /*Locale getLocale() {
    return locale;
  }*/

  /** Returns whether the sort should be reversed.
   * @return  True if natural order should be reversed.
   */
  bool getReverse() const { return reverse; }

  /** Returns the comparator source stored in this SortField (the
   * <code>factory</code> member); the pointer is returned as-is.
   */
  SortComparatorSource* getFactory() const { return factory; }

  /** Returns a textual description of this sort field.
   * NOTE(review): returns a non-const TCHAR*; the caller probably has to
   * free the buffer - confirm in the implementation.
   */
  TCHAR* toString() const;
};


/**
 * Encapsulates sort criteria for returned hits.
 *
 * <p>The fields used to determine sort order must be carefully chosen.
 * Documents must contain a single term in such a field,
 * and the value of the term should indicate the document's relative position in
 * a given sort order.  The field must be indexed, but should not be tokenized,
 * and does not need to be stored (unless you happen to want it back with the
 * rest of your document data).  In other words:
 *
 * <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
 * </dd></dl>
 *
 * <p><h3>Valid Types of Values</h3>
 *
 * <p>There are three possible kinds of term values which may be put into
 * sorting fields: Integers, Floats, or Strings.  Unless
 * {@link SortField SortField} objects are specified, the type of value
 * in the field is determined by parsing the first term in the field.
 *
 * <p>Integer term values should contain only digits and an optional
 * preceding negative sign.  Values must be base 10 and in the range
 * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
 * Documents which should appear first in the sort
 * should have low value integers, later documents high values
 * (i.e. the documents should be numbered <code>1..n</code> where
 * <code>1</code> is the first and <code>n</code> the last).
 *
 * <p>Float term values should conform to values accepted by
 * {@link Float Float.valueOf(String)} (except that <code>NaN</code>
 * and <code>Infinity</code> are not supported).
 * Documents which should appear first in the sort
 * should have low values, later documents high values.
 *
 * <p>String term values can contain any valid String, but should
 * not be tokenized.  The values are sorted according to their
 * {@link Comparable natural order}.  Note that using this type
 * of term value has higher memory requirements than the other
 * two types.
 *
 * <p><h3>Object Reuse</h3>
 *
 * <p>One of these objects can be
 * used multiple times and the sort order changed between usages.
 *
 * <p>This class is thread safe.
 *
 * <p><h3>Memory Usage</h3>
 *
 * <p>Sorting uses caches of term values maintained by the
 * internal HitQueue(s).  The cache is static and contains an integer
 * or float array of length <code>IndexReader.maxDoc()</code> for each field
 * name for which a sort is performed.  In other words, the size of the
 * cache in bytes is:
 *
 * <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
 *
 * <p>For String fields, the cache is larger: in addition to the
 * above array, the value of every term in the field is kept in memory.
 * If there are many unique terms in the field, this could
 * be quite large.
 *
 * <p>Note that the size of the cache is not affected by how many
 * fields are in the index and <i>might</i> be used to sort - only by
 * the ones actually used to sort a result set.
 *
 * <p>The cache is cleared each time a new <code>IndexReader</code> is
 * passed in, or if the value returned by <code>maxDoc()</code>
 * changes for the current IndexReader.  This class is not set up to
 * be able to efficiently sort hits from more than one index
 * simultaneously.
 *
 */
class Sort:LUCENE_BASE {
	// internal representation of the sort criteria: an array of SortField
	// pointers, handed out unchanged by getSort().
	// NOTE(review): no length is stored alongside the array, so it is
	// presumably NULL-terminated - confirm in the implementation.
	SortField** fields;
	// Private helper, only declared here; presumably releases the current
	// criteria before new ones are set - confirm in the implementation.
	void clear();
public:
	~Sort();

	/** Represents sorting by computed relevance. Using this sort criteria
	 * returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
	 * without a sort criteria, only with slightly more overhead. */
	static Sort* RELEVANCE;

	/** Represents sorting by index order. */
	static Sort* INDEXORDER;

	// Constructors: each overload takes the same arguments as the setSort()
	// overload with the matching parameter list.
	// NOTE(review): the criteria selected by the default constructor are not
	// visible from this header - confirm in the implementation.
	Sort();
	Sort (const TCHAR* field, bool reverse=false);
	Sort (const TCHAR** fields);
	Sort (SortField* field);
	Sort (SortField** fields);

	/** Sets the sort to the terms in <code>field</code>, optionally reversed. */
	void setSort (const TCHAR* field, bool reverse=false);
	/** Sets the sort to the terms in each of the named fields, in succession.
	 * NOTE(review): no count is passed, so the array is presumably
	 * NULL-terminated - confirm. */
	void setSort (const TCHAR** fieldnames);
	/** Sets the sort to the given single criterion. */
	void setSort (SortField* field);
	/** Sets the sort to the given criteria in succession.
	 * NOTE(review): no count is passed, so the array is presumably
	 * NULL-terminated; ownership of the SortField objects is not visible
	 * from this header - confirm. */
	void setSort (SortField** fields);

    /** Returns a textual description of the sort criteria.
     * NOTE(review): returns a non-const TCHAR*; the caller probably has to
     * free the buffer - confirm in the implementation. */
    TCHAR* toString() const;
 
    /**
    * Representation of the sort criteria.
    * @return a pointer to the array of SortField pointers used in this sort criteria
    */
    SortField** getSort() const{ return fields; }
};


CL_NS_END
#endif