summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/search/Sort.h
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/Sort.h')
-rw-r--r--3rdparty/clucene/src/CLucene/search/Sort.h356
1 files changed, 356 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/Sort.h b/3rdparty/clucene/src/CLucene/search/Sort.h
new file mode 100644
index 000000000..cfe96d56c
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/Sort.h
@@ -0,0 +1,356 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_search_Sort_
+#define _lucene_search_Sort_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include "CLucene/index/IndexReader.h"
+#include "SearchHeader.h"
+
+CL_NS_DEF(search)
+
+ class SortField; //predefine
+ class Sort;
+
+/**
+ * Expert: Compares two ScoreDoc objects for sorting.
+ *
+ */
+ class ScoreDocComparator:LUCENE_BASE {
+ protected:
+ ScoreDocComparator(){}
+ public:
+ virtual ~ScoreDocComparator();
+// CL_NS(util)::Comparable** cachedValues;
+// ScoreDocComparator(CL_NS(util)::Comparable** cachedValues);
+
+ /**
+ * Compares two ScoreDoc objects and returns a result indicating their
+ * sort order.
+ * @param i First ScoreDoc
+ * @param j Second ScoreDoc
+ * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
+ * @see java.util.Comparator
+ */
+ virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;
+
+ /**
+ * Returns the value used to sort the given document. The
+ * object returned must implement the java.io.Serializable
+ * interface. This is used by multisearchers to determine how to collate results from their searchers.
+ * @see FieldDoc
+ * @param i Document
+ * @return Serializable object
+ */
+ virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;
+
+
+ /**
+ * Returns the type of sort. Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INTEGER</code>,
+ * <code>SortField::FLOAT</code> or <code>SortField.CUSTOM</code>. It is not valid to return <code>SortField.AUTO</code>.
+ * This is used by multisearchers to determine how to collate results from their searchers.
+ * @return One of the constants in SortField.
+ * @see SortField
+ */
+ virtual int32_t sortType() = 0;
+
+ /** Special comparator for sorting hits according to computed relevance (document score). */
+ static ScoreDocComparator* RELEVANCE;
+
+ /** Special comparator for sorting hits according to index order (document number). */
+ static ScoreDocComparator* INDEXORDER;
+ };
+
+/**
+ * Expert: returns a comparator for sorting ScoreDocs.
+ *
+ */
+class SortComparatorSource:LUCENE_BASE {
+public:
+ virtual ~SortComparatorSource(){
+ }
+
+ /**
+ * return a reference to a string describing the name of the comparator
+ * this is used in the explanation
+ */
+ virtual TCHAR* getName() = 0;
+
+ virtual size_t hashCode() = 0;
+
+ /**
+ * Creates a comparator for the field in the given index.
+ * @param reader Index to create comparator for.
+ * @param fieldname Field to create comparator for.
+ * @return Comparator of ScoreDoc objects.
+ * @throws IOException If an error occurs reading the index.
+ */
+ virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = 0;
+};
+
+
+/**
+ * Abstract base class for sorting hits returned by a Query.
+ *
+ * <p>This class should only be used if the other SortField
+ * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
+ * adequate sorting. It maintains an internal cache of values which
+ * could be quite large. The cache is an array of Comparable,
+ * one for each document in the index. There is a distinct
+ * Comparable for each unique term in the field - if
+ * some documents have the same term in the field, the cache
+ * array will have entries which reference the same Comparable.
+ *
+ */
+class SortComparator: public SortComparatorSource {
+public:
+ virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname);
+
+ SortComparator();
+ virtual ~SortComparator();
+
+ /**
+ * Returns an object which, when sorted according to natural order,
+ * will order the Term values in the correct order.
+ * <p>For example, if the Terms contained integer values, this method
+ * would return <code>new Integer(termtext)</code>. Note that this
+ * might not always be the most efficient implementation - for this
+ * particular example, a better implementation might be to make a
+ * ScoreDocLookupComparator that uses an internal lookup table of int.
+ * @param termtext The textual value of the term.
+ * @return An object representing <code>termtext</code> that sorts
+ * according to the natural order of <code>termtext</code>.
+ * @see Comparable
+ * @see ScoreDocComparator
+ */
+ virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = 0;
+
+};
+
+
+/**
+ * Stores information about how to sort documents by terms in an individual
+ * field. Fields must be indexed in order to sort by them.
+ *
+ */
+class SortField:LUCENE_BASE {
+private:
+ const TCHAR* field;
+ int32_t type; // defaults to determining type dynamically
+ //Locale* locale; // defaults to "natural order" (no Locale)
+ bool reverse; // defaults to natural order
+ SortComparatorSource* factory;
+
+protected:
+ SortField (const SortField& clone);
+public:
+ virtual ~SortField();
+
+ /** Sort by document score (relevancy). Sort values are Float and higher
+ * values are at the front.
+ * PORTING: this is the same as SCORE in java, it had to be renamed because
+ * SCORE is a system macro on some platforms (AIX).
+ */
+ LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=0);
+
+ /** Sort by document number (index order). Sort values are Integer and lower
+ * values are at the front. */
+ LUCENE_STATIC_CONSTANT(int32_t, DOC=1);
+
+ /** Guess type of sort based on field contents. A regular expression is used
+ * to look at the first term indexed for the field and determine if it
+ * represents an integer number, a floating point number, or just arbitrary
+ * string characters. */
+ LUCENE_STATIC_CONSTANT(int32_t, AUTO=2);
+
+ /** Sort using term values as Strings. Sort values are String and lower
+ * values are at the front. */
+ LUCENE_STATIC_CONSTANT(int32_t, STRING=3);
+
+ /** Sort using term values as encoded Integers. Sort values are Integer and
+ * lower values are at the front. */
+ LUCENE_STATIC_CONSTANT(int32_t, INT=4);
+
+ /** Sort using term values as encoded Floats. Sort values are Float and
+ * lower values are at the front. */
+ LUCENE_STATIC_CONSTANT(int32_t, FLOAT=5);
+
+ /** Sort using a custom Comparator. Sort values are any Comparable and
+ * sorting is done according to natural order. */
+ LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=9);
+
+ // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
+ // as the above static int values. Any new values must not have the same value
+ // as FieldCache.STRING_INDEX.
+
+ /** Represents sorting by document score (relevancy). */
+ static SortField* FIELD_SCORE;
+
+ /** Represents sorting by document number (index order). */
+ static SortField* FIELD_DOC;
+
+ SortField (const TCHAR* field);
+ //SortField (const TCHAR* field, bool reverse);
+ //todo: we cannot make reverse use default field of =false.
+ //because bool and int are the same type in c, overloading is not possible
+ SortField (const TCHAR* field, int32_t type, bool reverse);
+
+ /*
+ SortField (TCHAR* field, Locale* locale) {
+ SortField (TCHAR* field, Locale* locale, bool reverse);*/
+
+ SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false);
+
+ /** Returns the name of the field. Could return <code>null</code>
+ * if the sort is by SCORE or DOC.
+ * @return Name of field, possibly <code>null</code>.
+ */
+ const TCHAR* getField() const { return field; }
+
+ SortField* clone() const;
+
+ /** Returns the type of contents in the field.
+ * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
+ */
+ int32_t getType() const { return type; }
+
+ /** Returns the Locale by which term values are interpreted.
+ * May return <code>null</code> if no Locale was specified.
+ * @return Locale, or <code>null</code>.
+ */
+ /*Locale getLocale() {
+ return locale;
+ }*/
+
+ /** Returns whether the sort should be reversed.
+ * @return True if natural order should be reversed.
+ */
+ bool getReverse() const { return reverse; }
+
+ SortComparatorSource* getFactory() const { return factory; }
+
+ TCHAR* toString() const;
+};
+
+
+
+/**
+ * Encapsulates sort criteria for returned hits.
+ *
+ * <p>The fields used to determine sort order must be carefully chosen.
+ * Documents must contain a single term in such a field,
+ * and the value of the term should indicate the document's relative position in
+ * a given sort order. The field must be indexed, but should not be tokenized,
+ * and does not need to be stored (unless you happen to want it back with the
+ * rest of your document data). In other words:
+ *
+ * <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
+ * </dd></dl>
+ *
+ * <p><h3>Valid Types of Values</h3>
+ *
+ * <p>There are three possible kinds of term values which may be put into
+ * sorting fields: Integers, Floats, or Strings. Unless
+ * {@link SortField SortField} objects are specified, the type of value
+ * in the field is determined by parsing the first term in the field.
+ *
+ * <p>Integer term values should contain only digits and an optional
+ * preceeding negative sign. Values must be base 10 and in the range
+ * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
+ * Documents which should appear first in the sort
+ * should have low value integers, later documents high values
+ * (i.e. the documents should be numbered <code>1..n</code> where
+ * <code>1</code> is the first and <code>n</code> the last).
+ *
+ * <p>Float term values should conform to values accepted by
+ * {@link Float Float.valueOf(String)} (except that <code>NaN</code>
+ * and <code>Infinity</code> are not supported).
+ * Documents which should appear first in the sort
+ * should have low values, later documents high values.
+ *
+ * <p>String term values can contain any valid String, but should
+ * not be tokenized. The values are sorted according to their
+ * {@link Comparable natural order}. Note that using this type
+ * of term value has higher memory requirements than the other
+ * two types.
+ *
+ * <p><h3>Object Reuse</h3>
+ *
+ * <p>One of these objects can be
+ * used multiple times and the sort order changed between usages.
+ *
+ * <p>This class is thread safe.
+ *
+ * <p><h3>Memory Usage</h3>
+ *
+ * <p>Sorting uses of caches of term values maintained by the
+ * internal HitQueue(s). The cache is static and contains an integer
+ * or float array of length <code>IndexReader.maxDoc()</code> for each field
+ * name for which a sort is performed. In other words, the size of the
+ * cache in bytes is:
+ *
+ * <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
+ *
+ * <p>For String fields, the cache is larger: in addition to the
+ * above array, the value of every term in the field is kept in memory.
+ * If there are many unique terms in the field, this could
+ * be quite large.
+ *
+ * <p>Note that the size of the cache is not affected by how many
+ * fields are in the index and <i>might</i> be used to sort - only by
+ * the ones actually used to sort a result set.
+ *
+ * <p>The cache is cleared each time a new <code>IndexReader</code> is
+ * passed in, or if the value returned by <code>maxDoc()</code>
+ * changes for the current IndexReader. This class is not set up to
+ * be able to efficiently sort hits from more than one index
+ * simultaneously.
+ *
+ */
+class Sort:LUCENE_BASE {
+ // internal representation of the sort criteria
+ SortField** fields;
+ void clear();
+public:
+ ~Sort();
+
+ /** Represents sorting by computed relevance. Using this sort criteria
+ * returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
+ * without a sort criteria, only with slightly more overhead. */
+ static Sort* RELEVANCE;
+
+ /** Represents sorting by index order. */
+ static Sort* INDEXORDER;
+
+ Sort();
+ Sort (const TCHAR* field, bool reverse=false);
+ Sort (const TCHAR** fields);
+ Sort (SortField* field);
+ Sort (SortField** fields);
+ void setSort (const TCHAR* field, bool reverse=false);
+ void setSort (const TCHAR** fieldnames);
+ void setSort (SortField* field);
+ void setSort (SortField** fields);
+
+ TCHAR* toString() const;
+
+ /**
+ * Representation of the sort criteria.
+ * @return a pointer to the of SortField array used in this sort criteria
+ */
+ SortField** getSort() const{ return fields; }
+};
+
+
+
+
+
+CL_NS_END
+#endif