summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/DocumentWriter.h
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/DocumentWriter.h')
-rw-r--r--3rdparty/clucene/src/CLucene/index/DocumentWriter.h107
1 files changed, 107 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/DocumentWriter.h b/3rdparty/clucene/src/CLucene/index/DocumentWriter.h
new file mode 100644
index 000000000..7096ba3ee
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/DocumentWriter.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#ifndef _lucene_index_DocumentWriter_
+#define _lucene_index_DocumentWriter_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include <QtCore/QString>
+
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/store/Directory.h"
+#include "FieldInfos.h"
+#include "IndexWriter.h"
+#include "CLucene/util/VoidMap.h"
+#include "CLucene/document/Field.h"
+#include "TermInfo.h"
+#include "CLucene/search/Similarity.h"
+#include "TermInfosWriter.h"
+#include "FieldsWriter.h"
+#include "Term.h"
+
+CL_NS_DEF(index)
+
+class DocumentWriter : LUCENE_BASE // buffers a document's terms as Postings before it is written to the index
+{
+public:
+ // Info about a single Term in the doc being buffered (the value type of postingTable).
+ class Posting : LUCENE_BASE
+ {
+ public:
+ Term* term; // the Term
+ int32_t freq; // its frequency in doc
+ Array<int32_t> positions; // positions it occurs at
+ Array<TermVectorOffsetInfo> offsets; // offset info -- presumably one entry per occurrence, parallel to positions; TODO confirm
+ // NOTE(review): looks like a Posting is seeded from one occurrence (position + offset) of t; confirm in the .cpp.
+ Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset);
+ ~Posting();
+ };
+
+private:
+ CL_NS(analysis)::Analyzer* analyzer; // see ctor docs: the analyzer to use for the document
+ CL_NS(store)::Directory* directory; // see ctor docs: the directory the document information is written to
+ FieldInfos* fieldInfos; //array -- NOTE(review): declared as a single FieldInfos*; confirm what "array" refers to
+ const int32_t maxFieldLength; // see ctor docs: the maximum number of tokens a field may have
+ CL_NS(search)::Similarity* similarity; // see ctor docs: the Similarity function
+ int32_t termIndexInterval; // NOTE(review): not a parameter of either ctor; presumably obtained from the IndexWriter -- confirm
+
+ // Keys are Terms, values are Postings.
+ // Used to buffer a document before it is written to the index.
+ typedef CL_NS(util)::CLHashtable<Term*, Posting*, Term::Compare,
+ Term::Equals> PostingTableType;
+ PostingTableType postingTable;
+ int32_t* fieldLengths; //array -- presumably indexed by field number; TODO confirm
+ int32_t* fieldPositions; //array -- presumably indexed by field number; TODO confirm
+ int32_t* fieldOffsets; //array -- presumably indexed by field number; TODO confirm
+ qreal* fieldBoosts; //array -- presumably indexed by field number; TODO confirm
+
+ Term* termBuffer; // NOTE(review): looks like a reusable scratch Term; confirm ownership in the .cpp
+public:
+ /** This ctor used by test code only.
+ *
+ * @param d The directory to write the document information to
+ * @param a The analyzer to use for the document
+ * @param similarity The Similarity function
+ * @param maxFieldLength The maximum number of tokens a field may have
+ */
+ DocumentWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a,
+ CL_NS(search)::Similarity* similarity, const int32_t maxFieldLength);
+ // Ctor taking an IndexWriter instead of explicit settings -- presumably similarity and maxFieldLength come from writer; TODO confirm.
+ DocumentWriter(CL_NS(store)::Directory* directory,
+ CL_NS(analysis)::Analyzer* analyzer, IndexWriter* writer);
+ ~DocumentWriter();
+ // Public entry point: takes a segment name and the document to add. NOTE(review): exact output files are not visible from this header.
+ void addDocument(const QString& segment, CL_NS(document)::Document* doc);
+
+
+private:
+ // Tokenizes the fields of a document into Postings.
+ void invertDocument(const CL_NS(document)::Document* doc);
+ // Records one occurrence of (field, text) at position -- presumably creating or updating its Posting in postingTable; TODO confirm.
+ void addPosition(const TCHAR* field, const TCHAR* text,
+ const int32_t position, TermVectorOffsetInfo* offset);
+ // array and arraySize are reference (out) parameters -- presumably receive the sorted postings and their count; confirm who frees array.
+ void sortPostingTable(Posting**& array, int32_t& arraySize);
+ // Static helper taking a lo/hi index range over postings -- NOTE(review): bounds look inclusive by convention; confirm.
+ static void quickSort(Posting**& postings, const int32_t lo, const int32_t hi);
+ // Takes postingsLength postings plus the segment name -- presumably writes them to that segment's files; TODO confirm.
+ void writePostings(Posting** postings, const int32_t postingsLength,
+ const QString& segment);
+ // Takes the segment name -- presumably writes per-field norms using fieldBoosts/fieldLengths and similarity; TODO confirm.
+ void writeNorms(const QString& segment);
+ // NOTE(review): presumably empties postingTable between documents; confirm whether Terms/Postings are deleted here.
+ void clearPostingTable();
+};
+
+CL_NS_END
+
+#endif