summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/DocumentWriter.h
diff options
context:
space:
mode:
author: Qt by Nokia <qt-info@nokia.com> 2011-04-27 12:05:43 +0200
committer: axis <qt-info@nokia.com> 2011-04-27 12:05:43 +0200
commit: 50123887ba0f33cf47520bee7c419d68742af2d1 (patch)
tree: 0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/index/DocumentWriter.h
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/DocumentWriter.h')
-rw-r--r-- 3rdparty/clucene/src/CLucene/index/DocumentWriter.h 107
1 files changed, 107 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/DocumentWriter.h b/3rdparty/clucene/src/CLucene/index/DocumentWriter.h
new file mode 100644
index 000000000..7096ba3ee
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/DocumentWriter.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#ifndef _lucene_index_DocumentWriter_
+#define _lucene_index_DocumentWriter_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include <QtCore/QString>
+
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/store/Directory.h"
+#include "FieldInfos.h"
+#include "IndexWriter.h"
+#include "CLucene/util/VoidMap.h"
+#include "CLucene/document/Field.h"
+#include "TermInfo.h"
+#include "CLucene/search/Similarity.h"
+#include "TermInfosWriter.h"
+#include "FieldsWriter.h"
+#include "Term.h"
+
+CL_NS_DEF(index)
+
+class DocumentWriter : LUCENE_BASE // buffers a document's postings before they are written to the index
+{
+public:
+ // Information about a single Term within one document.
+ class Posting : LUCENE_BASE
+ {
+ public:
+ Term* term; // the Term this posting describes
+ int32_t freq; // number of times the term occurs in the document
+ Array<int32_t> positions; // positions at which the term occurs
+ Array<TermVectorOffsetInfo> offsets; // offset info for the term (presumably one entry per position - confirm)
+ // Takes the term together with a position and an offset-info pointer.
+ Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset);
+ ~Posting();
+ };
+
+private:
+ CL_NS(analysis)::Analyzer* analyzer;
+ CL_NS(store)::Directory* directory;
+ FieldInfos* fieldInfos; // array
+ const int32_t maxFieldLength; // maximum number of tokens a field may have (per the constructor docs below)
+ CL_NS(search)::Similarity* similarity;
+ int32_t termIndexInterval;
+
+ // Keys are Terms, values are Postings.
+ // Used to buffer a document before it is written to the index.
+ typedef CL_NS(util)::CLHashtable<Term*, Posting*, Term::Compare,
+ Term::Equals> PostingTableType;
+ PostingTableType postingTable;
+ int32_t* fieldLengths; // array (presumably one entry per field - confirm)
+ int32_t* fieldPositions; // array (presumably one entry per field - confirm)
+ int32_t* fieldOffsets; // array (presumably one entry per field - confirm)
+ qreal* fieldBoosts; // array (presumably one entry per field - confirm)
+
+ Term* termBuffer;
+public:
+ /** This constructor is used by test code only.
+ *
+ * @param d The directory to write the document information to
+ * @param a The analyzer to use for the document
+ * @param similarity The Similarity function
+ * @param maxFieldLength The maximum number of tokens a field may have
+ */
+ DocumentWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a,
+ CL_NS(search)::Similarity* similarity, const int32_t maxFieldLength);
+ // Overload that takes an IndexWriter in place of an explicit similarity and maxFieldLength.
+ DocumentWriter(CL_NS(store)::Directory* directory,
+ CL_NS(analysis)::Analyzer* analyzer, IndexWriter* writer);
+ ~DocumentWriter();
+ // Adds doc; segment presumably names the index segment it is written to - confirm.
+ void addDocument(const QString& segment, CL_NS(document)::Document* doc);
+
+
+private:
+ // Tokenizes the fields of a document into Postings.
+ void invertDocument(const CL_NS(document)::Document* doc);
+ // Presumably records one occurrence of (field, text) at the given position - confirm.
+ void addPosition(const TCHAR* field, const TCHAR* text,
+ const int32_t position, TermVectorOffsetInfo* offset);
+ // array and arraySize are reference parameters, presumably outputs holding the sorted postings - confirm.
+ void sortPostingTable(Posting**& array, int32_t& arraySize);
+ // Sorts postings over the index range lo..hi (whether hi is inclusive is not visible here).
+ static void quickSort(Posting**& postings, const int32_t lo, const int32_t hi);
+ // Writes postingsLength postings for the given segment.
+ void writePostings(Posting** postings, const int32_t postingsLength,
+ const QString& segment);
+ // Writes norms for the given segment.
+ void writeNorms(const QString& segment);
+ // Clears postingTable (presumably releasing the buffered Postings - confirm).
+ void clearPostingTable();
+};
+
+CL_NS_END
+
+#endif