diff options
author | Qt by Nokia <qt-info@nokia.com> | 2011-04-27 12:05:43 +0200 |
---|---|---|
committer | axis <qt-info@nokia.com> | 2011-04-27 12:05:43 +0200 |
commit | 50123887ba0f33cf47520bee7c419d68742af2d1 (patch) | |
tree | 0eb8679b9e4e4370e59b44bfdcae616816e39aca /3rdparty/clucene/src/CLucene/index/DocumentWriter.h |
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you
want to look at revision history older than this, please refer to the
Qt Git wiki for how to use Git history grafting. At the time of
writing, this wiki is located here:
http://qt.gitorious.org/qt/pages/GitIntroductionWithQt
If you have already performed the grafting and you don't see any
history beyond this commit, try running "git log" with the "--follow"
argument.
Branched from the monolithic repo, Qt master branch, at commit
896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/DocumentWriter.h')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/index/DocumentWriter.h | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/DocumentWriter.h b/3rdparty/clucene/src/CLucene/index/DocumentWriter.h new file mode 100644 index 000000000..7096ba3ee --- /dev/null +++ b/3rdparty/clucene/src/CLucene/index/DocumentWriter.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. +*/ +#ifndef _lucene_index_DocumentWriter_ +#define _lucene_index_DocumentWriter_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include <QtCore/QString> + +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/document/Document.h" +#include "CLucene/store/Directory.h" +#include "FieldInfos.h" +#include "IndexWriter.h" +#include "CLucene/util/VoidMap.h" +#include "CLucene/document/Field.h" +#include "TermInfo.h" +#include "CLucene/search/Similarity.h" +#include "TermInfosWriter.h" +#include "FieldsWriter.h" +#include "Term.h" + +CL_NS_DEF(index) + +class DocumentWriter : LUCENE_BASE +{ +public: + // Info about a single Term within one document. + class Posting : LUCENE_BASE + { + public: + Term* term; // the Term + int32_t freq; // its frequency in doc + Array<int32_t> positions; // positions it occurs at + Array<TermVectorOffsetInfo> offsets; // offset info for the term (NOTE(review): presumably one entry per position; confirm) + + Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset); + ~Posting(); + }; + +private: + CL_NS(analysis)::Analyzer* analyzer; + CL_NS(store)::Directory* directory; + FieldInfos* fieldInfos; //array + const int32_t maxFieldLength; + CL_NS(search)::Similarity* similarity; + int32_t termIndexInterval; + + // Keys are Terms, values are Postings. + // Used to buffer a document before it is written to the index. 
+ typedef CL_NS(util)::CLHashtable<Term*, Posting*, Term::Compare, + Term::Equals> PostingTableType; + PostingTableType postingTable; + int32_t* fieldLengths; //array + int32_t* fieldPositions; //array + int32_t* fieldOffsets; //array + qreal* fieldBoosts; //array + + Term* termBuffer; +public: + /** This ctor is used by test code only. + * + * @param d The directory to write the document information to + * @param a The analyzer to use for the document + * @param similarity The Similarity function + * @param maxFieldLength The maximum number of tokens a field may have + */ + DocumentWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a, + CL_NS(search)::Similarity* similarity, const int32_t maxFieldLength); + + DocumentWriter(CL_NS(store)::Directory* directory, + CL_NS(analysis)::Analyzer* analyzer, IndexWriter* writer); + ~DocumentWriter(); + + void addDocument(const QString& segment, CL_NS(document)::Document* doc); + + +private: + // Tokenizes the fields of a document into Postings. + void invertDocument(const CL_NS(document)::Document* doc); + + void addPosition(const TCHAR* field, const TCHAR* text, + const int32_t position, TermVectorOffsetInfo* offset); + + void sortPostingTable(Posting**& array, int32_t& arraySize); + + static void quickSort(Posting**& postings, const int32_t lo, const int32_t hi); + + void writePostings(Posting** postings, const int32_t postingsLength, + const QString& segment); + + void writeNorms(const QString& segment); + + void clearPostingTable(); +}; + +CL_NS_END + +#endif |