summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/FieldsReader.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/FieldsReader.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/index/FieldsReader.cpp231
1 files changed, 231 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/FieldsReader.cpp b/3rdparty/clucene/src/CLucene/index/FieldsReader.cpp
new file mode 100644
index 000000000..e3f9d1cc2
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/FieldsReader.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#include "CLucene/StdHeader.h"
+#include "FieldsReader.h"
+
+#include "CLucene/util/VoidMap.h"
+#include "CLucene/util/Misc.h"
+#include "CLucene/store/Directory.h"
+#include "CLucene/document/Document.h"
+#include "CLucene/document/Field.h"
+#include "FieldInfos.h"
+#include "FieldsWriter.h"
+
+CL_NS_USE(store)
+CL_NS_USE(document)
+CL_NS_USE(util)
+CL_NS_DEF(index)
+
+FieldsReader::FieldsReader(Directory* d, const QString& segment, FieldInfos* fn)
+ : fieldInfos(fn)
+{
+ //Func - Constructor
+ //Pre - d contains a valid reference to a Directory
+ // segment != NULL
+ // fn contains a valid reference to a FieldInfos
+ //Post - The instance has been created
+
+ CND_PRECONDITION(!segment.isEmpty(), "segment != NULL");
+
+ QString buf = Misc::segmentname(segment, QLatin1String(".fdt"));
+ fieldsStream = d->openInput(buf);
+
+ buf = Misc::segmentname(segment, QLatin1String(".fdx"));
+ indexStream = d->openInput(buf);
+
+ _size = (int32_t)indexStream->length() / 8;
+}
+
+FieldsReader::~FieldsReader()
+{
+ //Func - Destructor
+ //Pre - true
+ //Post - The instance has been destroyed
+ close();
+}
+
+void FieldsReader::close()
+{
+ //Func - Closes the FieldsReader
+ //Pre - true
+ //Post - The FieldsReader has been closed
+ if (fieldsStream) {
+ fieldsStream->close();
+ _CLDELETE(fieldsStream);
+ }
+
+ if(indexStream) {
+ indexStream->close();
+ _CLDELETE(indexStream);
+ }
+}
+
+int32_t FieldsReader::size() const
+{
+ return _size;
+}
+
+bool FieldsReader::doc(int32_t n, Document* doc)
+{
+ if ( n * 8L > indexStream->length() )
+ return false;
+
+ indexStream->seek(n * 8L);
+ int64_t position = indexStream->readLong();
+ fieldsStream->seek(position);
+
+ int32_t numFields = fieldsStream->readVInt();
+ for (int32_t i = 0; i < numFields; i++) {
+ int32_t fieldNumber = fieldsStream->readVInt();
+ FieldInfo* fi = fieldInfos->fieldInfo(fieldNumber);
+
+ if ( fi == NULL )
+ _CLTHROWA(CL_ERR_IO, "Field stream is invalid");
+
+ uint8_t bits = fieldsStream->readByte();
+ if ((bits & FieldsWriter::FIELD_IS_BINARY) != 0) {
+ int32_t fieldLen = fieldsStream->readVInt();
+ FieldsReader::FieldsStreamHolder* subStream = new
+ FieldsReader::FieldsStreamHolder(fieldsStream, fieldLen);
+ uint8_t bits = Field::STORE_YES;
+ Field* f = _CLNEW Field(
+ fi->name, // name
+ subStream, // read value
+ bits);
+
+ doc->add(*f);
+
+ //now skip over the rest of the field
+ if (fieldsStream->getFilePointer() + fieldLen
+ == fieldsStream->length()) {
+ // set to eof
+ fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen - 1);
+ fieldsStream->readByte();
+ } else {
+ fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen);
+ }
+ } else {
+ uint8_t bits = Field::STORE_YES;
+ if (fi->isIndexed && (bits & FieldsWriter::FIELD_IS_TOKENIZED)!=0 )
+ bits |= Field::INDEX_TOKENIZED;
+ else if (fi->isIndexed && (bits & FieldsWriter::FIELD_IS_TOKENIZED) == 0)
+ bits |= Field::INDEX_UNTOKENIZED;
+ else
+ bits |= Field::INDEX_NO;
+
+ if (fi->storeTermVector) {
+ if (fi->storeOffsetWithTermVector) {
+ if (fi->storePositionWithTermVector) {
+ bits |= Field::TERMVECTOR_WITH_OFFSETS;
+ bits |= Field::TERMVECTOR_WITH_POSITIONS;
+ } else {
+ bits |= Field::TERMVECTOR_WITH_OFFSETS;
+ }
+ } else if (fi->storePositionWithTermVector) {
+ bits |= Field::TERMVECTOR_WITH_POSITIONS;
+ } else {
+ bits |= Field::TERMVECTOR_YES;
+ }
+ } else {
+ bits |= Field::TERMVECTOR_NO;
+ }
+
+ if ( (bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0 ) {
+ bits |= Field::STORE_COMPRESS;
+ int32_t fieldLen = fieldsStream->readVInt();
+ FieldsStreamHolder* subStream = new
+ FieldsStreamHolder(fieldsStream, fieldLen);
+
+ // TODO: we dont have gzip inputstream available, must alert
+ // user to somehow use a gzip inputstream
+ Field* f = _CLNEW Field(
+ fi->name, // name
+ subStream, // read value
+ bits);
+
+ f->setOmitNorms(fi->omitNorms);
+ doc->add(*f);
+
+ // now skip over the rest of the field
+ if (fieldsStream->getFilePointer() + fieldLen
+ == fieldsStream->length()) {
+ //set to eof
+ fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen - 1);
+ fieldsStream->readByte();
+ } else {
+ fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen);
+ }
+ } else {
+ TCHAR* fvalue = fieldsStream->readString(true);
+ Field* f = _CLNEW Field(
+ fi->name, // name
+ fvalue, // read value
+ bits);
+ // TODO: could optimise this
+ _CLDELETE_CARRAY(fvalue);
+ f->setOmitNorms(fi->omitNorms);
+ doc->add(*f);
+ }
+ }
+ }
+ return true;
+}
+
+FieldsReader::FieldsStreamHolder::FieldsStreamHolder(IndexInput* indexInput,
+ int32_t subLength)
+{
+ this->indexInput = indexInput->clone();
+ this->indexInputStream = new IndexInputStream(this->indexInput);
+ this->subStream = new jstreams::SubInputStream<char>(indexInputStream,
+ subLength);
+
+ this->size = subStream->getSize();
+ this->position = subStream->getPosition();
+ this->error = subStream->getError();
+ this->status = subStream->getStatus();
+}
+
+FieldsReader::FieldsStreamHolder::~FieldsStreamHolder()
+{
+ delete subStream;
+ delete indexInputStream;
+
+ indexInput->close();
+ _CLDELETE(indexInput);
+}
+
+int32_t FieldsReader::FieldsStreamHolder::read(const char*& start, int32_t _min,
+ int32_t _max)
+{
+ int32_t ret = subStream->read(start,_min,_max);
+ this->position = subStream->getPosition();
+ this->error = subStream->getError();
+ this->status = subStream->getStatus();
+ return ret;
+}
+
+int64_t FieldsReader::FieldsStreamHolder::skip(int64_t ntoskip)
+{
+ int64_t ret = subStream->skip(ntoskip);
+ this->position = subStream->getPosition();
+ this->error = subStream->getError();
+ this->status = subStream->getStatus();
+ return ret;
+}
+
+int64_t FieldsReader::FieldsStreamHolder::reset(int64_t pos)
+{
+ int64_t ret = subStream->reset(pos);
+ this->position = subStream->getPosition();
+ this->error = subStream->getError();
+ this->status = subStream->getStatus();
+ return ret;
+}
+
+CL_NS_END