summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/index/SegmentMerger.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/index/SegmentMerger.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/index/SegmentMerger.cpp723
1 files changed, 723 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/SegmentMerger.cpp b/3rdparty/clucene/src/CLucene/index/SegmentMerger.cpp
new file mode 100644
index 000000000..40814da0c
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/SegmentMerger.cpp
@@ -0,0 +1,723 @@
+/*
+ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+ *
+ * Distributable under the terms of either the Apache License (Version 2.0) or
+ * the GNU Lesser General Public License, as specified in the COPYING file.
+ *
+ * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
+*/
+#include "CLucene/StdHeader.h"
+#include "SegmentMerger.h"
+
+CL_NS_USE(util)
+CL_NS_USE(document)
+CL_NS_USE(store)
+CL_NS_DEF(index)
+
+// File extensions of old-style index files, packed as consecutive 4-byte records (3 characters + NUL); entry i starts at offset i*4
+int COMPOUND_EXTENSIONS_LENGTH = 7; // number of records in COMPOUND_EXTENSIONS
+const char* COMPOUND_EXTENSIONS = "fnm\0" "frq\0" "prx\0" "fdx\0" "fdt\0" "tii\0" "tis\0";
+
+int VECTOR_EXTENSIONS_LENGTH = 3; // number of records in VECTOR_EXTENSIONS
+const char* VECTOR_EXTENSIONS = "tvx\0" "tvd\0" "tvf\0"; // same 4-byte record layout; only added to the compound file when fieldInfos->hasVectors()
+
+SegmentMerger::SegmentMerger(IndexWriter* writer, const QString& name)
+{
+    //Func - Constructor
+    //Pre  - writer != NULL; it supplies the target directory, the
+    //       compound-file flag and the term index interval
+    //       name is not empty; it becomes the name of the merged segment
+    //Post - Instance has been created. No output file has been opened
+    //       yet; only the in-memory skip buffer has been allocated.
+
+    // writer is dereferenced below, so check it the same way the other
+    // methods of this class check their pointer arguments
+    CND_PRECONDITION(writer != NULL, "writer is NULL");
+    CND_PRECONDITION(!name.isEmpty(), "name is NULL");
+
+    // outputs and helpers are created lazily by mergeFields()/mergeTerms()
+    freqOutput = NULL;
+    proxOutput = NULL;
+    termInfosWriter = NULL;
+    queue = NULL;
+    fieldInfos = NULL;
+    useCompoundFile = writer->getUseCompoundFile();
+    skipBuffer = _CLNEW CL_NS(store)::RAMIndexOutput();
+
+    segment = name;
+    directory = writer->getDirectory();
+    termIndexInterval = writer->getTermIndexInterval();
+
+    // skip-list bookkeeping; skipInterval is taken from the
+    // TermInfosWriter once mergeTerms() has created it
+    lastSkipDoc = 0;
+    lastSkipFreqPointer = 0;
+    lastSkipProxPointer = 0;
+    skipInterval = 0;
+}
+
+SegmentMerger::~SegmentMerger()
+{
+    //Func - Destructor
+    //Pre  - true
+    //Post - Every output, writer and buffer still held by this instance
+    //       has been closed and deleted
+
+    // empty the list of readers
+    readers.clear();
+
+    // drop the merged field infos
+    _CLDELETE(fieldInfos);
+
+    // frequency file output
+    if (freqOutput) {
+        freqOutput->close();
+        _CLDELETE(freqOutput);
+    }
+
+    // prox file output
+    if (proxOutput) {
+        proxOutput->close();
+        _CLDELETE(proxOutput);
+    }
+
+    // term infos writer
+    if (termInfosWriter) {
+        termInfosWriter->close();
+        _CLDELETE(termInfosWriter);
+    }
+
+    // merge queue
+    if (queue) {
+        queue->close();
+        _CLDELETE(queue);
+    }
+
+    // in-memory skip buffer
+    if (skipBuffer) {
+        skipBuffer->close();
+        _CLDELETE(skipBuffer);
+    }
+}
+
+void SegmentMerger::add(IndexReader* reader)
+{
+    //Func - Registers one more IndexReader as an input of the merge
+    //Pre  - reader points to a valid IndexReader
+    //Post - reader has been appended to the end of the readers list
+
+    readers.push_back(reader);
+}
+
+IndexReader* SegmentMerger::segmentReader(const int32_t i)
+{
+    //Func - Looks up the i-th registered IndexReader
+    //Pre  - 0 <= i < readers.size()
+    //Post - The i-th IndexReader has been returned
+
+    CND_PRECONDITION(i >= 0, "i is a negative number");
+    CND_PRECONDITION((size_t)i < readers.size(),
+        "i is bigger than the number of IndexReader instances");
+
+    IndexReader* const found = readers[i];
+    CND_CONDITION(found != NULL, "No IndexReader found");
+    return found;
+}
+
+int32_t SegmentMerger::merge()
+{
+    //Func - Runs the merge steps in order: fields, terms, norms and,
+    //       when the merged field infos report vectors, term vectors
+    //Post - Returns the value produced by mergeFields()
+
+    const int32_t mergedDocs = mergeFields();
+    mergeTerms();
+    mergeNorms();
+
+    // fieldInfos has been created by mergeFields() at this point
+    if (fieldInfos->hasVectors()) {
+        mergeVectors();
+    }
+
+    return mergedDocs;
+}
+
+void SegmentMerger::closeReaders()
+{
+    //Func - Calls close() on every registered reader, in insertion order
+
+    uint32_t idx = 0;
+    while (idx < readers.size()) {
+        readers[idx]->close();
+        ++idx;
+    }
+}
+
+void SegmentMerger::createCompoundFile(const QString& filename, QStringList& files)
+{
+    //Func - Packs the files of the merged segment into one compound file
+    //Pre  - fieldInfos != NULL (mergeFields() has run)
+    //       files may already contain names; they are added as well
+    //Post - The names of the segment's files have been appended to files
+    //       and every entry of files has been added to the compound file
+    //       named filename, which has been closed.
+    //       The writer is deleted even if adding or closing throws.
+
+    CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
+
+    CompoundFileWriter* cfsWriter = _CLNEW CompoundFileWriter(directory, filename);
+
+    try {
+        { //msvc6 scope fix
+            // Basic files: COMPOUND_EXTENSIONS holds 4-byte records
+            for (int32_t i = 0; i < COMPOUND_EXTENSIONS_LENGTH; i++) {
+                files.push_back(Misc::qjoin(segment, QLatin1String("."),
+                    QLatin1String(COMPOUND_EXTENSIONS+(i*4))));
+            }
+        }
+
+        { //msvc6 scope fix
+            // Field norm files
+            for (int32_t i = 0; i < fieldInfos->size(); i++) {
+                FieldInfo* fi = fieldInfos->fieldInfo(i);
+                if (fi->isIndexed && !fi->omitNorms) {
+                    // Build the name with the same helper mergeNorms() uses
+                    // to create the file, so both always agree; this also
+                    // removes the fixed 10-character conversion buffers.
+                    files.push_back(Misc::segmentname(segment,
+                        QLatin1String(".f"), i));
+                }
+            }
+        }
+
+        // Vector files
+        if (fieldInfos->hasVectors()) {
+            for (int32_t i = 0; i < VECTOR_EXTENSIONS_LENGTH; i++) {
+                files.push_back(Misc::qjoin(segment, QLatin1String("."),
+                    QLatin1String(VECTOR_EXTENSIONS+(i*4))));
+            }
+        }
+
+        { //msvc6 scope fix
+            // Now merge all added files
+            for (size_t i = 0; i < files.size(); i++) {
+                cfsWriter->addFile(files[i]);
+            }
+        }
+
+        // Perform the merge
+        cfsWriter->close();
+    } _CLFINALLY (
+        _CLDELETE(cfsWriter);
+    );
+}
+
+void SegmentMerger::addIndexed(IndexReader* reader, FieldInfos* fieldInfos,
+    StringArrayWithDeletor& names, bool storeTermVectors,
+    bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+{
+    //Func - Adds every name in names to fieldInfos as an indexed field
+    //       with the given term vector flags. The last argument passed
+    //       to FieldInfos::add is true when reader has no norms for
+    //       that field.
+
+    for (StringArrayWithDeletor::const_iterator name = names.begin();
+        name != names.end(); ++name) {
+        const bool lacksNorms = !reader->hasNorms(*name);
+        fieldInfos->add(*name, true,
+            storeTermVectors, storePositionWithTermVector,
+            storeOffsetWithTermVector, lacksNorms);
+    }
+}
+
+int32_t SegmentMerger::mergeFields()
+{
+    //Func - Merge the fields of all segments
+    //Pre  - true
+    //Post - The merged field infos have been written to the segment's
+    //       ".fnm" file and the documents of all readers that are not
+    //       marked deleted have been written through a FieldsWriter.
+    //       Returns the number of documents written.
+
+    //Create a new FieldInfos
+    fieldInfos = _CLNEW FieldInfos(); // merge field names
+
+    //Condition check to see if fieldInfos points to a valid instance
+    CND_CONDITION(fieldInfos != NULL, "Memory allocation for fieldInfos failed");
+
+    int32_t docCount = 0;
+
+    //Collect the field names of every reader, one category at a time
+    for (uint32_t i = 0; i < readers.size(); i++) {
+        IndexReader* reader = readers[i];
+        //Condition check to see if reader points to a valid instance
+        CND_CONDITION(reader != NULL,"No IndexReader found");
+
+        StringArrayWithDeletor tmp;
+
+        tmp.clear();
+        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);
+        addIndexed(reader, fieldInfos, tmp, true, true, true);
+
+        tmp.clear();
+        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);
+        addIndexed(reader, fieldInfos, tmp, true, true, false);
+
+        tmp.clear();
+        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);
+        addIndexed(reader, fieldInfos, tmp, true, false, true);
+
+        tmp.clear();
+        reader->getFieldNames(IndexReader::TERMVECTOR, tmp);
+        addIndexed(reader, fieldInfos, tmp, true, false, false);
+
+        tmp.clear();
+        reader->getFieldNames(IndexReader::INDEXED, tmp);
+        addIndexed(reader, fieldInfos, tmp, false, false, false);
+
+        tmp.clear();
+        reader->getFieldNames(IndexReader::UNINDEXED, tmp);
+        if (tmp.size() > 0) {
+            // one extra slot beyond the names copied by toArray()
+            TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);
+            tmp.toArray(arr);
+            fieldInfos->add((const TCHAR**)arr, false);
+            _CLDELETE_ARRAY(arr);
+            //no need to delete the contents, since tmp is responsible for it
+        }
+    }
+
+    //Create the filename of the new FieldInfos file
+    QString buf = Misc::segmentname(segment, QLatin1String(".fnm"));
+    //Write the new FieldInfos file to the directory
+    fieldInfos->write(directory, buf);
+
+    // merge field values
+    // Instantiate Fieldswriter which will write in directory for the segment
+    // name segment using the new merged fieldInfos
+    FieldsWriter* fieldsWriter = _CLNEW FieldsWriter(directory, segment, fieldInfos);
+
+    //Condition check to see if fieldsWriter points to a valid instance
+    CND_CONDITION(fieldsWriter != NULL, "Memory allocation for fieldsWriter failed");
+
+    try {
+        //Iterate through all readers
+        for (uint32_t i = 0; i < readers.size(); i++) {
+            IndexReader* reader = readers[i];
+
+            // Condition check to see if reader points to a valid instance
+            CND_CONDITION(reader != NULL, "No IndexReader found");
+
+            // Get the total number documents including the documents that have
+            // been marked deleted
+            const int32_t maxDoc = reader->maxDoc();
+
+            //document buffer, re-used for every document of this reader
+            Document doc;
+
+            //Iterate through all the documents managed by the current reader
+            for (int32_t j = 0; j < maxDoc; j++) {
+                //Check if the j-th document has been deleted, if so skip it
+                if (!reader->isDeleted(j)) {
+                    //Get the document
+                    if (reader->document(j, &doc)) {
+                        //Add the document to the new FieldsWriter
+                        fieldsWriter->addDocument(&doc);
+                        docCount++;
+                        //doc is cleared for re-use
+                        doc.clear();
+                    }
+                }
+            }
+        }
+    } _CLFINALLY (
+        //Close the fieldsWriter
+        fieldsWriter->close();
+        //And have it deleted as it not used any more
+        _CLDELETE(fieldsWriter);
+    );
+
+    return docCount;
+}
+
+void SegmentMerger::mergeVectors()
+{
+    //Func - Copies the term vectors of all documents that are not
+    //       marked deleted into a TermVectorsWriter for the new segment
+    //Pre  - fieldInfos has been created by mergeFields()
+    //Post - The TermVectorsWriter has been deleted
+
+    TermVectorsWriter* termVectorsWriter =
+        _CLNEW TermVectorsWriter(directory, segment, fieldInfos);
+
+    try {
+        for (uint32_t r = 0; r < readers.size(); r++) {
+            IndexReader* source = readers[r];
+            const int32_t docLimit = source->maxDoc();
+
+            for (int32_t docNum = 0; docNum < docLimit; docNum++) {
+                // only documents that are still alive are copied
+                if (!source->isDeleted(docNum)) {
+                    Array<TermFreqVector*> vectors;
+                    if (source->getTermFreqVectors(docNum, vectors))
+                        termVectorsWriter->addAllDocVectors(vectors);
+                    vectors.deleteAll();
+                }
+            }
+        }
+    } _CLFINALLY (
+        _CLDELETE(termVectorsWriter);
+    );
+}
+
+
+void SegmentMerger::mergeTerms()
+{
+    //Func - Merge the terms of all segments
+    //Pre  - fieldInfos != NULL
+    //Post - The terms of all segments have been merged; the frequency
+    //       output, the prox output, the TermInfosWriter and the queue
+    //       have been closed and deleted again
+
+    CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
+
+    try {
+        // open the frequency file of the new segment
+        QString freqName = Misc::segmentname(segment, QLatin1String(".frq"));
+        freqOutput = directory->createOutput(freqName);
+
+        // open the prox file of the new segment
+        QString proxName = Misc::segmentname(segment, QLatin1String(".prx"));
+        proxOutput = directory->createOutput(proxName);
+
+        // writer for the term dictionary, based on the merged fieldInfos
+        termInfosWriter = _CLNEW TermInfosWriter(directory, segment, fieldInfos,
+            termIndexInterval);
+        CND_CONDITION(termInfosWriter != NULL,
+            "Memory allocation for termInfosWriter failed");
+
+        // the skip interval is dictated by the TermInfosWriter
+        skipInterval = termInfosWriter->skipInterval;
+        queue = _CLNEW SegmentMergeQueue(readers.size());
+
+        // do the actual merging
+        mergeTermInfos();
+    } _CLFINALLY (
+        // frequency file output
+        if (freqOutput != NULL) {
+            freqOutput->close();
+            _CLDELETE(freqOutput);
+        }
+
+        // prox file output
+        if (proxOutput != NULL) {
+            proxOutput->close();
+            _CLDELETE(proxOutput);
+        }
+
+        // term infos writer
+        if (termInfosWriter != NULL) {
+            termInfosWriter->close();
+            _CLDELETE(termInfosWriter);
+        }
+
+        // merge queue
+        if (queue != NULL) {
+            queue->close();
+            _CLDELETE(queue);
+        }
+    );
+}
+
+void SegmentMerger::mergeTermInfos()
+{
+ //Func - Merges the TermInfos of all readers: each group of queue entries positioned on an equal term is handed once to mergeTermInfo()
+ //Pre - queue != NULL (checked below); freqOutput, proxOutput and termInfosWriter are used by mergeTermInfo()
+ //Post - On normal return the queue is empty and every SegmentMergeInfo created here has been closed and deleted
+
+ //Condition check to see if queue points to a valid instance
+ CND_CONDITION(queue != NULL, "Memory allocation for queue failed");
+
+ //base is the id of the first document in a segment
+ int32_t base = 0;
+
+ IndexReader* reader = NULL;
+ SegmentMergeInfo* smi = NULL;
+
+ //iterate through all the readers
+ for (uint32_t i = 0; i < readers.size(); i++) {
+ //Get the i-th reader
+ reader = readers[i];
+
+ //Condition check to see if reader points to a valid instance
+ CND_CONDITION(reader != NULL, "No IndexReader found");
+
+ //Get the term enumeration of the reader
+ TermEnum* termEnum = reader->terms();
+ //Instantiate a new SegmentMergeInfo for the current reader and enumeration
+ smi = _CLNEW SegmentMergeInfo(base, termEnum, reader);
+
+ //Condition check to see if smi points to a valid instance
+ CND_CONDITION(smi != NULL, "Memory allocation for smi failed") ;
+
+ //Increase the base by the number of documents that have not been marked deleted
+ //so base will contain a new value for the first document of the next iteration
+ base += reader->numDocs();
+ //Advance smi to its first term
+ if (smi->next()) {
+ //smi is positioned on a term, so store it in the queue
+ //(from here on the queue hands it back through pop())
+ queue->put(smi);
+ } else {
+ //next() returned false, i.e. this reader has no terms at all, so
+ //close the SegmentMergeInfo smi
+ smi->close();
+ //And destroy the instance (the smi variable is re-used further down in this method)
+ _CLDELETE(smi);
+ }
+ }
+
+ //Array that collects the SegmentMergeInfo instances positioned on the same term; one extra slot for the terminating NULL
+ SegmentMergeInfo** match = _CL_NEWARRAY(SegmentMergeInfo*,readers.size()+1);
+
+ //Condition check to see if match points to a valid instance
+ CND_CONDITION(match != NULL, "Memory allocation for match failed") ;
+
+ SegmentMergeInfo* top = NULL;
+
+ //As long as there are SegmentMergeInfo instances stored in the queue
+ while (queue->size() > 0) {
+ int32_t matchSize = 0;
+
+ // pop matching terms
+
+ //Pop the first SegmentMergeInfo from the queue (presumably the one with the smallest term - confirm against SegmentMergeQueue)
+ match[matchSize++] = queue->pop();
+ //Get the Term of match[0]
+ Term* term = match[0]->term;
+
+ //Condition check to see if term points to a valid instance
+ CND_CONDITION(term != NULL,"term is NULL") ;
+
+ //Get the current top of the queue
+ top = queue->top();
+
+ //Keep popping while the entry at the top of the queue
+ //is positioned on a term equal to term
+ while (top != NULL && term->equals(top->term)) {
+ //A match has been found so add the matching SegmentMergeInfo to the match array
+ match[matchSize++] = queue->pop();
+ //Get the next SegmentMergeInfo
+ top = queue->top();
+ }
+ match[matchSize]=NULL; //NULL terminator: mergeTermInfo()/appendPostings() stop at the first NULL entry
+
+ //add new TermInfo
+ mergeTermInfo(match); //matchSize
+
+ //Advance the SegmentMergeInfo instances in the match array and put them back into the queue
+ while (matchSize > 0) {
+ smi = match[--matchSize];
+
+ //Condition check to see if smi points to a valid instance
+ CND_CONDITION(smi != NULL, "smi is NULL");
+
+ //Move to the next term in the enumeration of SegmentMergeInfo smi
+ if (smi->next()) {
+ //There still are some terms so restore smi in the queue
+ queue->put(smi);
+
+ } else {
+ //Done with a segment
+ //No terms anymore so close this SegmentMergeInfo instance
+ smi->close();
+ _CLDELETE(smi);
+ }
+ }
+ }
+
+ _CLDELETE_ARRAY(match); //NOTE(review): not reached if mergeTermInfo() throws, so match would leak in that case
+}
+
+void SegmentMerger::mergeTermInfo(SegmentMergeInfo** smis)
+{
+    //Func - Merge the TermInfo of a term found in one or more segments.
+    //Pre  - smis != NULL; it is a NULL-terminated array of segments that
+    //       are positioned at the same term
+    //       freqOutput != NULL
+    //       proxOutput != NULL
+    //Post - The postings and the skip data of the term have been written
+    //       and, if at least one document contained the term, a TermInfo
+    //       has been added to termInfosWriter
+
+    CND_PRECONDITION(smis != NULL, "smis is NULL");
+    CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
+    CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");
+
+    // remember where this term's data starts in both output files
+    const int64_t freqStart = freqOutput->getFilePointer();
+    const int64_t proxStart = proxOutput->getFilePointer();
+
+    // write the postings of all segments, then the buffered skip data
+    const int32_t docFreq = appendPostings(smis);
+    const int64_t skipStart = writeSkip();
+
+    // no document contains the term: nothing to add to the dictionary
+    if (docFreq <= 0)
+        return;
+
+    // dictionary entry pointing into the freq and prox files; the skip
+    // position is stored relative to the start of the freq data
+    termInfo.set(docFreq, freqStart, proxStart, (int32_t)(skipStart - freqStart));
+
+    // make sure smis[0]->term can be handed to the writer
+    CND_PRECONDITION(smis[0]->term != NULL, "smis[0]->term is NULL");
+    termInfosWriter->add(smis[0]->term, &termInfo);
+}
+
+
+int32_t SegmentMerger::appendPostings(SegmentMergeInfo** smis)
+{
+ //Func - Process postings from multiple segments all positioned on the
+ // same term. Writes out merged entries into freqOutput and
+ // the proxOutput streams.
+ //Pre - smis != NULL and it contains segments that are positioned at the same term.
+ // smis is terminated by a NULL entry; skipInterval != 0 (it is used as a divisor below)
+ // freqOutput != NULL
+ // proxOutput != NULL
+ //Post - Returns number of documents across all segments where this term was found
+
+ CND_PRECONDITION(smis != NULL, "smis is NULL");
+ CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
+ CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");
+
+ int32_t lastDoc = 0; //document number written last, in merged numbering; docs are stored as deltas against it
+ int32_t df = 0; //Document Counter
+
+ resetSkip(); //start with an empty skip buffer for this term
+ SegmentMergeInfo* smi = NULL;
+
+ //Iterate through all SegmentMergeInfo instances in smis
+ int32_t i = 0;
+ while ((smi=smis[i]) != NULL) {
+ //smi now is the i-th SegmentMergeInfo
+
+ //NOTE(review): always true here, the loop condition already guarantees smi != NULL
+ CND_PRECONDITION(smi != NULL, " is NULL");
+
+ //Get the term positions
+ TermPositions* postings = smi->getPositions();
+ //Get the base of this segment
+ int32_t base = smi->base;
+ //Get the docMap so we can see which documents have been deleted
+ int32_t* docMap = smi->getDocMap();
+ //Position postings on the current term of this segment
+ postings->seek(smi->termEnum);
+ while (postings->next()) {
+ int32_t doc = postings->doc();
+ //Check if there are deletions
+ if (docMap != NULL)
+ doc = docMap[doc]; // map around deletions
+ doc += base; // convert to merged space
+
+ //Condition check to see doc is equal to or bigger than lastDoc
+ CND_CONDITION(doc >= lastDoc,"docs out of order");
+
+ //Increase the total frequency over all segments
+ df++;
+
+ if ((df % skipInterval) == 0) { //every skipInterval-th document gets a skip entry
+ bufferSkip(lastDoc); //lastDoc still is the previous document and nothing of the current one has been written yet
+ }
+
+ //Calculate a new docCode
+ //use low bit to flag freq=1
+ int32_t docCode = (doc - lastDoc) << 1;
+ lastDoc = doc;
+
+ //Get the frequency of the Term
+ int32_t freq = postings->freq();
+ if (freq == 1) {
+ //write doc & freq=1
+ freqOutput->writeVInt(docCode | 1);
+ } else {
+ //write doc
+ freqOutput->writeVInt(docCode);
+ //write frequency in doc
+ freqOutput->writeVInt(freq);
+ }
+
+ int32_t lastPosition = 0; //positions are delta-encoded per document, starting from 0
+ // write position deltas
+ for (int32_t j = 0; j < freq; j++) {
+ //Get the next position
+ int32_t position = postings->nextPosition();
+ //Write the difference between position and the last position
+ proxOutput->writeVInt(position - lastPosition);
+ lastPosition = position;
+ }
+ }
+
+ i++;
+ }
+
+ //Return total number of documents across all segments where term was found
+ return df;
+}
+
+void SegmentMerger::resetSkip()
+{
+    //Func - Prepares the skip bookkeeping for a new term
+    //Pre  - skipBuffer, freqOutput and proxOutput are valid
+    //Post - The skip buffer has been reset, the last skip document is 0
+    //       and the last skip pointers equal the current positions of
+    //       the freq and prox outputs
+
+    skipBuffer->reset();
+
+    lastSkipDoc = 0;
+    lastSkipFreqPointer = freqOutput->getFilePointer();
+    lastSkipProxPointer = proxOutput->getFilePointer();
+}
+
+void SegmentMerger::bufferSkip(int32_t doc)
+{
+    //Func - Appends one skip entry to the skip buffer. The entry holds
+    //       three VInts: the document delta and the freq/prox file
+    //       pointer deltas, each relative to the previous skip entry.
+    //Post - doc and the current file pointers have become the new
+    //       reference values for the next entry
+
+    const int64_t freqPos = freqOutput->getFilePointer();
+    const int64_t proxPos = proxOutput->getFilePointer();
+
+    const int32_t docDelta = doc - lastSkipDoc;
+    const int32_t freqDelta = (int32_t) (freqPos - lastSkipFreqPointer);
+    const int32_t proxDelta = (int32_t) (proxPos - lastSkipProxPointer);
+
+    skipBuffer->writeVInt(docDelta);
+    skipBuffer->writeVInt(freqDelta);
+    skipBuffer->writeVInt(proxDelta);
+
+    lastSkipDoc = doc;
+    lastSkipFreqPointer = freqPos;
+    lastSkipProxPointer = proxPos;
+}
+
+int64_t SegmentMerger::writeSkip()
+{
+    //Func - Copies the buffered skip data to the frequency output
+    //Post - Returns the position of freqOutput at which the skip data
+    //       starts, i.e. its file pointer before the copy
+
+    const int64_t skipStart = freqOutput->getFilePointer();
+    skipBuffer->writeTo(freqOutput);
+    return skipStart;
+}
+
+// Func - Merges the norms for all fields: for every indexed field that
+//        keeps norms, one norm file is written holding one byte per
+//        document that is not marked deleted, reader after reader
+// Pre  - fieldInfos != NULL
+// Post - The norms for all fields have been merged
+void SegmentMerger::mergeNorms()
+{
+    CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
+
+    for (int32_t fieldNo = 0; fieldNo < fieldInfos->size(); fieldNo++) {
+        FieldInfo* fi = fieldInfos->fieldInfo(fieldNo);
+
+        // fields that are not indexed or that omit norms have no norm file
+        if (!fi->isIndexed || fi->omitNorms)
+            continue;
+
+        // open the norm file of this field in the new segment
+        QString normFile = Misc::segmentname(segment, QLatin1String(".f"), fieldNo);
+        IndexOutput* output = directory->createOutput(normFile);
+        CND_CONDITION(output != NULL, "No Outputstream retrieved");
+
+        // scratch buffer, resized for each reader and released below
+        uint8_t* normBuf = NULL;
+        try {
+            for (uint32_t r = 0; r < readers.size(); ++r) {
+                IndexReader* reader = readers[r];
+                CND_CONDITION(reader != NULL, "No reader found");
+
+                // maxDoc counts deleted documents as well
+                const int32_t maxDoc = reader->maxDoc();
+                if (maxDoc <= 0)
+                    continue;
+
+                // NOTE(review): when realloc fails this reader's norms are
+                // skipped without any error being reported
+                uint8_t* resized = (uint8_t*)realloc(normBuf, maxDoc *
+                    sizeof(uint8_t));
+                if (resized == NULL)
+                    continue;
+
+                normBuf = resized;
+                memset(normBuf, 0, maxDoc * sizeof(uint8_t));
+
+                // let the reader fill in its norms for this field
+                reader->norms(fi->name, normBuf);
+
+                // copy the norms of all documents that are not deleted
+                for (int32_t docNo = 0; docNo < maxDoc; docNo++) {
+                    if (reader->isDeleted(docNo))
+                        continue;
+                    output->writeByte(normBuf[docNo]);
+                }
+            }
+        } _CLFINALLY (
+            if (output != NULL) {
+                output->close();
+                _CLDELETE(output);
+            }
+            free(normBuf);
+        );
+    }
+}
+
+CL_NS_END