/*
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or 
 * the GNU Lesser General Public License, as specified in the COPYING file.
 *
 * Changes are Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies), all rights reserved.
*/
#include "CLucene/StdHeader.h"
#include "SegmentMerger.h"

CL_NS_USE(util)
CL_NS_USE(document)
CL_NS_USE(store)
CL_NS_DEF(index)

// File extensions of old-style index files
int COMPOUND_EXTENSIONS_LENGTH = 7;
const char* COMPOUND_EXTENSIONS = "fnm\0" "frq\0" "prx\0" "fdx\0" "fdt\0" "tii\0" "tis\0";

int VECTOR_EXTENSIONS_LENGTH = 3;
const char* VECTOR_EXTENSIONS = "tvx\0" "tvd\0" "tvf\0";
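// Each extension occupies a fixed 4-byte slot ("ext" plus '\0'), so the i-th
// extension starts at EXTENSIONS + i*4; e.g. COMPOUND_EXTENSIONS + 1*4 points
// at "frq". createCompoundFile() below relies on this layout.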

SegmentMerger::SegmentMerger(IndexWriter* writer, const QString& name)
{
    //Func - Constructor
    //Pre  - writer holds a valid reference to an IndexWriter
    //       name is not empty
    //Post - Instance has been created

    CND_PRECONDITION(!name.isEmpty(), "name is empty");

    freqOutput       = NULL;
    proxOutput       = NULL;
    termInfosWriter  = NULL;
    queue            = NULL;
    fieldInfos       = NULL;
    useCompoundFile  = writer->getUseCompoundFile();
    skipBuffer       = _CLNEW CL_NS(store)::RAMIndexOutput();

    segment          = name;
    directory        = writer->getDirectory();
    termIndexInterval = writer->getTermIndexInterval();

    lastSkipDoc = 0;
    lastSkipFreqPointer = 0;
    lastSkipProxPointer = 0;
    skipInterval = 0;
}

SegmentMerger::~SegmentMerger()
{
    //Func - Destructor
    //Pre  - true
    //Post - The instance has been destroyed

    //Clear the readers set
    readers.clear();

    //Delete field Infos
    _CLDELETE(fieldInfos);     
    //Close and destroy the IndexOutput to the Frequency File
    if (freqOutput != NULL) { 
        freqOutput->close(); 
        _CLDELETE(freqOutput); 
    }
    //Close and destroy the IndexOutput to the Prox File
    if (proxOutput != NULL) {
        proxOutput->close(); 
        _CLDELETE(proxOutput); 
    }
    //Close and destroy the termInfosWriter
    if (termInfosWriter != NULL) {
        termInfosWriter->close(); 
        _CLDELETE(termInfosWriter); 
    }
    //Close and destroy the queue
    if (queue != NULL) {
        queue->close(); 
        _CLDELETE(queue);
    }
    //Close and destroy the skipBuffer
    if (skipBuffer != NULL) {
        skipBuffer->close();
        _CLDELETE(skipBuffer);
    }
}

void SegmentMerger::add(IndexReader* reader)
{
    //Func - Adds an IndexReader to the set of readers
    //Pre  - reader contains a valid reference to an IndexReader
    //Post - The IndexReader reader has been added to the set of readers

    readers.push_back(reader);
}

IndexReader* SegmentMerger::segmentReader(const int32_t i)
{
    //Func - Returns a reference to the i-th IndexReader
    //Pre  - 0 <= i < readers.size()
    //Post - A reference to the i-th IndexReader has been returned

    CND_PRECONDITION(i >= 0, "i is a negative number");
    CND_PRECONDITION((size_t)i < readers.size(),
        "i is bigger than the number of IndexReader instances");

    //Retrieve the i-th IndexReader
    IndexReader* ret = readers[i];
    CND_CONDITION(ret != NULL, "No IndexReader found");

    return ret;
}

int32_t SegmentMerger::merge()
{
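    //Func - Merges all segments owned by the added readers into the new segment:
    //       first the stored fields, then the terms and norms and, if any field
    //       has them, the term vectors
    //Pre  - The IndexReaders to merge have been added with add()
    //Post - Returns the number of documents that were merged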
    int32_t value = mergeFields();
    mergeTerms();
    mergeNorms();

    if (fieldInfos->hasVectors())
        mergeVectors();

    return value;
}

void SegmentMerger::closeReaders()
{
    for (uint32_t i = 0; i < readers.size(); i++) {
        // close readers
        IndexReader* reader = readers[i];
        reader->close();
    }
}

void SegmentMerger::createCompoundFile(const QString& filename, QStringList& files)
{
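    //Func - Bundles all files belonging to this segment into a single compound
    //       file with the given filename; the names of the bundled files are
    //       appended to files
    //Pre  - fieldInfos != NULL
    //Post - The compound file has been written to the directory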
    CompoundFileWriter* cfsWriter = _CLNEW CompoundFileWriter(directory, filename);

    { //msvc6 scope fix
        // Basic files
        for (int32_t i = 0; i < COMPOUND_EXTENSIONS_LENGTH; i++) {
            files.push_back(Misc::qjoin(segment, QLatin1String("."),
                QLatin1String(COMPOUND_EXTENSIONS+(i*4))));
        }
    }

    { //msvc6 scope fix
        // Field norm files
        for (int32_t i = 0; i < fieldInfos->size(); i++) {
            FieldInfo* fi = fieldInfos->fieldInfo(i);
            if (fi->isIndexed && !fi->omitNorms) {
                TCHAR tbuf[10];
                char abuf[10];
                _i64tot(i, tbuf, 10);
                STRCPY_TtoA(abuf, tbuf, 10);

                files.push_back(Misc::qjoin(segment, QLatin1String(".f"),
                    QLatin1String(abuf)));
            }
        }
    }

    // Vector files
    if (fieldInfos->hasVectors()) {
        for (int32_t i = 0; i < VECTOR_EXTENSIONS_LENGTH; i++) {
            files.push_back(Misc::qjoin(segment, QLatin1String("."),
                QLatin1String(VECTOR_EXTENSIONS+(i*4))));
        }
    }

    { //msvc6 scope fix
        // Now merge all added files
        for (size_t i = 0; i < files.size(); i++) {
            cfsWriter->addFile(files[i]);
        }
    }

    // Perform the merge
    cfsWriter->close();
    _CLDELETE(cfsWriter);
}

void SegmentMerger::addIndexed(IndexReader* reader, FieldInfos* fieldInfos,
    StringArrayWithDeletor& names, bool storeTermVectors,
    bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
    StringArrayWithDeletor::const_iterator itr = names.begin();
    while (itr != names.end()) {
        fieldInfos->add(*itr, true, 
            storeTermVectors, storePositionWithTermVector, 
            storeOffsetWithTermVector, !reader->hasNorms(*itr));
        ++itr;
    }
}

int32_t SegmentMerger::mergeFields()
{
    //Func - Merge the fields of all segments 
    //Pre  - true
    //Post - The field infos and field values of all segments have been merged.

    //Create a new FieldInfos
    fieldInfos = _CLNEW FieldInfos();		  // merge field names

    //Condition check to see if fieldInfos points to a valid instance
    CND_CONDITION(fieldInfos != NULL, "Memory allocation for fieldInfos failed");

    IndexReader* reader = NULL;

    int32_t docCount = 0;

    //Iterate through all readers
    for (uint32_t i = 0; i < readers.size(); i++) {
        //get the i-th reader
        reader = readers[i];
        //Condition check to see if reader points to a valid instance
        CND_CONDITION(reader != NULL,"No IndexReader found");

        StringArrayWithDeletor tmp;

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);
        addIndexed(reader, fieldInfos, tmp, true, true, true);

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);
        addIndexed(reader, fieldInfos, tmp, true, true, false);

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);
        addIndexed(reader, fieldInfos, tmp, true, false, true);

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR, tmp);
        addIndexed(reader, fieldInfos, tmp, true, false, false);

        tmp.clear();
        reader->getFieldNames(IndexReader::INDEXED, tmp);
        addIndexed(reader, fieldInfos, tmp, false, false, false);

        tmp.clear();
        reader->getFieldNames(IndexReader::UNINDEXED, tmp);
        if (tmp.size() > 0) {
            TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);
            tmp.toArray(arr);
            fieldInfos->add((const TCHAR**)arr, false);
            _CLDELETE_ARRAY(arr);
            //no need to delete the contents, since tmp is responsible for it
        }
    }

    //Create the filename of the new FieldInfos file
    QString buf = Misc::segmentname(segment, QLatin1String(".fnm"));
    //Write the new FieldInfos file to the directory
    fieldInfos->write(directory, buf);

    // merge field values
    // Instantiate Fieldswriter which will write in directory for the segment
    // name segment using the new merged fieldInfos
    FieldsWriter* fieldsWriter = _CLNEW FieldsWriter(directory, segment, fieldInfos);

    //Condition check to see if fieldsWriter points to a valid instance
    CND_CONDITION(fieldsWriter != NULL, "Memory allocation for fieldsWriter failed");

    try {  
        IndexReader* reader = NULL;
        //Iterate through all readers
        for (uint32_t i = 0; i < readers.size(); i++) {
            // get the i-th reader
            reader = readers[i];

            // Condition check to see if reader points to a valid instance
            CND_CONDITION(reader != NULL, "No IndexReader found");

            // Get the total number of documents, including the documents that
            // have been marked deleted
            int32_t maxDoc = reader->maxDoc();

            //document buffer
            Document doc;

            //Iterate through all the documents managed by the current reader
            for (int32_t j = 0; j < maxDoc; j++) {
                //Check if the j-th document has been deleted, if so skip it
                if (!reader->isDeleted(j)) { 
                    //Get the document
                    if (reader->document(j, &doc)) {
                        //Add the document to the new FieldsWriter
                        fieldsWriter->addDocument(&doc);
                        docCount++;
                        //doc is cleared for re-use
                        doc.clear();
                    }
                }
            }
        }
    } _CLFINALLY (
        //Close the fieldsWriter
        fieldsWriter->close();
        //And have it deleted, as it is not used any more
        _CLDELETE(fieldsWriter);
    );

    return docCount;
}

void SegmentMerger::mergeVectors()
{
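    //Func - Merges the term vectors of all segments
    //Pre  - fieldInfos != NULL and fieldInfos->hasVectors() is true
    //Post - The term vectors of all non-deleted documents have been written
    //       to the new segment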
    TermVectorsWriter* termVectorsWriter = 
        _CLNEW TermVectorsWriter(directory, segment, fieldInfos);

    try {
        for (uint32_t r = 0; r < readers.size(); r++) {
            IndexReader* reader = readers[r];
            int32_t maxDoc = reader->maxDoc();
            for (int32_t docNum = 0; docNum < maxDoc; docNum++) {
                // skip deleted docs
                if (reader->isDeleted(docNum))
                    continue;

                Array<TermFreqVector*> tmp;
                if (reader->getTermFreqVectors(docNum, tmp))
                    termVectorsWriter->addAllDocVectors(tmp);
                tmp.deleteAll();
            }
        }
    } _CLFINALLY (
        _CLDELETE(termVectorsWriter);
    );
}


void SegmentMerger::mergeTerms()
{
    //Func - Merge the terms of all segments
    //Pre  - fieldInfos != NULL
    //Post - The terms of all segments have been merged

    CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");

    try{
        //create a filename for the new Frequency File for segment
        QString buf = Misc::segmentname(segment, QLatin1String(".frq"));
        //Open an IndexOutput to the new Frequency File
        freqOutput = directory->createOutput(buf);

        //create a filename for the new Prox File for segment
        buf = Misc::segmentname(segment, QLatin1String(".prx"));
        //Open an IndexOutput to the new Prox File
        proxOutput = directory->createOutput(buf);

        //Instantiate  a new termInfosWriter which will write in directory
        //for the segment name segment using the new merged fieldInfos
        termInfosWriter = _CLNEW TermInfosWriter(directory, segment, fieldInfos,
            termIndexInterval);  

        //Condition check to see if termInfosWriter points to a valid instance
        CND_CONDITION(termInfosWriter != NULL,
            "Memory allocation for termInfosWriter failed");

        skipInterval = termInfosWriter->skipInterval;
        queue = _CLNEW SegmentMergeQueue(readers.size());

        //And merge the Term Infos
        mergeTermInfos();	      
    } _CLFINALLY (
        //Close and destroy the IndexOutput to the Frequency File
        if (freqOutput != NULL) {
            freqOutput->close(); _CLDELETE(freqOutput);
        }

        //Close and destroy the IndexOutput to the Prox File
        if (proxOutput != NULL)
        {
            proxOutput->close();
            _CLDELETE(proxOutput);
        }

        //Close and destroy the termInfosWriter
        if (termInfosWriter != NULL) {
            termInfosWriter->close();
            _CLDELETE(termInfosWriter);
        }
        
        //Close and destroy the queue
        if (queue != NULL) {
            queue->close();
            _CLDELETE(queue);
        }
    );
}

void SegmentMerger::mergeTermInfos()
{
    //Func - Merges all TermInfos into a single segment
    //Pre  - true
    //Post - All TermInfos have been merged into a single segment

    //Condition check to see if queue points to a valid instance
    CND_CONDITION(queue != NULL, "Memory allocation for queue failed");

    //base is the id of the first document in a segment
    int32_t base = 0;

    IndexReader* reader = NULL;
    SegmentMergeInfo* smi = NULL;

    //iterate through all the readers
    for (uint32_t i = 0; i < readers.size(); i++) {
        //Get the i-th reader
        reader = readers[i];

        //Condition check to see if reader points to a valid instance
        CND_CONDITION(reader != NULL, "No IndexReader found");

        //Get the term enumeration of the reader
        TermEnum* termEnum = reader->terms();
        //Instantiate a new SegmentMergeInfo for the current reader and enumeration
        smi = _CLNEW SegmentMergeInfo(base, termEnum, reader);

        //Condition check to see if smi points to a valid instance
        CND_CONDITION(smi != NULL, "Memory allocation for smi failed");

        //Increase the base by the number of documents that have not been marked deleted
        //so base will contain a new value for the first document of the next iteration
        base += reader->numDocs();
        //Get the next current term
        if (smi->next()) {
            //Store the SegmentMergeInfo smi with the initialized SegmentTermEnum TermEnum
            //into the queue
            queue->put(smi);
        } else {
            //The end of this reader's TermEnum has been reached, so close
            //the SegmentMergeInfo smi
            smi->close();
            //And destroy the instance (the smi variable is reused later in this method)
            _CLDELETE(smi);
        }
    }

    //Instantiate an array of SegmentMergeInfo instances called match
    SegmentMergeInfo** match = _CL_NEWARRAY(SegmentMergeInfo*,readers.size()+1);

    //Condition check to see if match points to a valid instance
    CND_CONDITION(match != NULL, "Memory allocation for match failed");

    SegmentMergeInfo* top = NULL;

    //As long as there are SegmentMergeInfo instances stored in the queue
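    //The queue orders the SegmentMergeInfo instances by their current term, so
    //this loop performs a k-way merge: all entries positioned on the smallest
    //term are popped into match, merged into a single TermInfo, advanced and
    //put back until every enumeration is exhausted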
    while (queue->size() > 0) {
        int32_t matchSize = 0;			  

        // pop matching terms

        //Pop the first SegmentMergeInfo from the queue
        match[matchSize++] = queue->pop();
        //Get the Term of match[0]
        Term* term = match[0]->term;

        //Condition check to see if term points to a valid instance
        CND_CONDITION(term != NULL, "term is NULL");

        //Get the current top of the queue
        top = queue->top();

        //For each SegmentMergeInfo still in the queue
        //Check if term matches the term of the SegmentMergeInfo instances in the queue
        while (top != NULL && term->equals(top->term)) {
            //A match has been found so add the matching SegmentMergeInfo to the match array
            match[matchSize++] = queue->pop();
            //Get the next SegmentMergeInfo
            top = queue->top();
        }
        match[matchSize]=NULL;

        //Add a new TermInfo for the merged postings of this term
        mergeTermInfo(match); //match is NULL-terminated

        //Restore the SegmentTermInfo instances in the match array back into the queue
        while (matchSize > 0) {
            smi = match[--matchSize];

            //Condition check to see if smi points to a valid instance
            CND_CONDITION(smi != NULL, "smi is NULL");

            //Move to the next term in the enumeration of SegmentMergeInfo smi
            if (smi->next()) {
                //There still are some terms so restore smi in the queue
                queue->put(smi);

            } else {
                //Done with a segment
                //No terms anymore so close this SegmentMergeInfo instance
                smi->close();				  
                _CLDELETE(smi);
            }
        }
    }

    _CLDELETE_ARRAY(match);
}

void SegmentMerger::mergeTermInfo(SegmentMergeInfo** smis)
{
    //Func - Merge the TermInfo of a term found in one or more segments. 
    //Pre  - smis != NULL and it contains segments that are positioned at the same term;
    //       the array is NULL-terminated
    //       freqOutput != NULL
    //       proxOutput != NULL
    //Post - The TermInfo of a term has been merged

    CND_PRECONDITION(smis != NULL, "smis is NULL");
    CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
    CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");

    //Get the file pointer of the IndexOutput to the Frequency File
    int64_t freqPointer = freqOutput->getFilePointer();
    //Get the file pointer of the IndexOutput to the Prox File
    int64_t proxPointer = proxOutput->getFilePointer();

    //Process postings from multiple segments all positioned on the same term.
    int32_t df = appendPostings(smis);  

    int64_t skipPointer = writeSkip();

    //df contains the number of documents across all segments where this term was found
    if (df > 0) {
        //add an entry to the dictionary with pointers to prox and freq files
        termInfo.set(df, freqPointer, proxPointer, (int32_t)(skipPointer - freqPointer));
        //Precondition check to be sure that the reference to
        //smis[0]->term will be valid
        CND_PRECONDITION(smis[0]->term != NULL, "smis[0]->term is NULL");
        //Write a new TermInfo
        termInfosWriter->add(smis[0]->term, &termInfo);
    }
}


int32_t SegmentMerger::appendPostings(SegmentMergeInfo** smis)
{
    //Func - Process postings from multiple segments all positioned on the
    //       same term. Writes out merged entries into freqOutput and
    //       the proxOutput streams.
    //Pre  - smis != NULL and it contains segments that are positioned at the same term;
    //       the array is NULL-terminated
    //       freqOutput != NULL
    //       proxOutput != NULL
    //Post - Returns number of documents across all segments where this term was found

    CND_PRECONDITION(smis != NULL, "smis is NULL");
    CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
    CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");

    int32_t lastDoc = 0;
    int32_t df = 0;       //Document Counter

    resetSkip();
    SegmentMergeInfo* smi = NULL;

    //Iterate through all SegmentMergeInfo instances in smis
    int32_t i = 0;
    while ((smi=smis[i]) != NULL) {
        //Get the i-th SegmentMergeInfo 

        //Condition check to see if smi points to a valid instance
        CND_PRECONDITION(smi != NULL, "smi is NULL");

        //Get the term positions 
        TermPositions* postings = smi->getPositions();
        //Get the base of this segment
        int32_t base = smi->base;
        //Get the docMap so we can see which documents have been deleted
        int32_t* docMap = smi->getDocMap();
        //Seek the termpost
        postings->seek(smi->termEnum);
        while (postings->next()) {
            int32_t doc = postings->doc();
            //Check if there are deletions
            if (docMap != NULL)
                doc = docMap[doc]; // map around deletions
            doc += base;                              // convert to merged space

            //Condition check to see if doc is equal to or bigger than lastDoc
            CND_CONDITION(doc >= lastDoc,"docs out of order");

            //Increase the document frequency (the number of docs containing the term)
            df++;

            if ((df % skipInterval) == 0) {
                bufferSkip(lastDoc);
            }

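            //Postings are delta-encoded as in the Lucene .frq format: the doc
            //delta is shifted left by one and the low bit flags freq == 1.
            //For example, a delta of 5 with freq 1 is written as the single
            //VInt 11 (5 << 1 | 1); a delta of 5 with freq 3 is written as the
            //VInt 10 followed by the VInt 3.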
            //Calculate a new docCode 
            //use low bit to flag freq=1
            int32_t docCode = (doc - lastDoc) << 1;	  
            lastDoc = doc;

            //Get the frequency of the Term
            int32_t freq = postings->freq();
            if (freq == 1) {
                //write doc & freq=1
                freqOutput->writeVInt(docCode | 1);	  
            } else {
                //write doc
                freqOutput->writeVInt(docCode);	  
                //write frequency in doc
                freqOutput->writeVInt(freq);		  
            }

            int32_t lastPosition = 0;			  
            // write position deltas
            for (int32_t j = 0; j < freq; j++) {
                //Get the next position
                int32_t position = postings->nextPosition();
                //Write the difference between position and the last position
                proxOutput->writeVInt(position - lastPosition);			  
                lastPosition = position;
            }
        }

        i++;
    }

    //Return total number of documents across all segments where term was found		
    return df;
}

void SegmentMerger::resetSkip()
{
    skipBuffer->reset();
    lastSkipDoc = 0;
    lastSkipFreqPointer = freqOutput->getFilePointer();
    lastSkipProxPointer = proxOutput->getFilePointer();
}

void SegmentMerger::bufferSkip(int32_t doc)
{
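    //Func - Buffers a skip entry for document doc
    //Pre  - appendPostings() calls this once every skipInterval documents
    //Post - The delta of doc and the deltas of the freq and prox file pointers
    //       relative to the previous skip point have been written to skipBuffer
    //       as VInts; writeSkip() later appends the buffer to the .frq file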
    int64_t freqPointer = freqOutput->getFilePointer();
    int64_t proxPointer = proxOutput->getFilePointer();

    skipBuffer->writeVInt(doc - lastSkipDoc);
    skipBuffer->writeVInt((int32_t) (freqPointer - lastSkipFreqPointer));
    skipBuffer->writeVInt((int32_t) (proxPointer - lastSkipProxPointer));

    lastSkipDoc = doc;
    lastSkipFreqPointer = freqPointer;
    lastSkipProxPointer = proxPointer;
}

int64_t SegmentMerger::writeSkip()
{
    int64_t skipPointer = freqOutput->getFilePointer();
    skipBuffer->writeTo(freqOutput);
    return skipPointer;
}

// Func - Merges the norms for all fields
// Pre  - fieldInfos != NULL
// Post - The norms for all fields have been merged
void SegmentMerger::mergeNorms()
{
    CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");

    //iterate through all the Field Infos instances
    for (int32_t i = 0; i < fieldInfos->size(); i++) {
        //Get the i-th FieldInfo
        FieldInfo* fi = fieldInfos->fieldInfo(i);
        //Is this Field indexed?
        if (fi->isIndexed && !fi->omitNorms) {
            //Create an IndexOutput for the norm file of this field
            QString buf = Misc::segmentname(segment, QLatin1String(".f"), i);
            IndexOutput* output = directory->createOutput(buf);

            //Condition check to see if output points to a valid instance
            CND_CONDITION(output != NULL, "No Outputstream retrieved");

            uint8_t* input = NULL;
            try {
                for (uint32_t j = 0; j < readers.size(); ++j) {
                    // get the next index reader + condition check
                    IndexReader* reader = readers[j];
                    CND_CONDITION(reader != NULL, "No reader found");

                    // Get the total number of documents including the documents
                    // that have been marked deleted
                    int32_t maxDoc = reader->maxDoc();
                    if (maxDoc > 0) {
                        // if there are docs, allocate a buffer to read their norms
                        uint8_t* data = (uint8_t*)realloc(input, maxDoc *
                            sizeof(uint8_t));
                        if (data) {
                            input = data;
                            memset(input, 0, maxDoc * sizeof(uint8_t));
                            // Get an IndexInput to the norm file for this
                            // field in this segment
                            reader->norms(fi->name, input);

                            //Iterate through all the documents
                            for(int32_t k = 0; k < maxDoc; k++) {
                                //Check if document k is deleted
                                if (!reader->isDeleted(k)) {
                                    //write the new norm
                                    output->writeByte(input[k]);
                                }
                            }
                        }
                    }
                }
            } _CLFINALLY (
                if (output != NULL) {
                    output->close();
                    _CLDELETE(output);
                }
                free(input);
            );
        }
    }
}

CL_NS_END