1 files changed, 722 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/index/MultiReader.cpp b/3rdparty/clucene/src/CLucene/index/MultiReader.cpp
new file mode 100644
index 000000000..1260d04dc
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/index/MultiReader.cpp
@@ -0,0 +1,722 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+* 
+* Distributable under the terms of either the Apache License (Version 2.0) or 
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "MultiReader.h"
+
+#include "IndexReader.h"
+#include "CLucene/document/Document.h"
+#include "Terms.h"
+#include "SegmentMergeQueue.h"
+
+CL_NS_USE(store)
+CL_NS_USE(util)
+CL_NS_DEF(index)
+
+MultiReader::MultiReader(IndexReader** subReaders):
+  // Use the first sub-reader's directory; a NULL or empty array gives a NULL directory.
+  IndexReader((subReaders != NULL && subReaders[0] != NULL) ? subReaders[0]->getDirectory() : NULL),
+  normsCache(true, true) {
+	initialize(subReaders);   // counts the readers and builds the starts table
+}
+
+MultiReader::MultiReader(Directory* directory, SegmentInfos* sis, IndexReader** subReaders):
+	// Directory and segment infos go straight to the base reader (third argument: false).
+	IndexReader(directory, sis, false),
+	normsCache(true, true) {
+	initialize(subReaders);   // counts the readers and builds the starts table
+}
+
+
+MultiReader::~MultiReader() {
+//Func - Destructor
+//Pre  - true
+//Post - This instance has been destroyed, together with every IndexReader
+//       it managed and the arrays it allocated
+
+	//Release the fake-norms buffer and the table of first doc numbers
+	_CLDELETE_ARRAY(ones);
+	_CLDELETE_ARRAY(starts);
+	//Destroy each sub-reader. The loop body is never entered when
+	//subReadersLength is 0, so only the array pointer needs checking.
+	if (subReaders != NULL) {
+		for (int32_t idx = 0; idx < subReadersLength; ++idx) {
+			_CLDELETE(subReaders[idx]);
+		}
+	}
+	_CLDELETE_ARRAY(subReaders);   //finally release the array itself
+}
+
+void MultiReader::initialize(IndexReader** subReaders){
+  // Takes the NULL-terminated array of sub-readers, counts its entries and
+  // builds the starts table holding the first doc number of each sub-reader.
+  this->subReadersLength = 0;
+  this->subReaders = subReaders;
+  //count the subReaders size (the array ends at its first NULL entry)
+  if ( subReaders != NULL ){
+     while ( subReaders[subReadersLength] != NULL ){
+        subReadersLength++;
+     }
+  }
+  _maxDoc        = 0;
+  _numDocs       = -1;     // -1 marks the cached document count as not computed
+  ones           = NULL;   // fake-norms buffer, created on demand by fakeNorms()
+  _hasDeletions  = false;  // must start defined: the loop below only ever sets it to true
+
+  starts = _CL_NEWARRAY(int32_t,subReadersLength + 1);    // build starts array
+  for (int32_t i = 0; i < subReadersLength; i++) {
+     starts[i] = _maxDoc;
+     _maxDoc += subReaders[i]->maxDoc();      // compute maxDocs
+     if (subReaders[i]->hasDeletions())
+        _hasDeletions = true;
+  }
+  starts[subReadersLength] = _maxDoc;   // final entry: total number of documents
+}
+
+bool MultiReader::getTermFreqVectors(int32_t n, Array<TermFreqVector*>& result){
+	const int32_t segment = readerIndex(n);          // sub-reader that holds document n
+	return subReaders[segment]->getTermFreqVectors(n - starts[segment], result); // segment-local doc number
+}
+
+TermFreqVector* MultiReader::getTermFreqVector(int32_t n, const TCHAR* field){
+	const int32_t segment = readerIndex(n);          // sub-reader that holds document n
+	return subReaders[segment]->getTermFreqVector(n - starts[segment], field); // segment-local doc number
+}
+
+
+int32_t MultiReader::numDocs() {
+	SCOPED_LOCK_MUTEX(THIS_LOCK)
+	if (_numDocs != -1)                       // cached value is still valid
+	  return _numDocs;
+	int32_t total = 0;                        // cache miss--recompute
+	for (int32_t idx = 0; idx < subReadersLength; ++idx)
+	  total += subReaders[idx]->numDocs();    // sum over all sub-readers
+	_numDocs = total;
+	return _numDocs;
+}
+
+int32_t MultiReader::maxDoc() const {
+	return _maxDoc;   // sum of the sub-readers' maxDoc() values, computed in initialize()
+}
+
+bool MultiReader::document(int32_t n, CL_NS(document)::Document* doc){
+	const int32_t segment = readerIndex(n);          // sub-reader that holds document n
+	return subReaders[segment]->document(n - starts[segment], doc);   // segment-local doc number
+}
+
+bool MultiReader::isDeleted(const int32_t n) {
+	const int32_t segment = readerIndex(n);          // sub-reader that holds document n
+	return subReaders[segment]->isDeleted(n - starts[segment]);   // segment-local doc number
+}
+
+uint8_t* MultiReader::norms(const TCHAR* field){
+	SCOPED_LOCK_MUTEX(THIS_LOCK)
+	uint8_t* merged = normsCache.get(field);
+	if (merged != NULL)
+	  return merged;				  // cache hit
+	
+	// No sub-reader has norms for this field: hand out the shared fake norms
+	if ( !hasNorms(field) )
+		return fakeNorms();
+	
+	// Cache miss: every sub-reader writes its norms into its own slice of one
+	// buffer that covers all maxDoc() documents
+	merged = _CL_NEWARRAY(uint8_t,maxDoc());
+	for (int32_t idx = 0; idx < subReadersLength; ++idx)
+	  subReaders[idx]->norms(field, merged + starts[idx]);
+	
+	//The caller may delete its field string while the cache still uses it as a
+	//key, which would corrupt the normsCache. The cache therefore gets its own
+	//copy of the field name.
+	TCHAR* ownedKey = STRDUP_TtoT(field);
+	normsCache.put(ownedKey, merged);     //update cache
+	
+	return merged;
+}
+
+void MultiReader::norms(const TCHAR* field, uint8_t* result) {
+	// Fills 'result' (expected to hold maxDoc() bytes, one uint8_t norm per document) for 'field'.
+	SCOPED_LOCK_MUTEX(THIS_LOCK)
+	uint8_t* bytes = normsCache.get(field);
+	if (bytes==NULL && !hasNorms(field)) 
+		bytes=fakeNorms();
+	if (bytes != NULL){                            // cache hit (or fake norms)
+	   // Norms are single bytes; sizing the copy with sizeof(int32_t) overran both buffers.
+	   memcpy(result, bytes, maxDoc() * sizeof(uint8_t));
+	   return;                                     // result is complete, no need to read the segments
+	}
+	for (int32_t i = 0; i < subReadersLength; i++)      // read from segments
+	  subReaders[i]->norms(field, result + starts[i]);
+}
+
+
+void MultiReader::doSetNorm(int32_t n, const TCHAR* field, uint8_t value){
+	normsCache.remove(field);                              // the cached merged norms are stale now
+	const int32_t segment = readerIndex(n);                // sub-reader that holds document n
+	subReaders[segment]->setNorm(n - starts[segment], field, value); // segment-local doc number
+}
+
+TermEnum* MultiReader::terms() const {
+	return _CLNEW MultiTermEnum(subReaders, starts, NULL);   // NULL start term: enumerate all terms
+}
+
+TermEnum* MultiReader::terms(const Term* term) const {
+	return _CLNEW MultiTermEnum(subReaders, starts, term);   // enumeration starts at or after 'term'
+}
+
+int32_t MultiReader::docFreq(const Term* t) const {
+	int32_t sum = 0;                               // frequency of t added up over the sub-readers
+	for (int32_t idx = 0; idx < subReadersLength; ++idx)
+	  sum += subReaders[idx]->docFreq(t);
+	return sum;
+}
+
+TermDocs* MultiReader::termDocs() const {
+	// A new enumerator over all sub-readers; no term is selected until seek() is called on it.
+	return _CLNEW MultiTermDocs(subReaders, starts);
+}
+
+TermPositions* MultiReader::termPositions() const {
+	// Created as a MultiTermPositions and handed out as a TermPositions pointer.
+	return (TermPositions*)_CLNEW MultiTermPositions(subReaders, starts);
+}
+
+void MultiReader::doDelete(const int32_t n) {
+	_numDocs = -1;                                             // cached count must be recomputed
+	const int32_t segment = readerIndex(n);                    // sub-reader that holds document n
+	subReaders[segment]->deleteDocument(n - starts[segment]);  // segment-local doc number
+	_hasDeletions = true;
+}
+
+int32_t MultiReader::readerIndex(const int32_t n) const {
+	// Binary search over the starts table for the sub-reader holding document n.
+	// Several entries can carry the same start value (the exact-match branch
+	// scans over them); the last such entry is the one returned.
+	int32_t low  = 0;
+	int32_t high = subReadersLength - 1;
+	while (low <= high) {
+	  const int32_t probe = (low + high) >> 1;
+	  const int32_t first = starts[probe];
+	  if (first > n) { high = probe - 1; continue; }
+	  if (first < n) { low  = probe + 1; continue; }
+
+	  int32_t last = probe;                    // exact match on a start value
+	  while (last+1 < subReadersLength && starts[last+1] == first)
+	    ++last;                                // scan to last match
+	  return last;
+	}
+
+	return high;   // no exact match: high indexes the last start value below n
+}
+
+bool MultiReader::hasNorms(const TCHAR* field) {
+	// True as soon as one sub-reader reports norms for the field.
+	bool found = false;
+	for (int idx = 0; idx < subReadersLength && !found; ++idx)
+		found = subReaders[idx]->hasNorms(field);
+	return found;
+}
+uint8_t* MultiReader::fakeNorms() {
+	// The buffer is created on first use for maxDoc() documents and reused afterwards.
+	if (ones != NULL) return ones;
+	return ones = SegmentReader::createFakeNorms(maxDoc());
+}
+
+void MultiReader::doUndeleteAll(){
+	for (int32_t idx = 0; idx < subReadersLength; ++idx)   // undelete in every sub-reader
+		subReaders[idx]->undeleteAll();
+	_numDocs = -1;           // cached document count must be recomputed
+	_hasDeletions = false;
+}
+void MultiReader::doCommit() {
+	for (int32_t idx = 0; idx < subReadersLength; ++idx)   // commit each sub-reader in order
+	  subReaders[idx]->commit();
+}
+
+void MultiReader::doClose() {
+	SCOPED_LOCK_MUTEX(THIS_LOCK)
+	// Close each sub-reader in order; they are deleted later, by the destructor.
+	for (int32_t idx = 0; idx < subReadersLength; ++idx)
+	  subReaders[idx]->close();
+}
+
+
+void MultiReader::getFieldNames(FieldOption fldOption, StringArrayWithDeletor& retarray){
+    // Asks every sub-reader for its field names and appends each distinct
+    // name once to retarray.
+    StringArrayWithDeletor gathered;
+    CLHashList<TCHAR*> distinct;
+
+    for (int32_t idx = 0; idx < subReadersLength; ++idx) {
+      IndexReader* const source = subReaders[idx];
+      source->getFieldNames(fldOption, gathered);
+
+      //duplicate every name that is not in the distinct list yet
+      for ( StringArrayWithDeletor::iterator name = gathered.begin(); name != gathered.end(); ++name ){
+          if ( distinct.find(*name) == distinct.end() )
+            distinct.insert(STRDUP_TtoT(*name));
+      }
+    }
+
+    //pass the duplicated strings on to retarray;
+    //no need to copy, already done!
+    for ( CLHashList<TCHAR*>::iterator name = distinct.begin(); name != distinct.end(); ++name )
+      retarray.push_back(*name);
+}
+
+
+MultiTermDocs::MultiTermDocs(){
+//Func - Default constructor
+//       Initialises an empty MultiTermDocs.
+//       It exists so that the constructor of MultiTermPositions can
+//       initialise the instance by itself
+//Pre  - true
+//Post - An empty instance has been created: no readers, no term, no position
+	//No readers and no tables yet
+	subReaders       = NULL;
+	subReadersLength = 0;
+	starts           = NULL;
+	readerTermDocs   = NULL;
+	//No term selected and no segment being enumerated
+	term             = NULL;
+	current          = NULL;
+	base             = 0;
+	pointer          = 0;
+}
+
+MultiTermDocs::MultiTermDocs(IndexReader** r, const int32_t* s){
+//Func - Constructor
+//Pre  - r is NULL or a NULL-terminated array of IndexReader pointers
+//       s != NULL; next() and read() take the first doc number of
+//       reader i from s[i]
+//Post - The instance has been created; no term has been selected yet
+
+	subReaders       = r;
+	subReadersLength = 0;
+
+	CND_PRECONDITION(s != NULL, "s is NULL");
+
+	//Count the readers: the array ends at its first NULL entry
+	if ( subReaders != NULL ){
+		for ( ; subReaders[subReadersLength] != NULL; ++subReadersLength ){
+		}
+	}
+
+	starts           = s;
+	readerTermDocs   = NULL;
+	term             = NULL;
+	current          = NULL;
+	base             = 0;
+	pointer          = 0;
+
+	//Without readers there is nothing to allocate
+	if (subReaders == NULL || subReadersLength <= 0)
+		return;
+
+	//One slot per reader (plus one spare), filled on first use by termDocs(i)
+	readerTermDocs = _CL_NEWARRAY(TermDocs*, subReadersLength+1);
+
+	CND_CONDITION(readerTermDocs != NULL,"No memory could be allocated for readerTermDocs");
+
+	for ( int32_t slot = 0; slot <= subReadersLength; ++slot )
+		readerTermDocs[slot] = NULL;
+}
+
+MultiTermDocs::~MultiTermDocs(){
+//Func - Destructor
+//Pre  - true
+//Post - The instance has been destroyed; close() has released the
+//       per-reader TermDocs and the reference to the current term
+  close();
+}
+
+
+TermPositions* MultiTermDocs::__asTermPositions(){
+  return NULL;   // this class offers no TermPositions view; MultiTermPositions returns itself instead
+}
+
+int32_t MultiTermDocs::doc() const {
+  CND_PRECONDITION(current!=NULL,"current==NULL, check that next() was called");
+  return current->doc() + base;   // segment-local doc number shifted by the segment's first doc number
+}
+int32_t MultiTermDocs::freq() const {
+	CND_PRECONDITION(current!=NULL,"current==NULL, check that next() was called");
+	return current->freq();   // forwarded unchanged from the segment being enumerated
+}
+
+void MultiTermDocs::seek(TermEnum* termEnum){
+	seek(termEnum->term(false));   // false: unowned reference; seek(Term*) takes its own reference
+}
+
+void MultiTermDocs::seek( Term* tterm) {
+//Func - Positions the instance on a new term and restarts the enumeration
+//Pre  - tterm != NULL
+//Post - term refers to tterm and the enumeration starts again at the
+//       first sub-reader
+
+	CND_PRECONDITION(tterm != NULL, "tterm is NULL");
+	
+	//The order of the next three statements matters: seek may be called
+	//with this->term itself as argument (seek(this->term)). Taking the new
+	//reference first keeps the reference counter correct in that case,
+	//because the term is never released before it has been re-acquired.
+	
+	//Acquire a counted reference to the new term
+	Term* acquired = _CL_POINTER(tterm);
+	
+	//Release the reference to the previous term
+	_CLDECDELETE(term);
+	
+	term = acquired;
+	
+	current = NULL;   //no sub-reader is being enumerated yet
+	pointer = 0;
+	base    = 0;
+}
+
+bool MultiTermDocs::next() {
+	// Stay in the current segment while it has documents, else open the next ones in turn.
+	while (current == NULL || !current->next()) {
+	  if (pointer >= subReadersLength)
+	    return false;                      // no segment left to try
+	  base = starts[pointer];
+	  current = termDocs(pointer++);
+	}
+	return true;
+}
+
+int32_t MultiTermDocs::read(int32_t* docs, int32_t* freqs, int32_t length) {
+	// Reads from the first segment that still delivers entries and returns
+	// how many were read; 0 means that no segment is left.
+	for (;;) {
+	  if (current == NULL) {
+	    if (pointer >= subReadersLength)
+	      return 0;                           // every segment has been tried
+	    base = starts[pointer];
+	    current = termDocs(pointer++);        // try next segment
+	    continue;                             // termDocs() may have returned NULL
+	  }
+	  const int32_t count = current->read(docs, freqs, length);
+	  if (count == 0) {
+	    current = NULL;                       // none left in segment
+	    continue;
+	  }
+	  for (int32_t k = 0; k < count; ++k)     // adjust doc numbers
+	    docs[k] += base;
+	  return count;
+	}
+}
+
+bool MultiTermDocs::skipTo(const int32_t target) {
+	// Linear scan: step with next() until a document number >= target turns up.
+	while (next())
+	  if (doc() >= target)
+	    return true;
+	return false;
+}
+
+void MultiTermDocs::close() {
+//Func - Closes all TermDocs managed by this instance
+//Pre  - true
+//Post - Every TermDocs that was created for a sub-reader has been closed
+//       and deleted, the array holding them has been released and the
+//       reference to the current term has been dropped
+
+	//Nothing to release when the array was never allocated
+	if (readerTermDocs != NULL){
+        for (int32_t slot = 0; slot < subReadersLength; ++slot) {
+            TermDocs* td = readerTermDocs[slot];
+
+            //A slot is NULL when no TermDocs was created for that reader
+            if (td == NULL)
+                continue;
+
+            td->close();
+            _CLDELETE(td);
+        }
+
+        //Release the array itself
+        _CLDELETE_ARRAY(readerTermDocs);
+	}
+	
+	//current previously pointed to a member of readerTermDocs; ensure that
+	//it doesn't now point to invalid memory.
+	current = NULL;
+	pointer = 0;
+	base    = 0;
+	
+	//Drop the reference to the term
+	_CLDECDELETE(term);
+}
+
+TermDocs* MultiTermDocs::termDocs(const IndexReader* reader) const {
+	// This class asks the reader for a plain TermDocs; MultiTermPositions overrides this.
+	return reader->termDocs();
+}
+
+TermDocs* MultiTermDocs::termDocs(const int32_t i) const {
+	// Returns the TermDocs of sub-reader i after seeking it to the current
+	// term, creating it on first use; NULL when no term has been selected.
+	if (term == NULL)
+	  return NULL;
+	if (readerTermDocs[i] == NULL)
+	  readerTermDocs[i] = termDocs(subReaders[i]);
+
+	TermDocs* const slot = readerTermDocs[i];
+	slot->seek(term);
+	return slot;
+}
+
+
+MultiTermEnum::MultiTermEnum(
+  IndexReader** subReaders, const int32_t *starts, const Term* t){
+//Func - Constructor
+//       Opens the term enumerations of all readers and queues them for merging
+//Pre  - subReaders is NULL or a NULL-terminated array of IndexReader
+//       instances (the constructor counts the entries itself)
+//       starts != NULL; starts[i] is handed to the SegmentMergeInfo of reader i
+//       t is the term to start the enumeration at, or NULL to enumerate
+//       all terms
+//Post - The instance has been created. When t != NULL and at least one
+//       reader delivered a term, next() has already been called once
+//       For t == NULL the enumeration is positioned before the first term:
+//       the caller has to call next() first
+	
+	int32_t subReadersLength = 0;
+	if ( subReaders != NULL ){
+		while ( subReaders[subReadersLength] != NULL )
+			subReadersLength++;
+	}
+	CND_PRECONDITION(starts != NULL,"starts is NULL");
+	
+	//Temporary variables
+	IndexReader*   reader    = NULL;
+	TermEnum* termEnum  = NULL;
+	SegmentMergeInfo* smi      = NULL;
+	_docFreq = 0;
+	_term = NULL;
+	queue                      = _CLNEW SegmentMergeQueue(subReadersLength);
+	
+	CND_CONDITION (queue != NULL, "Could not allocate memory for queue");
+	
+	//iterate through all the readers
+	for ( int32_t i=0;i<subReadersLength;i++ ) {
+		//Get the i-th reader
+		reader = subReaders[i];
+		
+		//Check if the enumeration must start from term t
+		if (t != NULL) {
+			//termEnum is an enumeration of terms starting at or after the named term t
+			termEnum = reader->terms(t);
+		}else{
+			//termEnum is an enumeration of all the Terms and TermInfos in the set.
+			termEnum = reader->terms();
+		}
+
+		//Instantiate a new SegmentMergeInfo for this reader
+		smi = _CLNEW SegmentMergeInfo(starts[i], termEnum, reader);
+		
+		// Note that in the call termEnum->term(false) below false is required because
+		// otherwise a reference is leaked. By passing false term() is
+		// ordered to return an unowned reference instead. (Credits to DSR)
+		if (t == NULL ? smi->next() : termEnum->term(false) != NULL){
+			// initialize queue
+			queue->put(smi);
+		} else{
+			//Close the SegmentMergeInfo
+			smi->close();
+			//And have it deleted
+			_CLDELETE(smi);
+		}
+	}
+	
+	//When starting from t and the queue has elements, move to the first merged term
+	if (t != NULL && queue->size() > 0) {
+		next();
+	}
+}
+
+MultiTermEnum::~MultiTermEnum(){
+//Func - Destructor
+//Pre  - true
+//Post - All the resources have been freed and the instance has been deleted
+
+	//Close the enumeration; this releases the current term and closes the queue
+	close();
+	
+	//Delete the queue
+	_CLDELETE(queue);
+}
+
+bool MultiTermEnum::next(){
+//Func - Moves to the next term in the merged set of enumerations
+//Pre  - true
+//Post - Returns true when a next term exists; _term then refers to it and
+//       _docFreq holds its document frequency summed over all segments
+//       Returns false when the queue is empty; _term is NULL in that case
+
+	SegmentMergeInfo* head = queue->top();
+	//Nothing left in the queue: release the current term and report the end
+	if (head == NULL) {
+	    _CLDECDELETE(_term); 
+	    _term = NULL;
+	    return false;
+	}
+	
+	//Drop our reference to the previous term before replacing it
+	_CLDECDELETE(_term); 
+	
+	//The term at the top of the queue becomes the current term; take a
+	//counted reference to it
+	_term = _CL_POINTER(head->term);
+	_docFreq = 0;
+	
+	//Consume every segment whose current term equals the new term
+	for ( ; head != NULL && _term->compareTo(head->term) == 0; head = queue->top() ) {
+		//don't delete, this is the top
+		queue->pop(); 
+		// increment freq
+		_docFreq += head->termEnum->docFreq();
+		if (!head->next()){
+			// done with a segment
+			head->close();
+			_CLDELETE(head);
+		}else{
+			// restore queue
+			queue->put(head);
+		}
+	}
+	
+	return true;
+}
+
+
+Term* MultiTermEnum::term() {
+//Func - Returns the current term of the set of enumerations
+//Pre  - next() must have been called once!
+//Post - The current term has been returned with an increased reference
+//       counter (it is obtained through _CL_POINTER)
+//       Use term(false) to get the term without increasing the counter
+
+	Term* counted = _CL_POINTER(_term);
+	return counted;
+}
+
+Term* MultiTermEnum::term(bool pointer) {
+  	// pointer == false: hand out the term as is, without touching its reference counter
+  	if ( !pointer )
+    	return _term;
+  	return _CL_POINTER(_term);   // pointer == true: counted reference
+}
+
+int32_t MultiTermEnum::docFreq() const {
+//Func - Returns the document frequency of the current term in the set
+//Pre  - next() must have been called once
+//Post - The document frequency of the current enumerated term, as summed
+//       up by next(), has been returned
+
+  return _docFreq;
+}
+
+
+void MultiTermEnum::close() {
+//Func - Closes the set of enumerations in the queue
+//Pre  - queue holds a valid reference to a SegmentMergeQueue
+//Post - The queue has been closed; all SegmentMergeInfo instances have been
+//       deleted by the closing of the queue
+//       term has been finalized and reset to NULL
+
+	// Needed when this enumeration hasn't actually been exhausted yet
+	_CLDECDELETE(_term);
+	
+	//Close the queue. This will destroy all SegmentMergeInfo instances!
+	queue->close();
+
+}
+
+
+
+
+
+MultiTermPositions::MultiTermPositions(IndexReader** r, const int32_t* s){
+//Func - Constructor
+//Pre  - r is NULL or a NULL-terminated array of IndexReader pointers
+//       s != NULL
+//Post - The instance has been created
+
+	subReaders       = r;
+	subReadersLength    = 0;
+	if ( subReaders != NULL ){
+		while ( subReaders[subReadersLength] != NULL )
+		subReadersLength ++ ;
+	}
+	
+	CND_PRECONDITION(s != NULL, "s is NULL");
+	
+	starts        = s;
+	base          = 0;
+	pointer       = 0;
+	current       = NULL;
+	term          = NULL;
+	
+	readerTermDocs   = NULL;
+
+	//Check if there are readers
+	if(subReaders != NULL && subReadersLength > 0){
+		//The slots are stored, read and released as TermDocs* (see close()), so the
+		//array is allocated with that element type instead of being cast to it.
+		readerTermDocs = _CL_NEWARRAY(TermDocs*,subReadersLength);
+		CND_CONDITION(readerTermDocs != NULL,"No memory could be allocated for readerTermDocs");
+		//Initialize the readerTermDocs pointer array
+		for ( int32_t i=0;i<subReadersLength;i++){
+			readerTermDocs[i]=NULL;
+		}
+	}
+}
+
+
+TermDocs* MultiTermPositions::__asTermDocs(){
+  return (TermDocs*) this;   // this object itself, viewed as a TermDocs
+}
+TermPositions* MultiTermPositions::__asTermPositions(){
+  return (TermPositions*) this;   // this object itself, viewed as a TermPositions
+}
+
+
+TermDocs* MultiTermPositions::termDocs(const IndexReader* reader) const {
+// In MultiTermPositions this->current must always be a SegmentTermPositions,
+// not merely a SegmentTermDocs. This override of termDocs(IndexReader*)
+// therefore obtains the enumerator from the reader's termPositions method
+// instead of its termDocs method and returns it through its TermDocs
+// interface.
+
+	TermPositions* positions = reader->termPositions();
+	TermDocs* asDocs = positions->__asTermDocs();
+	
+	CND_CONDITION(asDocs != NULL,
+	    "Dynamic downcast in MultiTermPositions::termDocs from"
+	    " TermPositions to TermDocs failed."
+	  );
+
+	return asDocs;
+}
+	
+int32_t MultiTermPositions::nextPosition() {
+	//Func - Returns the next position reported by the segment that is
+	//       currently being enumerated
+	//Pre  - current != NULL
+	//Post - The call has been forwarded to current's TermPositions interface
+	CND_PRECONDITION(current != NULL,"current is NULL");
+	
+	TermPositions* positions = current->__asTermPositions();
+	CND_CONDITION(positions != NULL,
+	    "Dynamic downcast in MultiTermPositions::nextPosition from"
+	    " SegmentTermDocs to TermPositions failed."
+	)
+	return positions->nextPosition();
+}
+
+
+CL_NS_END