diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/util')
30 files changed, 4278 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/util/Arrays.h b/3rdparty/clucene/src/CLucene/util/Arrays.h new file mode 100644 index 000000000..ba60c5638 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Arrays.h @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_util_Arrays_ +#define _lucene_util_Arrays_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "VoidList.h" + +CL_NS_DEF(util) + class Arrays{ + public: + template<typename _type> + class _Arrays { + protected: + //used by binarySearch to check for equality + virtual bool equals(_type a,_type b) const = 0; + virtual int32_t compare(_type a,_type b) const = 0; + public: + virtual ~_Arrays(){ + } + + void sort(_type* a, int32_t alen, int32_t fromIndex, int32_t toIndex) const{ + CND_PRECONDITION(fromIndex < toIndex,"fromIndex >= toIndex"); + CND_PRECONDITION(fromIndex >= 0,"fromIndex < 0"); + + // First presort the array in chunks of length 6 with insertion + // sort. A mergesort would give too much overhead for this length. + for (int32_t chunk = fromIndex; chunk < toIndex; chunk += 6) + { + int32_t end = min(chunk + 6, toIndex); + for (int32_t i = chunk + 1; i < end; i++) + { + if (compare(a[i - 1], a[i]) > 0) + { + // not already sorted + int32_t j = i; + _type elem = a[j]; + do + { + a[j] = a[j - 1]; + j--; + } + while (j > chunk && compare(a[j - 1], elem) > 0); + a[j] = elem; + } + } + } + + int32_t len = toIndex - fromIndex; + // If length is smaller or equal 6 we are done. 
+ if (len <= 6) + return; + + _type* src = a; + _type* dest = _CL_NEWARRAY(_type,alen); + _type* t = NULL; // t is used for swapping src and dest + + // The difference of the fromIndex of the src and dest array. + int32_t srcDestDiff = -fromIndex; + + // The merges are done in this loop + for (int32_t size = 6; size < len; size <<= 1) + { + for (int32_t start = fromIndex; start < toIndex; start += size << 1) + { + // mid is the start of the second sublist; + // end the start of the next sublist (or end of array). + int32_t mid = start + size; + int32_t end = min(toIndex, mid + size); + + // The second list is empty or the elements are already in + // order - no need to merge + if (mid >= end || compare(src[mid - 1], src[mid]) <= 0) + { + memcpy(dest + start + srcDestDiff, src+start, (end-start)*sizeof(_type)); + }// The two halves just need swapping - no need to merge + else if (compare(src[start], src[end - 1]) > 0) + { + memcpy(dest+end-size+srcDestDiff, src+start, size * sizeof(_type)); + memcpy(dest+start+srcDestDiff, src+mid, (end-mid) * sizeof(_type)); + + }else{ + // Declare a lot of variables to save repeating + // calculations. Hopefully a decent JIT will put these + // in registers and make this fast + int32_t p1 = start; + int32_t p2 = mid; + int32_t i = start + srcDestDiff; + + // The main merge loop; terminates as soon as either + // half is ended + while (p1 < mid && p2 < end) + { + dest[i++] = src[(compare(src[p1], src[p2]) <= 0 + ? p1++ : p2++)]; + } + + // Finish up by copying the remainder of whichever half + // wasn't finished. + if (p1 < mid) + memcpy(dest+i,src+p1, (mid-p1) * sizeof(_type)); + else + memcpy(dest+i,src+p2, (end-p2) * sizeof(_type)); + } + } + // swap src and dest ready for the next merge + t = src; + src = dest; + dest = t; + fromIndex += srcDestDiff; + toIndex += srcDestDiff; + srcDestDiff = -srcDestDiff; + } + + // make sure the result ends up back in the right place. 
Note + // that src and dest may have been swapped above, so src + // contains the sorted array. + if (src != a) + { + // Note that fromIndex == 0. + memcpy(a+srcDestDiff,src,toIndex * sizeof(_type)); + } + } + }; + }; + + template <typename _kt, typename _comparator, + typename class1, typename class2> + class CLListEquals: + public CL_NS_STD(binary_function)<class1*,class2*,bool> + { + typedef typename class1::const_iterator _itr1; + typedef typename class2::const_iterator _itr2; + public: + CLListEquals(){ + } + bool equals( class1* val1, class2* val2 ) const{ + static _comparator comp; + if ( val1 == val2 ) + return true; + size_t size = val1->size(); + if ( size != val2->size() ) + return false; + + _itr1 itr1 = val1->begin(); + _itr2 itr2 = val2->begin(); + while ( --size >= 0 ){ + if ( !comp(*itr1,*itr2) ) + return false; + itr1++; + itr2++; + } + return true; + } + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/BitSet.cpp b/3rdparty/clucene/src/CLucene/util/BitSet.cpp new file mode 100644 index 000000000..3679bd120 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/BitSet.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. 
+*/ +#include "CLucene/StdHeader.h" +#include "BitSet.h" +#include "CLucene/store/Directory.h" + +CL_NS_USE(store) +CL_NS_DEF(util) + +BitSet::BitSet(const BitSet& copy) + : _size(copy._size) + , _count(-1) +{ + int32_t len = (_size >> 3) + 1; + bits = _CL_NEWARRAY(uint8_t, len); + memcpy(bits, copy.bits, len); +} + +BitSet::BitSet(int32_t size) + : _size(size) + , _count(-1) +{ + int32_t len = (_size >> 3) + 1; + bits = _CL_NEWARRAY(uint8_t, len); + memset(bits, 0, len); +} + +BitSet::BitSet(CL_NS(store)::Directory* d, const QString& name) +{ + _count = -1; + CL_NS(store)::IndexInput* input = d->openInput(name); + try { + _size = input->readInt(); // read size + _count = input->readInt(); // read count + + bits = _CL_NEWARRAY(uint8_t,(_size >> 3) + 1); // allocate bits + input->readBytes(bits, (_size >> 3) + 1); // read bits + } _CLFINALLY ( + input->close(); + _CLDELETE(input ); + ); +} + +void BitSet::write(CL_NS(store)::Directory* d, const QString& name) +{ + CL_NS(store)::IndexOutput* output = d->createOutput(name); + try { + output->writeInt(size()); // write size + output->writeInt(count()); // write count + output->writeBytes(bits, (_size >> 3) + 1); // write bits + } _CLFINALLY ( + output->close(); + _CLDELETE(output); + ); +} + +BitSet::~BitSet() +{ + _CLDELETE_ARRAY(bits); +} + +void BitSet::set(int32_t bit, bool val) +{ + if (val) + bits[bit >> 3] |= 1 << (bit & 7); + else + bits[bit >> 3] &= ~(1 << (bit & 7)); + + _count = -1; +} + +int32_t BitSet::size() const +{ + return _size; +} + +int32_t BitSet::count() +{ + // if the BitSet has been modified + if (_count == -1) { + static const uint8_t BYTE_COUNTS[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 
6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + + int32_t c = 0; + int32_t end = (_size >> 3) + 1; + for (int32_t i = 0; i < end; i++) + c += BYTE_COUNTS[bits[i]]; // sum bits per uint8_t + _count = c; + } + return _count; +} + +BitSet* BitSet::clone() const +{ + return _CLNEW BitSet(*this); +} + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/BitSet.h b/3rdparty/clucene/src/CLucene/util/BitSet.h new file mode 100644 index 000000000..e93847e98 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/BitSet.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. 
+*/ +#ifndef _lucene_util_BitSet_ +#define _lucene_util_BitSet_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include <QtCore/QString> + +#include "CLucene/store/Directory.h" + +CL_NS_DEF(util) + +class BitSet : LUCENE_BASE +{ +public: + // Create a bitset with the specified size + BitSet (int32_t size); + BitSet(CL_NS(store)::Directory* d, const QString& name); + void write(CL_NS(store)::Directory* d, const QString& name); + + // Destructor for the bit set + ~BitSet(); + + // get the value of the specified bit + inline bool get(const int32_t bit) const + { + return (bits[bit >> 3] & (1 << (bit & 7))) != 0; + } + + // set the value of the specified bit + void set(int32_t bit, bool val = true); + + ///returns the size of the bitset + int32_t size() const; + + // Returns the total number of one bits in this BitSet. This is + // efficiently computed and cached, so that, if the BitSet is not changed, + // no recomputation is done for repeated calls. + int32_t count(); + BitSet *clone() const; + +protected: + BitSet(const BitSet& copy); + +private: + int32_t _size; + int32_t _count; + uint8_t *bits; +}; + +CL_NS_END + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/Equators.cpp b/3rdparty/clucene/src/CLucene/util/Equators.cpp new file mode 100644 index 000000000..e112bd234 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Equators.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. 
+*/ +#include "CLucene/StdHeader.h" +#include "Equators.h" + +CL_NS_DEF(util) + +bool Equals::Int32::operator()(const int32_t val1, const int32_t val2) const +{ + return (val1)==(val2); +} + +bool Equals::Char::operator()(const char* val1, const char* val2) const +{ + if ( val1 == val2 ) + return true; + return (strcmp(val1, val2) == 0); +} + +#ifdef _UCS2 +bool Equals::WChar::operator()(const wchar_t* val1, const wchar_t* val2) const +{ + if (val1 == val2) + return true; + return (_tcscmp(val1, val2) == 0); +} +#endif + +bool Equals::Qstring::operator()(const QString& val1, const QString& val2) const +{ + return (val1 == val2); +} + +/////////////////////////////////////////////////////////////////////////////// +// Comparors +/////////////////////////////////////////////////////////////////////////////// + +int32_t Compare::Int32::getValue() const +{ + return value; +} + +Compare::Int32::Int32(int32_t val) +{ + value = val; +} + +Compare::Int32::Int32() +{ + value = 0; +} + +int32_t Compare::Int32::compareTo(void* o) +{ + try { + Int32* other = (Int32*)o; + if (value == other->value) + return 0; + // Returns just -1 or 1 on inequality; doing math might overflow. + return value > other->value ? 1 : -1; + } catch(...) { + _CLTHROWA(CL_ERR_Runtime, "Couldnt compare types"); + } +} + +bool Compare::Int32::operator()(int32_t t1, int32_t t2) const +{ + return t1 > t2 ? true : false; +} + +size_t Compare::Int32::operator()(int32_t t) const +{ + return t; +} + +qreal Compare::Float::getValue() const +{ + return value; +} + +Compare::Float::Float(qreal val) +{ + value = val; +} + +int32_t Compare::Float::compareTo(void* o) +{ + try { + Float* other = (Float*)o; + if (value == other->value) + return 0; + // Returns just -1 or 1 on inequality; doing math might overflow. + return value > other->value ? 1 : -1; + } catch(...) 
{ + _CLTHROWA(CL_ERR_Runtime,"Couldnt compare types"); + } +} + +bool Compare::Char::operator()(const char* val1, const char* val2) const +{ + if ( val1 == val2) + return false; + return (strcmp(val1, val2) < 0); +} + +size_t Compare::Char::operator()(const char* val1) const +{ + return CL_NS(util)::Misc::ahashCode(val1); +} + +#ifdef _UCS2 +bool Compare::WChar::operator()(const wchar_t* val1, const wchar_t* val2) const +{ + if ( val1==val2) + return false; + return (_tcscmp(val1, val2) < 0); +} + +size_t Compare::WChar::operator()(const wchar_t* val1) const +{ + return CL_NS(util)::Misc::whashCode(val1); +} +#endif + +const TCHAR* Compare::TChar::getValue() const +{ + return s; +} + +Compare::TChar::TChar() +{ + s = NULL; +} + +Compare::TChar::TChar(const TCHAR* str) +{ + this->s = str; +} + +int32_t Compare::TChar::compareTo(void* o) +{ + try { + TChar* os = (TChar*)o; + return _tcscmp(s, os->s); + } catch(...) { + _CLTHROWA(CL_ERR_Runtime,"Couldnt compare types"); + } + +} + +bool Compare::TChar::operator()(const TCHAR* val1, const TCHAR* val2) const +{ + if (val1 == val2) + return false; + + return (_tcscmp(val1, val2) < 0); +} + +size_t Compare::TChar::operator()(const TCHAR* val1) const +{ + return CL_NS(util)::Misc::thashCode(val1); +} + +bool Compare::Qstring::operator()(const QString& val1, const QString& val2) const +{ + return (val1 < val2); +} + +size_t Compare::Qstring::operator ()(const QString& val1) const +{ + return CL_NS(util)::Misc::qhashCode(val1); +} + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/Equators.h b/3rdparty/clucene/src/CLucene/util/Equators.h new file mode 100644 index 000000000..11fcb0eaf --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Equators.h @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. 
+ * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. +*/ +#ifndef _lucene_util_Equators_ +#define _lucene_util_Equators_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include <QtCore/QString> + +//#ifdef QT_LINUXBASE +// we are going to use qreal now, we basically maintain our own clucene anyway +//// LSB doesn't define float_t - see http://bugs.linuxbase.org/show_bug.cgi?id=2374 +//typedef float float_t; +//#endif + +CL_NS_DEF(util) + +/////////////////////////////////////////////////////////////////////////////// +// Equators +/////////////////////////////////////////////////////////////////////////////// + +class Equals{ +public: + class Int32:public CL_NS_STD(binary_function)<const int32_t*,const int32_t*,bool> + { + public: + bool operator()( const int32_t val1, const int32_t val2 ) const; + }; + + class Char:public CL_NS_STD(binary_function)<const char*,const char*,bool> + { + public: + bool operator()( const char* val1, const char* val2 ) const; + }; +#ifdef _UCS2 + class WChar: public CL_NS_STD(binary_function)<const wchar_t*,const wchar_t*,bool> + { + public: + bool operator()( const wchar_t* val1, const wchar_t* val2 ) const; + }; + class TChar: public WChar{ + }; +#else + class TChar: public Char{ + }; +#endif + + template<typename _cl> + class Void:public CL_NS_STD(binary_function)<const void*,const void*,bool> + { + public: + bool operator()( _cl* val1, _cl* val2 ) const{ + return val1==val2; + } + }; + + class Qstring : public CL_NS_STD(binary_function)<const QString&, const QString&, bool> + { + public: + bool operator() (const QString& val1, const QString& val2) const; + }; +}; + + +/////////////////////////////////////////////////////////////////////////////// +// Comparors +/////////////////////////////////////////////////////////////////////////////// + +class Comparable : LUCENE_BASE +{ +public: + virtual ~Comparable(){ + } + + virtual int32_t compareTo(void* o) = 0; 
+}; + +/** @internal */ +class Compare{ +public: + class _base + { // traits class for hash containers + public: + enum + { // parameters for hash table + bucket_size = 4, // 0 < bucket_size + min_buckets = 8 + }; // min_buckets = 2 ^^ N, 0 < N + + _base() + { + } + }; + + class Int32:public _base, public Comparable{ + int32_t value; + public: + int32_t getValue() const; + Int32(int32_t val); + Int32(); + int32_t compareTo(void* o); + bool operator()( int32_t t1, int32_t t2 ) const; + size_t operator()( int32_t t ) const; + }; + + + class Float:public Comparable{ + qreal value; + public: + qreal getValue() const; + Float(qreal val); + int32_t compareTo(void* o); + }; + + + class Char: public _base //<char*> + { + public: + bool operator()( const char* val1, const char* val2 ) const; + size_t operator()( const char* val1) const; + }; + +#ifdef _UCS2 + class WChar: public _base //<wchar_t*> + { + public: + bool operator()( const wchar_t* val1, const wchar_t* val2 ) const; + size_t operator()( const wchar_t* val1) const; + }; +#endif + + class TChar: public _base, public Comparable{ + const TCHAR* s; + public: + const TCHAR* getValue() const; + + TChar(); + TChar(const TCHAR* str); + int32_t compareTo(void* o); + bool operator()( const TCHAR* val1, const TCHAR* val2 ) const; + size_t operator()( const TCHAR* val1) const; + }; + + + template<typename _cl> + class Void:public _base //<const void*,const void*,bool> + { + public: + int32_t compareTo(_cl* o){ + if ( this == o ) + return o; + else + return this > o ? 1 : -1; + } + bool operator()( _cl* t1, _cl* t2 ) const{ + return t1 > t2 ? 
true : false; + } + size_t operator()( _cl* t ) const{ + return (size_t)t; + } + }; + + class Qstring : public _base + { + public: + bool operator() (const QString& val1, const QString& val2) const; + size_t operator() (const QString& val1) const; + }; +}; + +/////////////////////////////////////////////////////////////////////////////// +// allocators +/////////////////////////////////////////////////////////////////////////////// + +class Deletor +{ +public: + + template<typename _kt> + class Array{ + public: + static void doDelete(_kt* arr){ + _CLDELETE_LARRAY(arr); + } + }; + class tcArray{ + public: + static void doDelete(const TCHAR* arr){ + _CLDELETE_CARRAY(arr); + } + }; + class acArray{ + public: + static void doDelete(const char* arr){ + _CLDELETE_CaARRAY(arr); + } + }; + + class Unintern{ + public: + static void doDelete(TCHAR* arr); + }; + template<typename _kt> + class Object{ + public: + static void doDelete(_kt* obj){ + _CLLDELETE(obj); + } + }; + template<typename _kt> + class Void{ + public: + static void doDelete(_kt* obj){ + _CLVDELETE(obj); + } + }; + class Dummy{ + public: + static void doDelete(const void* nothing) + { + // TODO: remove all occurances where it hits this point + // CND_WARNING(false, "Deletor::Dummy::doDelete run, set deleteKey + // or deleteValue to false"); + } + }; + class DummyInt32{ + public: + static void doDelete(const int32_t nothing){ + } + }; + class DummyFloat{ + public: + static void doDelete(const qreal nothing){ + } + }; + template <typename _type> + class ConstNullVal{ + public: + static void doDelete(const _type nothing) + { + // TODO: remove all occurances where it hits this point + // CND_WARNING(false, "Deletor::Dummy::doDelete run, set deleteKey + // or deleteValue to false"); + } + }; + + template <typename _type> + class NullVal{ + public: + static void doDelete(_type nothing) + { + // TODO: remove all occurances where it hits this point + // CND_WARNING(false, "Deletor::Dummy::doDelete run, set deleteKey 
+ // or deleteValue to false"); + } + }; + class DummyQString { + public: + static void doDelete(const QString& nothing) { + } + }; +}; +//////////////////////////////////////////////////////////////////////////////// + +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/FastCharStream.cpp b/3rdparty/clucene/src/CLucene/util/FastCharStream.cpp new file mode 100644 index 000000000..f9fbe9b10 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/FastCharStream.cpp @@ -0,0 +1,107 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "FastCharStream.h" + +#include "CLucene/util/Reader.h" + +CL_NS_DEF(util) + +const int32_t FastCharStream::maxRewindSize = LUCENE_MAX_WORD_LEN*2; + + FastCharStream::FastCharStream(Reader* reader): + pos(0), + rewindPos(0), + resetPos(0), + col(1), + line(1), + input(reader) + { + input->mark(maxRewindSize); + } + FastCharStream::~FastCharStream(){ + } + void FastCharStream::readChar(TCHAR &c) { + try{ + int32_t r = input->read(); + if ( r == -1 ) + input = NULL; + c = r; + }catch(CLuceneError& err){ + if ( err.number() == CL_ERR_IO ) + input = 0; + throw err; + } + } + int FastCharStream::GetNext() + { + // printf("getnext\n"); + if (input == 0 ) // end of file + { + _CLTHROWA(CL_ERR_IO,"warning : FileReader.GetNext : Read TCHAR over EOS."); + } + // this is rather inefficient + // implementing the functions from the java version of + // charstream will be much more efficient. 
+ ++pos; + if ( pos > resetPos + maxRewindSize && rewindPos == 0) { + // move the marker one position (~expensive) + resetPos = pos-(maxRewindSize/2); + if ( resetPos != input->reset(resetPos) ) + _CLTHROWA(CL_ERR_IO,"Unexpected reset() result"); + input->mark(maxRewindSize); + input->skip((maxRewindSize/2) - 1); + } + TCHAR ch; + readChar(ch); + + if (input == NULL) { // eof + return -1; + } + if (rewindPos == 0) { + col += 1; + if(ch == '\n') { + line++; + col = 1; + } + } else { + rewindPos--; + } + return ch; + } + + void FastCharStream::UnGet(){ +// printf("UnGet \n"); + if (input == 0) + return; + if ( pos == 0 ) { + _CLTHROWA(CL_ERR_IO,"error : No character can be UnGet"); + } + rewindPos++; + + input->reset(pos-1); + pos--; + } + + int FastCharStream::Peek() { + int c = GetNext(); + UnGet(); + return c; + } + + bool FastCharStream::Eos() const { + return input==NULL; + } + + int32_t FastCharStream::Column() const { + return col; + } + + int32_t FastCharStream::Line() const { + return line; + } +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/FastCharStream.h b/3rdparty/clucene/src/CLucene/util/FastCharStream.h new file mode 100644 index 000000000..24e5b5612 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/FastCharStream.h @@ -0,0 +1,55 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_util_FastCharStream_ +#define _lucene_util_FastCharStream_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/util/Reader.h" + +CL_NS_DEF(util) + + /** Ported implementation of the FastCharStream class. 
*/ + class FastCharStream:LUCENE_BASE + { + static const int32_t maxRewindSize; + int32_t pos; + int32_t rewindPos; + int64_t resetPos; + int32_t col; + int32_t line; + // read character from stream return false on error + void readChar(TCHAR &); + public: + Reader* input; + + /// Initializes a new instance of the FastCharStream class LUCENE_EXPORT. + FastCharStream(Reader* reader); + ~FastCharStream(); + + /// Returns the next TCHAR from the stream. + int GetNext(); + + void UnGet(); + + /// Returns the current top TCHAR from the input stream without removing it. + int Peek(); + + + /// Returns <b>True</b> if the end of stream was reached. + bool Eos() const; + + /// Gets the current column. + int32_t Column() const; + + /// Gets the current line. + int32_t Line() const; + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/Misc.cpp b/3rdparty/clucene/src/CLucene/util/Misc.cpp new file mode 100644 index 000000000..42e3fd0a8 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Misc.cpp @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. 
+*/ +#include "CLucene/StdHeader.h" +#include "Misc.h" + +#ifdef _CL_TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# if defined(_CL_HAVE_SYS_TIME_H) +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif + +#ifdef _CL_HAVE_SYS_TIMEB_H +# include <sys/timeb.h> +#endif + +#ifdef UNDER_CE +#include <QTime> +#endif + +CL_NS_DEF(util) + +uint64_t Misc::currentTimeMillis() +{ +#ifndef UNDER_CE +#if defined(_CLCOMPILER_MSVC) || defined(__MINGW32__) || defined(__BORLANDC__) + struct _timeb tstruct; + _ftime(&tstruct); + + return (((uint64_t) tstruct.time) * 1000) + tstruct.millitm; +#else + struct timeval tstruct; + if (gettimeofday(&tstruct, NULL) < 0) { + _CLTHROWA(CL_ERR_Runtime,"Error in gettimeofday call."); + } + + return (((uint64_t) tstruct.tv_sec) * 1000) + tstruct.tv_usec / 1000; +#endif +#else //UNDER_CE + QT_USE_NAMESPACE + QTime t = QTime::currentTime(); + return t.second() * 1000 + t.msec(); +#endif //UNDER_CE +} + +// #pragma mark -- char related utils + +size_t Misc::ahashCode(const char* str) +{ + // Compute the hash code using a local variable to be reentrant. + size_t hashCode = 0; + while (*str != 0) + hashCode = hashCode * 31 + *str++; + return hashCode; +} + +size_t Misc::ahashCode(const char* str, size_t len) +{ + // Compute the hash code using a local variable to be reentrant. + size_t count = len; + size_t hashCode = 0; + for (size_t i = 0; i < count; i++) + hashCode = hashCode * 31 + *str++; + return hashCode; +} + +char* Misc::ajoin(const char* a, const char* b, const char* c, const char* d, + const char* e, const char* f) +{ +#define aLEN(x) (x == NULL ? 0 : strlen(x)) + const size_t totalLen = aLEN(a) + aLEN(b) + aLEN(c) + aLEN(d) + aLEN(e) + + aLEN(f) + sizeof(char); /* Space for terminator. 
*/ + + char* buf = _CL_NEWARRAY(char, totalLen); + buf[0] = 0; + if (a != NULL) + strcat(buf, a); + + if (b != NULL) + strcat(buf, b); + + if (c != NULL) + strcat(buf, c); + + if (d != NULL) + strcat(buf, d); + + if (e != NULL) + strcat(buf, e); + + if (f != NULL) + strcat(buf, f); + + return buf; +} + +char* Misc::segmentname(const char* segment, const char* ext, int32_t x) +{ + CND_PRECONDITION(ext != NULL, "ext is NULL"); + + char* buf = _CL_NEWARRAY(char, CL_MAX_PATH); + if (x == -1) + _snprintf(buf, CL_MAX_PATH, "%s%s", segment, ext); + else + _snprintf(buf, CL_MAX_PATH, "%s%s%d", segment, ext, x); + return buf; +} + +void Misc::segmentname(char* buffer, int32_t bufferLen, const char* segment, + const char* ext, int32_t x) +{ + CND_PRECONDITION(buffer != NULL, "buffer is NULL"); + CND_PRECONDITION(segment != NULL, "segment is NULL"); + CND_PRECONDITION(ext != NULL, "extention is NULL"); + + if (x == -1) + _snprintf(buffer, bufferLen, "%s%s", segment, ext); + else + _snprintf(buffer, bufferLen, "%s%s%d", segment, ext, x); +} + +// #pragma mark -- qt related utils + +size_t Misc::qhashCode(const QString& str) +{ + size_t hashCode = 0; + for (int i = 0; i < str.count(); ++i) + hashCode = hashCode * 31 + str.at(i).unicode(); + return hashCode; +} + +size_t Misc::qhashCode(const QString& str, size_t len) +{ + size_t count = len; + size_t hashCode = 0; + for (size_t i = 0; i < count; ++i) + hashCode = hashCode * 31 + str.at(i).unicode(); + return hashCode; +} + +QString Misc::qjoin(const QString &a, const QString &b, const QString &c, + const QString &d, const QString &e, const QString &f) +{ + QString buffer; + + if (!a.isNull() && !a.isEmpty()) + buffer.append(a); + + if (!b.isNull() && !b.isEmpty()) + buffer.append(b); + + if (!c.isNull() && !c.isEmpty()) + buffer.append(c); + + if (!d.isNull() && !d.isEmpty()) + buffer.append(d); + + if (!e.isNull() && !e.isEmpty()) + buffer.append(e); + + if (!f.isNull() && !f.isEmpty()) + buffer.append(f); + + return buffer; 
+} + +QString Misc::segmentname(const QString& segment, const QString& ext, int32_t x) +{ + CND_PRECONDITION(!ext.isEmpty(), "ext is NULL"); + + if (x == -1) + return QString(segment + ext); + + QString buf(QLatin1String("%1%2%3")); + return buf.arg(segment).arg(ext).arg(x); +} + +void Misc::segmentname(QString& buffer, int32_t bufferLen, + const QString& segment, const QString& ext, int32_t x) +{ + CND_PRECONDITION(!segment.isEmpty(), "segment is NULL"); + CND_PRECONDITION(!ext.isEmpty(), "extention is NULL"); + + buffer = segment + ext; + if (x != -1) + buffer += QString::number(x); +} + +// #pragma mark -- TCHAR related utils + +int32_t Misc::stringDifference(const TCHAR* s1, int32_t len1, const TCHAR* s2, + int32_t len2) +{ + int32_t len = len1 < len2 ? len1 : len2; + for (int32_t i = 0; i < len; i++) + if (s1[i] != s2[i]) + return i; + return len; +} + +/* DSR:CL_BUG: (See comment for join method in Misc.h): */ +TCHAR* Misc::join (const TCHAR* a, const TCHAR* b, const TCHAR* c, + const TCHAR* d, const TCHAR* e, const TCHAR* f) +{ +#define LEN(x) (x == NULL ? 0 : _tcslen(x)) + const size_t totalLen = LEN(a) + LEN(b) + LEN(c) + LEN(d) + LEN(e) + LEN(f) + + sizeof(TCHAR); /* Space for terminator. */ + + TCHAR* buf = _CL_NEWARRAY(TCHAR, totalLen); + buf[0] = 0; + if (a != NULL) + _tcscat(buf, a); + + if (b != NULL) + _tcscat(buf, b); + + if (c != NULL) + _tcscat(buf, c); + + if (d != NULL) + _tcscat(buf, d); + + if (e != NULL) + _tcscat(buf, e); + + if (f != NULL) + _tcscat(buf, f); + + return buf; +} + +#ifdef _UCS2 + +size_t Misc::whashCode(const wchar_t* str) +{ + // Compute the hash code using a local variable to be reentrant. + size_t hashCode = 0; + while (*str != 0) + hashCode = hashCode * 31 + *str++; + return hashCode; +} + +size_t Misc::whashCode(const wchar_t* str, size_t len) +{ + // Compute the hash code using a local variable to be reentrant. 
+ size_t count = len; + size_t hashCode = 0; + for (size_t i = 0; i < count; i++) + hashCode = hashCode * 31 + *str++; + return hashCode; +} + +char* Misc::_wideToChar(const wchar_t* s CL_FILELINEPARAM) +{ + size_t len = _tcslen(s); + char* msg = _CL_NEWARRAY(char, len + 1); + _cpywideToChar(s, msg, len + 1); + return msg; +} + +void Misc::_cpywideToChar(const wchar_t* s, char* d, size_t len) +{ + size_t sLen = wcslen(s); + for (uint32_t i = 0; i < len && i < sLen + 1; i++) + d[i] = LUCENE_OOR_CHAR(s[i]); +} + +wchar_t* Misc::_charToWide(const char* s CL_FILELINEPARAM) +{ + size_t len = strlen(s); + wchar_t* msg = _CL_NEWARRAY(wchar_t, len + 1); + _cpycharToWide(s, msg, len + 1); + return msg; +} + +void Misc::_cpycharToWide(const char* s, wchar_t* d, size_t len) +{ + size_t sLen = strlen(s); + for (uint32_t i = 0; i < len && i < sLen + 1; i++) + d[i] = s[i]; +} + +#endif + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/Misc.h b/3rdparty/clucene/src/CLucene/util/Misc.h new file mode 100644 index 000000000..561c6e4d9 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Misc.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. 
+*/ +#ifndef _lucene_util_Misc_H +#define _lucene_util_Misc_H + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include <QtCore/QString> + +CL_NS_DEF(util) + +class Misc +{ +public: + static uint64_t currentTimeMillis(); + + static size_t ahashCode(const char* str); + static size_t ahashCode(const char* str, size_t len); + static char* ajoin(const char* a, const char* b, const char* c = NULL, + const char* d = NULL, const char* e = NULL, const char* f = NULL); + static char* segmentname(const char* segment, const char* ext, int32_t x = -1); + static void segmentname(char* buffer, int32_t bufferLen, const char* segment, + const char* ext, int32_t x = -1); + + static size_t qhashCode(const QString& str); + static size_t qhashCode(const QString& str, size_t len); + static QString qjoin(const QString& a, const QString& b, + const QString& c = QString(), const QString& d = QString(), + const QString& e = QString(), const QString& f = QString()); + static QString segmentname(const QString& segment, const QString& ext, + int32_t x = -1 ); + static void segmentname(QString& buffer, int32_t bufferLen, + const QString& Segment, const QString& ext, int32_t x = -1); + + // Compares two strings, character by character, and returns the + // first position where the two strings differ from one another. + // + // @param s1 The first string to compare + // @param s1Len The length of the first string to compare + // @param s2 The second string to compare + // @param s2Len The length of the second string to compare + // @return The first position where the two strings differ. 
+ static int32_t stringDifference(const TCHAR* s1, int32_t s1Len, + const TCHAR* s2, int32_t s2Len); + static TCHAR* join (const TCHAR* a, const TCHAR* b, const TCHAR* c = NULL, + const TCHAR* d = NULL, const TCHAR* e = NULL, const TCHAR* f = NULL ); + +#ifdef _UCS2 + static size_t whashCode(const wchar_t* str); + static size_t whashCode(const wchar_t* str, size_t len); + +# define thashCode whashCode + + static char* _wideToChar(const wchar_t* s CL_FILELINEPARAM); + static void _cpywideToChar(const wchar_t* s, char* d, size_t len); + + static wchar_t* _charToWide(const char* s CL_FILELINEPARAM); + static void _cpycharToWide(const char* s, wchar_t* d, size_t len); + +#else +# define thashCode ahashCode +#endif +}; + +CL_NS_END + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/PriorityQueue.h b/3rdparty/clucene/src/CLucene/util/PriorityQueue.h new file mode 100644 index 000000000..45649ee7f --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/PriorityQueue.h @@ -0,0 +1,177 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_util_PriorityQueue_ +#define _lucene_util_PriorityQueue_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif +CL_NS_DEF(util) + +// A PriorityQueue maintains a partial ordering of its elements such that the +// least element can always be found in constant time. Put()'s and pop()'s +// require log(size) time. 
+template <class _type,typename _valueDeletor> class PriorityQueue:LUCENE_BASE { + private: + _type* heap; //(was object[]) + size_t _size; + bool dk; + size_t maxSize; + + void upHeap(){ + size_t i = _size; + _type node = heap[i]; // save bottom node (WAS object) + int32_t j = ((uint32_t)i) >> 1; + while (j > 0 && lessThan(node,heap[j])) { + heap[i] = heap[j]; // shift parents down + i = j; + j = ((uint32_t)j) >> 1; + } + heap[i] = node; // install saved node + } + void downHeap(){ + size_t i = 1; + _type node = heap[i]; // save top node + size_t j = i << 1; // find smaller child + size_t k = j + 1; + if (k <= _size && lessThan(heap[k], heap[j])) { + j = k; + } + while (j <= _size && lessThan(heap[j],node)) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= _size && lessThan(heap[k], heap[j])) { + j = k; + } + } + heap[i] = node; // install saved node + } + + protected: + PriorityQueue(){ + this->_size = 0; + this->dk = false; + this->heap = NULL; + this->maxSize = 0; + } + + // Determines the ordering of objects in this priority queue. Subclasses + // must define this one method. + virtual bool lessThan(_type a, _type b)=0; + + // Subclass constructors must call this. + void initialize(const int32_t maxSize, bool deleteOnClear){ + _size = 0; + dk = deleteOnClear; + int32_t heapSize = maxSize + 1; + heap = _CL_NEWARRAY(_type,heapSize); + this->maxSize = maxSize; + } + + public: + virtual ~PriorityQueue(){ + clear(); + _CLDELETE_ARRAY(heap); + } + + /** + * Adds an Object to a PriorityQueue in log(size) time. + * If one tries to add more objects than maxSize from initialize + * a RuntimeException (ArrayIndexOutOfBound) is thrown. 
+ */ + void put(_type element){ + if ( _size>=maxSize ) + _CLTHROWA(CL_ERR_IndexOutOfBounds,"add is out of bounds"); + + ++_size; + heap[_size] = element; + upHeap(); + } + + /** + * Adds element to the PriorityQueue in log(size) time if either + * the PriorityQueue is not full, or not lessThan(element, top()). + * @param element + * @return true if element is added, false otherwise. + */ + bool insert(_type element){ + if(_size < maxSize){ + put(element); + return true; + }else if(_size > 0 && !lessThan(element, top())){ + if ( dk ){ + _valueDeletor::doDelete(heap[1]); + } + heap[1] = element; + adjustTop(); + return true; + }else + return false; + } + + /** + * Returns the least element of the PriorityQueue in constant time. + */ + _type top(){ + if (_size > 0) + return heap[1]; + else + return NULL; + } + + /** Removes and returns the least element of the PriorityQueue in log(size) + * time. + */ + _type pop(){ + if (_size > 0) { + _type result = heap[1]; // save first value + heap[1] = heap[_size]; // move last to first + + heap[_size] = (_type)0; // permit GC of objects + --_size; + downHeap(); // adjust heap + return result; + } else + return (_type)NULL; + } + + /**Should be called when the object at top changes values. Still log(n) + worst case, but it's at least twice as fast to <pre> + { pq.top().change(); pq.adjustTop(); } + </pre> instead of <pre> + { o = pq.pop(); o.change(); pq.push(o); } + </pre> + */ + void adjustTop(){ + downHeap(); + } + + + /** + * Returns the number of elements currently stored in the PriorityQueue. + */ + size_t size(){ + return _size; + } + + /** + * Removes all entries from the PriorityQueue. 
+ */ + void clear(){ + for (size_t i = 1; i <= _size; ++i){ + if ( dk ){ + _valueDeletor::doDelete(heap[i]); + } + } + _size = 0; + } + }; + +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/Reader.cpp b/3rdparty/clucene/src/CLucene/util/Reader.cpp new file mode 100644 index 000000000..1ce97106d --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Reader.cpp @@ -0,0 +1,186 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "Reader.h" + +CL_NS_DEF(util) + +StringReader::StringReader ( const TCHAR* value ): + Reader(NULL,true){ + reader = new jstreams::StringReader<TCHAR>(value); +} +StringReader::StringReader ( const TCHAR* value, const int32_t length ): + Reader(NULL,true){ + reader = new jstreams::StringReader<TCHAR>(value,length); +} +StringReader::StringReader ( const TCHAR* value, const int32_t length, bool copyData ): + Reader(NULL,true){ + reader = new jstreams::StringReader<TCHAR>(value,length, copyData); +} +StringReader::~StringReader(){ +} + + +FileReader::FileReader ( const char* path, const char* enc, + const int32_t cachelen, const int32_t /*cachebuff*/ ): + Reader(NULL, true) +{ + this->input = new jstreams::FileInputStream(path, cachelen); + this->reader = new SimpleInputStreamReader(this->input,enc); //(this is a jstream object) +} + +FileReader::~FileReader (){ + if (input) + delete input; +} +int32_t FileReader::read(const TCHAR*& start, int32_t _min, int32_t _max) { + return reader->read(start, _min, _max); +} +int64_t FileReader::mark(int32_t readlimit) { + return reader->mark(readlimit); +} +int64_t FileReader::reset(int64_t newpos) { + return 
reader->reset(newpos); +} + + + +SimpleInputStreamReader::SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* enc) +{ + finishedDecoding = false; + input = i; + charbuf.setSize(262); + + if ( strcmp(enc,"ASCII")==0 ) + encoding = ASCII; +#ifdef _UCS2 + else if ( strcmp(enc,"UTF-8")==0 ) + encoding = UTF8; + else if ( strcmp(enc,"UCS-2LE")==0 ) + encoding = UCS2_LE; +#endif + else + _CLTHROWA(CL_ERR_IllegalArgument,"Unsupported encoding, use jstreams iconv based instead"); + + mark(262); + charsLeft = 0; +} +SimpleInputStreamReader::~SimpleInputStreamReader(){ + input = NULL; +} +int32_t SimpleInputStreamReader::decode(TCHAR* start, int32_t space){ + // decode from charbuf + const char *inbuf = charbuf.readPos; + const char *inbufend = charbuf.readPos + charbuf.avail; + TCHAR *outbuf = start; + const TCHAR *outbufend = outbuf + space; + + if ( encoding == ASCII ){ + while ( outbuf<outbufend && inbuf<inbufend ){ + *outbuf = *inbuf; + outbuf++; + inbuf++; + } + +#ifdef _UCS2 + } + else if ( encoding == UCS2_LE ){ + while ( outbuf<outbufend && (inbuf+1)<inbufend ){ + uint8_t c1 = *inbuf; + uint8_t c2 = *(inbuf+1); + unsigned short c = c1 | (c2<<8); + + #ifdef _UCS2 + *outbuf = c; + #else + *outbuf = LUCENE_OOR_CHAR(c); + #endif + outbuf++; + inbuf+=2; + } + + }else if ( encoding == UTF8 ){ + while ( outbuf<outbufend && inbuf<inbufend ){ + size_t utflen = lucene_utf8charlen(inbuf); + if ( utflen==0 ){ + error = "Invalid multibyte sequence."; + status = jstreams::Error; + return -1; + }else if ( inbuf+utflen > inbufend ){ + break; //character incomplete + }else{ + size_t rd = lucene_utf8towc(outbuf,inbuf,inbufend-inbuf); + if ( rd == 0 ){ + error = "Invalid multibyte sequence."; + status = jstreams::Error; + return -1; + }else{ + inbuf+=rd; + outbuf++; + } + } + } +#endif //_UCS2 + }else + _CLTHROWA(CL_ERR_Runtime,"Unexpected encoding"); + + if ( outbuf < outbufend ) { + //we had enough room to convert the entire input + if ( inbuf < inbufend ) { + // last 
character is incomplete + // move from inbuf to the end to the start of + // the buffer + memmove(charbuf.start, inbuf, inbufend-inbuf); + charbuf.readPos = charbuf.start; + charbuf.avail = inbufend-inbuf; + } else if ( outbuf < outbufend ) { //input sequence was completely converted + charbuf.readPos = charbuf.start; + charbuf.avail = 0; + if (input == NULL) { + finishedDecoding = true; + } + } + } else { + charbuf.readPos += charbuf.avail - (inbufend-inbuf); + charbuf.avail = inbufend-inbuf; + } + return outbuf-start; +} + +int32_t SimpleInputStreamReader::fillBuffer(TCHAR* start, int32_t space) { + // fill up charbuf + if (input && charbuf.readPos == charbuf.start) { + const char *begin; + int32_t numRead; + numRead = input->read(begin, 1, charbuf.size - charbuf.avail); + //printf("filled up charbuf\n"); + if (numRead < -1) { + error = input->getError(); + status = jstreams::Error; + input = 0; + return numRead; + } + if (numRead < 1) { + // signal end of input buffer + input = 0; + if (charbuf.avail) { + error = "stream ends on incomplete character"; + status = jstreams::Error; + } + return -1; + } + // copy data into other buffer + memmove( charbuf.start + charbuf.avail, begin, numRead * sizeof(char)); + charbuf.avail = numRead + charbuf.avail; + } + // decode + int32_t n = decode(start, space); + //printf("decoded %i\n", n); + return n; +} + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/Reader.h b/3rdparty/clucene/src/CLucene/util/Reader.h new file mode 100644 index 000000000..6b018b3aa --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/Reader.h @@ -0,0 +1,138 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#ifndef _lucene_util_Reader_ +#define _lucene_util_Reader_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "streambase.h" +#include "stringreader.h" +#include "fileinputstream.h" +#include "bufferedstream.h" + +CL_NS_DEF(util) +/** +* An inline wrapper that reads from Jos van den Oever's jstreams +*/ +class Reader:LUCENE_BASE { +typedef jstreams::StreamBase<TCHAR> jsReader; +public: + bool deleteReader; + jsReader* reader; + + Reader(jsReader* reader, bool deleteReader){ + this->reader = reader; + this->deleteReader = deleteReader; + } + virtual ~Reader(){ + if ( deleteReader ) + delete reader; + reader = NULL; + } + inline int read(){ + const TCHAR*b; + int32_t nread = reader->read(b, 1,1); + if ( nread < -1 ) //if not eof + _CLTHROWA(CL_ERR_IO,reader->getError() ); + else if ( nread == -1 ) + return -1; + else + return b[0]; + } + /** + * Read at least 1 character, and as much as is conveniently available + */ + inline int32_t read(const TCHAR*& start){ + int32_t nread = reader->read(start,1,0); + if ( nread < -1 ) //if not eof + _CLTHROWA(CL_ERR_IO,reader->getError()); + else + return nread; + } + inline int32_t read(const TCHAR*& start, int32_t len){ + int32_t nread = reader->read(start, len, len); + if ( nread < -1 ) //if not eof + _CLTHROWA(CL_ERR_IO,reader->getError()); + else + return nread; + } + inline int64_t skip(int64_t ntoskip){ + int64_t skipped = reader->skip(ntoskip); + if ( skipped < 0 ) + _CLTHROWA(CL_ERR_IO,reader->getError()); + else + return skipped; + } + inline int64_t mark(int32_t readAheadlimit){ + int64_t pos = reader->mark(readAheadlimit); + if ( pos < 0 ) + _CLTHROWA(CL_ERR_IO,reader->getError()); + else + return pos; + } + int64_t reset(int64_t pos){ + int64_t r = reader->reset(pos); + if ( r < 0 ) + _CLTHROWA(CL_ERR_IO,reader->getError()); + else + return r; + } +}; + +///A helper class which constructs a the jstreams 
StringReader. +class StringReader: public Reader{ +public: + StringReader ( const TCHAR* value ); + StringReader ( const TCHAR* value, const int32_t length ); + StringReader ( const TCHAR* value, const int32_t length, bool copyData ); + ~StringReader(); +}; + +/** A very simple inputstreamreader implementation. For a +* more complete InputStreamReader, use the jstreams version +* located in the contrib package +*/ +class SimpleInputStreamReader: public jstreams::BufferedInputStream<TCHAR>{ + int32_t decode(TCHAR* start, int32_t space); + int encoding; + enum{ + ASCII=1, + UTF8=2, + UCS2_LE=3 + }; + bool finishedDecoding; + jstreams::StreamBase<char>* input; + int32_t charsLeft; + + jstreams::InputStreamBuffer<char> charbuf; + int32_t fillBuffer(TCHAR* start, int32_t space); +public: + SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* encoding); + ~SimpleInputStreamReader(); +}; + +/** +* A helper class which constructs a FileReader with a specified +* simple encodings, or a given inputstreamreader +*/ +class FileReader: public Reader{ + jstreams::FileInputStream* input; +public: + FileReader ( const char* path, const char* enc, + const int32_t cachelen = 13, + const int32_t cachebuff = 14 ); //todo: optimise these cache values + ~FileReader (); + + int32_t read(const TCHAR*& start, int32_t _min, int32_t _max); + int64_t mark(int32_t readlimit); + int64_t reset(int64_t); +}; + +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/StringBuffer.cpp b/3rdparty/clucene/src/CLucene/util/StringBuffer.cpp new file mode 100644 index 000000000..b5f1ca238 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/StringBuffer.cpp @@ -0,0 +1,335 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "StringBuffer.h" +#include "Misc.h" + +CL_NS_DEF(util) + + StringBuffer::StringBuffer(TCHAR* buf,int32_t maxlen, const bool consumeBuffer){ + buffer = buf; + bufferLength = maxlen; + bufferOwner = !consumeBuffer; + len = 0; + } + StringBuffer::StringBuffer(){ + //Func - Constructor. Allocates a buffer with the default length. + //Pre - true + //Post - buffer of length bufferLength has been allocated + + //Initialize + bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE; + len = 0; + //Allocate a buffer of length bufferLength + buffer = _CL_NEWARRAY(TCHAR,bufferLength); + bufferOwner = true; + } + + StringBuffer::StringBuffer(const int32_t initSize){ + //Func - Constructor. Allocates a buffer of length initSize + 1 + //Pre - initSize > 0 + //Post - A buffer has been allocated of length initSize + 1 + + //Initialize the bufferLength to initSize + 1 The +1 is for the terminator '\0' + bufferLength = initSize + 1; + len = 0; + //Allocate a buffer of length bufferLength + buffer = _CL_NEWARRAY(TCHAR,bufferLength); + bufferOwner = true; + } + + StringBuffer::StringBuffer(const TCHAR* value){ + //Func - Constructor. + // Creates an instance of Stringbuffer containing a copy of the string value + //Pre - value != NULL + //Post - An instance of StringBuffer has been created containing the copy of the string value + + //Initialize the length of the string to be stored in buffer + len = (int32_t) _tcslen(value); + + //Calculate the space occupied in buffer by a copy of value + const int32_t occupiedLength = len + 1; + + // Minimum allocated buffer length is LUCENE_DEFAULT_TOKEN_BUFFER_SIZE. + bufferLength = (occupiedLength >= LUCENE_DEFAULT_TOKEN_BUFFER_SIZE + ? 
occupiedLength : LUCENE_DEFAULT_TOKEN_BUFFER_SIZE); + + //Allocate a buffer of length bufferLength + buffer = _CL_NEWARRAY(TCHAR,bufferLength); + bufferOwner = true; + //Copy the string value into buffer + _tcsncpy(buffer, value, occupiedLength); + //Assert that the buffer has been terminated at the end of the string + CND_PRECONDITION (buffer[len] == '\0', "Buffer was not correctly terminated"); + } + + StringBuffer::~StringBuffer() { + // Func - Destructor + // Pre - true + // Post - Instanc has been destroyed + + if( bufferOwner ){ + _CLDELETE_CARRAY(buffer); + }else + buffer = NULL; + } + void StringBuffer::clear(){ + //Func - Clears the Stringbuffer and resets it to it default empty state + //Pre - true + //Post - pre(buffer) has been destroyed and a new one has been allocated + + //Destroy the current buffer if present + _CLDELETE_CARRAY(buffer); + + //Initialize + len = 0; + bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE; + //Allocate a buffer of length bufferLength + buffer = _CL_NEWARRAY(TCHAR,bufferLength); + } + + void StringBuffer::appendChar(const TCHAR character) { + //Func - Appends a single character + //Pre - true + //Post - The character has been appended to the string in the buffer + + //Check if the current buffer length is sufficient to have the string value appended + if (len + 1 > bufferLength){ + //Have the size of the current string buffer increased because it is too small + growBuffer(len + 1); + } + //Put character at position len which is the end of the string in the buffer + //Note that this action might overwrite the terminator of the string '\0', which + //is kind of tricky + buffer[len] = character; + //Increase the len by to represent the correct length of the string in the buffer + len++; + } + + void StringBuffer::append(const TCHAR* value) { + //Func - Appends a copy of the string value + //Pre - value != NULL + //Post - value has been copied and appended to the string in buffer + + append(value, _tcslen(value)); + } + void 
StringBuffer::append(const TCHAR* value, size_t appendedLength) { + //Func - Appends a copy of the string value + //Pre - value != NULL + // appendedLength contains the length of the string value which is to be appended + //Post - value has been copied and appended to the string in buffer + + //Check if the current buffer length is sufficient to have the string value appended + if (len + appendedLength + 1 > bufferLength){ + //Have the size of the current string buffer increased because it is too small + growBuffer(len + appendedLength + 1); + } + + //Copy the string value into the buffer at postion len + _tcsncpy(buffer + len, value, appendedLength); + + //Add the length of the copied string to len to reflect the new length of the string in + //the buffer (Note: len is not the bufferlength!) + len += appendedLength; + } + + void StringBuffer::appendInt(const int32_t value) { + //Func - Appends an integer (after conversion to a character string) + //Pre - true + //Post - The converted integer value has been appended to the string in buffer + + //instantiate a buffer of 30 charactes for the conversion of the integer + TCHAR buf[30]; + //Convert the integer value to a string buf using the radix 10 (duh) + _i64tot(value, buf, 10); + //Have the converted integer now stored in buf appended to the string in buffer + append(buf); + } + + void StringBuffer::appendFloat(const qreal value, const int32_t digits){ + //Func - Appends a qreal (after conversion to a character string) + //Pre - digits > 0. Indicates the minimum number of characters printed + //Post - The converted qreal value has been appended to the string in buffer + + //using sprintf("%f" was not reliable on other plaforms... we use a custom float convertor + //bvk: also, using sprintf and %f seems excessivelly slow + if(digits>8) + _CLTHROWA(CL_ERR_IllegalArgument,"Too many digits..."); + + //the maximum number of characters that int64 will hold is 23. 
so we need 23*2+2 + TCHAR buf[48]; //the buffer to hold + int64_t v = (int64_t)value; //the integer value of the float + _i64tot(v,buf,10); //add the whole number + + size_t len = 99-_tcslen(buf); //how many digits we have to work with? + size_t dig = len< (size_t)digits ? len : digits; + if ( dig > 0 ){ + _tcscat(buf,_T(".")); //add a decimal point + + int64_t remi=(int64_t)((value-v)*pow((qreal)10,(qreal)(dig+1))); //take the remainder and make a whole number + if ( remi<0 ) remi*=-1; + int64_t remadj=remi/10; + if ( remi-(remadj*10) >=5 ) + remadj++; //adjust remainder + + // add as many zeros as necessary between the decimal point and the + // significant part of the number. Fixes a bug when trying to print + // numbers that have zeros right after the decimal point + if (remadj) { + int32_t numZeros = dig - (int32_t)log10((qreal)remadj) - 1; + while(numZeros-- > 0) + _tcscat(buf,_T("0")); //add a zero before the decimal point + } + + _i64tot(remadj,buf+_tcslen(buf),10); //add the remainder + } + + append(buf); + } + + void StringBuffer::prepend(const TCHAR* value){ + //Func - Puts a copy of the string value infront of the current string in the StringBuffer + //Pre - value != NULL + //Post - The string in pre(buffer) has been shifted n positions where n equals the length of value. + // The string value was then copied to the beginning of stringbuffer + + prepend(value, _tcslen(value)); + } + + void StringBuffer::prepend(const TCHAR* value, const size_t prependedLength) { + //Func - Puts a copy of the string value in front of the string in the StringBuffer + //Pre - value != NULL + // prependedLength contains the length of the string value which is to be prepended + //Post - A copy of the string value is has been in front of the string in buffer + //todo: something is wrong with this code, i'm sure... 
it only grows (and therefore moves if the buffer is to small) + //Check if the current buffer length is sufficient to have the string value prepended + if (prependedLength + len + 1 > bufferLength){ + //Have the size of the current string buffer increased because it is too small + //Because prependedLength is passed as the second argument to growBuffer, + //growBuffer will have left the first prependedLength characters empty + //when it recopied buffer during reallocation. + growBuffer(prependedLength + len + 1, prependedLength); + } + + //Copy the string value into the buffer at postion 0 + _tcsncpy(buffer, value, prependedLength); + //Add the length of the copied string to len to reflect the new length of the string in + //the buffer (Note: len is not the bufferlength!) + len += prependedLength; + } + + int32_t StringBuffer::length() const{ + //Func - Returns the length of the string in the StringBuffer + //Pre - true + //Post - The length len of the string in the buffer has been returned + + return len; + } + TCHAR* StringBuffer::toString(){ + //Func - Returns a copy of the current string in the StringBuffer sized equal to the length of the string + // in the StringBuffer. + //Pre - true + //Post - The copied string has been returned + + //Instantiate a buffer equal to the length len + 1 + TCHAR* ret = _CL_NEWARRAY(TCHAR,len + 1); + if (ret){ + //Copy the string in buffer + _tcsncpy(ret, buffer, len); + //terminate the string + ret[len] = '\0'; + } + //return the the copy + return ret; + } + TCHAR* StringBuffer::getBuffer() { + //Func - '\0' terminates the buffer and returns its pointer + //Pre - true + //Post - buffer has been '\0' terminated and returned + + // Check if the current buffer is '\0' terminated + if (len == bufferLength){ + //Make space for terminator, if necessary. 
+ growBuffer(len + 1); + } + //'\0' buffer so it can be returned properly + buffer[len] = '\0'; + + return buffer; + } + + void StringBuffer::reserve(const int32_t size){ + if ( bufferLength >= size ) + return; + bufferLength = size; + + //Allocate a new buffer of length bufferLength + TCHAR* tmp = _CL_NEWARRAY(TCHAR,bufferLength); + _tcsncpy(tmp, buffer, len); + tmp[len] = '\0'; + + //destroy the old buffer + if (buffer){ + _CLDELETE_CARRAY(buffer); + } + //Assign the new buffer tmp to buffer + buffer = tmp; + } + + void StringBuffer::growBuffer(const int32_t minLength) { + //Func - Has the buffer grown to a minimum length of minLength or bigger + //Pre - minLength >= len + 1 + //Post - The buffer has been grown to a minimum length of minLength or bigger + + growBuffer(minLength, 0); + } + void StringBuffer::growBuffer(const int32_t minLength, const int32_t skippingNInitialChars) { + //Func - Has the buffer grown to a minimum length of minLength or bigger and shifts the + // current string in buffer by skippingNInitialChars forward + //Pre - After growth, must have at least enough room for contents + terminator so + // minLength >= skippingNInitialChars + len + 1 + // skippingNInitialChars >= 0 + //Post - The buffer has been grown to a minimum length of minLength or bigger and + // if skippingNInitialChars > 0, the contents of the buffer has beeen shifted + // forward by skippingNInitialChars positions as the buffer is reallocated, + // leaving the first skippingNInitialChars uninitialized (presumably to be + // filled immediately thereafter by the caller). 
+ + CND_PRECONDITION (skippingNInitialChars >= 0, "skippingNInitialChars is less than zero"); + CND_PRECONDITION (minLength >= skippingNInitialChars + len + 1,"skippingNInitialChars is not large enough"); + + //More aggressive growth strategy to offset smaller default buffer size: + if ( !bufferOwner ){ + if ( bufferLength<minLength ) + _CLTHROWA(CL_ERR_IllegalArgument,"[StringBuffer::grow] un-owned buffer could not be grown"); + return; + } + + bufferLength *= 2; + //Check that bufferLength is bigger than minLength + if (bufferLength < minLength){ + //Have bufferLength become minLength because it still was too small + bufferLength = minLength; + } + + //Allocate a new buffer of length bufferLength + TCHAR* tmp = _CL_NEWARRAY(TCHAR,bufferLength); + //The old buffer might not have been null-terminated, so we _tcsncpy + //only len bytes, not len+1 bytes (the latter might read one char off the + //end of the old buffer), then apply the terminator to the new buffer. + _tcsncpy(tmp + skippingNInitialChars, buffer, len); + tmp[skippingNInitialChars + len] = '\0'; + + //destroy the old buffer + if (buffer){ + _CLDELETE_CARRAY(buffer); + } + //Assign the new buffer tmp to buffer + buffer = tmp; + } + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/StringBuffer.h b/3rdparty/clucene/src/CLucene/util/StringBuffer.h new file mode 100644 index 000000000..505b57594 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/StringBuffer.h @@ -0,0 +1,77 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#ifndef _lucene_util_StringBuffer_ +#define _lucene_util_StringBuffer_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + + +CL_NS_DEF(util) + class StringBuffer:LUCENE_BASE{ + public: + ///Constructor. Allocates a buffer with the default length. + StringBuffer(); + ///Constructor. Allocates a buffer of length initSize + 1 + StringBuffer(const int32_t initSize); + ///Constructor. Creates an instance of Stringbuffer containing a copy of + ///the string value + StringBuffer(const TCHAR* value); + ///Constructs a StringBuffer using another buffer. The StringBuffer can + ///the be used to easily manipulate the buffer. + StringBuffer(TCHAR* buf,int32_t maxlen, const bool consumeBuffer); + ///Destructor + ~StringBuffer(); + ///Clears the Stringbuffer and resets it to it default empty state + void clear(); + + ///Appends a single character + void appendChar(const TCHAR chr); + ///Appends a copy of the string value + void append(const TCHAR* value); + ///Appends a copy of the string value + void append(const TCHAR* value, size_t appendedLength); + ///Appends an integer (after conversion to a character string) + void appendInt(const int32_t value); + ///Appends a qreal (after conversion to a character string) + void appendFloat(const qreal value, const int32_t digits); + ///Puts a copy of the string value in front of the current string in the StringBuffer + void prepend(const TCHAR* value); + ///Puts a copy of the string value in front of the current string in the StringBuffer + void prepend(const TCHAR* value, size_t prependedLength); + + ///Contains the length of string in the StringBuffer + ///Public so that analyzers can edit the length directly + int32_t len; + ///Returns the length of the string in the StringBuffer + int32_t length() const; + ///Returns a copy of the current string in the StringBuffer + TCHAR* toString(); + ///Returns a null terminated reference to the 
StringBuffer's text + TCHAR* getBuffer(); + + + ///reserve a minimum amount of data for the buffer. + ///no change made if the buffer is already longer than length + void reserve(const int32_t length); + private: + ///A buffer that contains strings + TCHAR* buffer; + ///The length of the buffer + int32_t bufferLength; + bool bufferOwner; + + ///Has the buffer grown to a minimum length of minLength or bigger + void growBuffer(const int32_t minLength); + ///Has the buffer grown to a minimum length of minLength or bigger and shifts the + ///current string in buffer by skippingNInitialChars forward + void growBuffer(const int32_t minLength, const int32_t skippingNInitialChars); + + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/StringIntern.cpp b/3rdparty/clucene/src/CLucene/util/StringIntern.cpp new file mode 100644 index 000000000..cb7a889d1 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/StringIntern.cpp @@ -0,0 +1,158 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "StringIntern.h" +CL_NS_DEF(util) + + __wcsintrntype::iterator wblank; + bool blanksinitd=false; + __wcsintrntype CLStringIntern::stringPool(true); + __strintrntype CLStringIntern::stringaPool(true); + DEFINE_MUTEX(CLStringIntern::THIS_LOCK) + + void CLStringIntern::shutdown(){ + #ifdef _DEBUG + SCOPED_LOCK_MUTEX(THIS_LOCK) + if ( stringaPool.size() > 0 ){ + printf("ERROR: stringaPool still contains intern'd strings (refcounts):\n"); + __strintrntype::iterator itr = stringaPool.begin(); + while ( itr != stringaPool.end() ){ + printf(" %s (%d)\n",(itr->first), (itr->second)); + ++itr; + } + } + + if ( stringPool.size() > 0 ){ + printf("ERROR: stringPool still contains intern'd strings (refcounts):\n"); + __wcsintrntype::iterator itr = stringPool.begin(); + while ( itr != stringPool.end() ){ + _tprintf(_T(" %s (%d)\n"),(itr->first), (itr->second)); + ++itr; + } + } + #endif + } + + const TCHAR* CLStringIntern::intern(const TCHAR* str CL_FILELINEPARAM){ + if ( str == NULL ) + return NULL; + if ( str[0] == 0 ) + return LUCENE_BLANK_STRING; + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + __wcsintrntype::iterator itr = stringPool.find(str); + if ( itr==stringPool.end() ){ +#ifdef _UCS2 + TCHAR* ret = lucenewcsdup(str CL_FILELINEREF); +#else + TCHAR* ret = lucenestrdup(str CL_FILELINEREF); +#endif + stringPool[ret]= 1; + return ret; + }else{ + (itr->second)++; + return itr->first; + } + } + + bool CLStringIntern::unintern(const TCHAR* str){ + if ( str == NULL ) + return false; + if ( str[0] == 0 ) + return false; + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + __wcsintrntype::iterator itr = stringPool.find(str); + if ( itr != stringPool.end() ){ + if ( (itr->second) == 1 ){ + stringPool.removeitr(itr); + return true; + }else + (itr->second)--; + } + return false; + } + + const char* CLStringIntern::internA(const char* str CL_FILELINEPARAM){ + if ( str == NULL ) + return 
NULL; + if ( str[0] == 0 ) + return _LUCENE_BLANK_ASTRING; + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + __strintrntype::iterator itr = stringaPool.find(str); + if ( itr==stringaPool.end() ){ + char* ret = lucenestrdup(str CL_FILELINE); + stringaPool[ret] = 1; + return ret; + }else{ + (itr->second)++; + return itr->first; + } + } + + bool CLStringIntern::uninternA(const char* str){ + if ( str == NULL ) + return false; + if ( str[0] == 0 ) + return false; + + SCOPED_LOCK_MUTEX(THIS_LOCK) + + __strintrntype::iterator itr = stringaPool.find(str); + if ( itr!=stringaPool.end() ){ + if ( (itr->second) == 1 ){ + stringaPool.removeitr(itr); + return true; + }else + (itr->second)--; + } + return false; + } + + /* removed because of multi-threading problems... + __wcsintrntype::iterator CLStringIntern::internitr(const TCHAR* str CL_FILELINEPARAM){ + if ( str[0] == 0 ){ + if ( !blanksinitd ){ + CLStringIntern::stringPool.put(LUCENE_BLANK_STRING,1); + wblank=stringPool.find(str); + blanksinitd=true; + } + return wblank; + } + __wcsintrntype::iterator itr = stringPool.find(str); + if (itr==stringPool.end()){ +#ifdef _UCS2 + TCHAR* ret = lucenewcsdup(str CL_FILELINEREF); +#else + TCHAR* ret = lucenestrdup(str CL_FILELINEREF); +#endif + stringPool.put(ret,1); + return stringPool.find(str); + }else{ + (itr->second)++; + return itr; + } + } + bool CLStringIntern::uninternitr(__wcsintrntype::iterator itr){ + if ( itr!=stringPool.end() ){ + if ( itr==wblank ) + return false; + if ( (itr->second) == 1 ){ + stringPool.removeitr(itr); + return true; + }else + (itr->second)--; + } + return false; + } +*/ + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/StringIntern.h b/3rdparty/clucene/src/CLucene/util/StringIntern.h new file mode 100644 index 000000000..ded060c64 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/StringIntern.h @@ -0,0 +1,61 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the 
CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_util_StringIntern_H +#define _lucene_util_StringIntern_H + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "VoidMap.h" +CL_NS_DEF(util) +typedef CL_NS(util)::CLHashMap<const TCHAR*,int,CL_NS(util)::Compare::TChar,CL_NS(util)::Equals::TChar,CL_NS(util)::Deletor::tcArray, CL_NS(util)::Deletor::DummyInt32 > __wcsintrntype; +typedef CL_NS(util)::CLHashMap<const char*,int,CL_NS(util)::Compare::Char,CL_NS(util)::Equals::Char,CL_NS(util)::Deletor::acArray, CL_NS(util)::Deletor::DummyInt32 > __strintrntype; + + /** Functions for intern'ing strings. This + * is a process of pooling strings thus using less memory, + * and furthermore allows intern'd strings to be directly + * compared: + * string1==string2, rather than _tcscmp(string1,string2) + */ + class CLStringIntern{ + static __wcsintrntype stringPool; + static __strintrntype stringaPool; + STATIC_DEFINE_MUTEX(THIS_LOCK) + public: + /** + * Internalise the specified string. + * \return Returns a pointer to the internalised string + */ + static const char* internA(const char* str CL_FILELINEPARAM); + /** + * Uninternalise the specified string. Decreases + * the reference count and frees the string if + * reference count is zero + * \returns true if string was destroyed, otherwise false + */ + static bool uninternA(const char* str); + + /** + * Internalise the specified string. + * \return Returns a pointer to the internalised string + */ + static const TCHAR* intern(const TCHAR* str CL_FILELINEPARAM); + + /** + * Uninternalise the specified string. 
Decreases + * the reference count and frees the string if + * reference count is zero + * \returns true if string was destroyed, otherwise false + */ + static bool unintern(const TCHAR* str); + + static void shutdown(); + }; + +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/ThreadLocal.cpp b/3rdparty/clucene/src/CLucene/util/ThreadLocal.cpp new file mode 100644 index 000000000..a54c86916 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/ThreadLocal.cpp @@ -0,0 +1,55 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "CLucene/LuceneThreads.h" +#include "ThreadLocal.h" + +CL_NS_DEF(util) + +DEFINE_MUTEX(ThreadLocalBase::ThreadLocalBase_THIS_LOCK) + +ThreadLocalBase::ShutdownHooksType ThreadLocalBase::shutdownHooks(false); +ThreadLocalBase::ThreadLocalsType ThreadLocalBase::threadLocals(false,false); + +ThreadLocalBase::ThreadLocalBase(){ +} +ThreadLocalBase::~ThreadLocalBase(){ +} + +void ThreadLocalBase::UnregisterCurrentThread(){ + _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; + SCOPED_LOCK_MUTEX(ThreadLocalBase_THIS_LOCK) + + ThreadLocalsType::iterator itr = threadLocals.lower_bound(id); + ThreadLocalsType::iterator end = threadLocals.upper_bound(id); + while ( itr != end ){ + itr->second->setNull(); + ++itr; + } +} +//Calls setNull() on every registered ThreadLocal, then calls each +//registered shutdown hook once with startup=false. +void ThreadLocalBase::shutdown(){ + SCOPED_LOCK_MUTEX(ThreadLocalBase_THIS_LOCK) + + ThreadLocalsType::iterator itr = threadLocals.begin(); + while ( itr != threadLocals.end() ){ + itr->second->setNull(); + ++itr; + } + + ShutdownHooksType::iterator itr2 = shutdownHooks.begin(); + while ( itr2 != shutdownHooks.end() ){ + ShutdownHook* hook = *itr2; + 
hook(false); + //advance to the next hook, otherwise the first hook is called forever + ++itr2; + } +} +//Adds hook to the set of functions that shutdown() calls. +void ThreadLocalBase::registerShutdownHook(ShutdownHook* hook){ + SCOPED_LOCK_MUTEX(ThreadLocalBase_THIS_LOCK) + shutdownHooks.insert(hook); +} + + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/util/ThreadLocal.h b/3rdparty/clucene/src/CLucene/util/ThreadLocal.h new file mode 100644 index 000000000..f67c76ca9 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/ThreadLocal.h @@ -0,0 +1,143 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +#ifndef _lucene_util_ThreadLocal_H +#define _lucene_util_ThreadLocal_H + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/util/VoidMap.h" + +CL_NS_DEF(util) + +class ThreadLocalBase: LUCENE_BASE{ +public: + /** + * A hook called when CLucene is starting or shutting down, + * this can be used for setting up and tearing down static + * variables + */ + typedef void ShutdownHook(bool startup); + +protected: + STATIC_DEFINE_MUTEX(ThreadLocalBase_THIS_LOCK) + typedef CL_NS(util)::CLMultiMap<_LUCENE_THREADID_TYPE, ThreadLocalBase*, + CL_NS(util)::CLuceneThreadIdCompare, + CL_NS(util)::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, + CL_NS(util)::Deletor::ConstNullVal<ThreadLocalBase*> > ThreadLocalsType; + static ThreadLocalsType threadLocals; + //todo: using http://en.wikipedia.org/wiki/Thread-local_storage#Pthreads_implementation + //would work better... but lots of testing would be needed first... 
+ typedef CL_NS(util)::CLSetList<ShutdownHook*, + CL_NS(util)::Compare::Void<ShutdownHook>, + CL_NS(util)::Deletor::ConstNullVal<ShutdownHook*> > ShutdownHooksType; + static ShutdownHooksType shutdownHooks; + + ThreadLocalBase(); +public: + virtual ~ThreadLocalBase(); + + /** + * Call this function to clear the local thread data for this + * ThreadLocal. Calling set(NULL) does the same thing, except + * this function is virtual and can be called without knowing + * the template. + */ + virtual void setNull() = 0; + + /** + * If you want to clean up thread specific memory, then you should + * make sure this thread is called when the thread is not going to be used + * again. This will clean up threadlocal data which can contain quite a lot + * of data, so if you are creating lots of new threads, then it is a good idea + * to use this function, otherwise there will be many memory leaks. + */ + static void UnregisterCurrentThread(); + + /** + * Call this function to shutdown CLucene + */ + static void shutdown(); + + /** + * Add this function to the shutdown hook list. This function will be called + * when CLucene is shutdown. 
+ */ + static void registerShutdownHook(ShutdownHook* hook); +}; + +template<typename T,typename _deletor> +class ThreadLocal: public ThreadLocalBase{ + typedef CL_NS(util)::CLSet<_LUCENE_THREADID_TYPE, T, + CL_NS(util)::CLuceneThreadIdCompare, + CL_NS(util)::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, + _deletor > LocalsType; + LocalsType locals; + DEFINE_MUTEX(locals_LOCK) +public: + ThreadLocal(); + ~ThreadLocal(); + T get(); + void setNull(); + void set(T t); +}; + +template<typename T,typename _deletor> +ThreadLocal<T,_deletor>::ThreadLocal(): + locals(false,true) +{ + //add this object to the base's list of threadlocals to be + //notified in case of UnregisterThread() + _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; + SCOPED_LOCK_MUTEX(ThreadLocalBase_THIS_LOCK) + threadLocals.insert( CL_NS_STD(pair)<const _LUCENE_THREADID_TYPE, ThreadLocalBase*>(id, this) ); +} + +template<typename T,typename _deletor> +ThreadLocal<T,_deletor>::~ThreadLocal(){ + //remove this object to the base's list of threadlocals + _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; + SCOPED_LOCK_MUTEX(ThreadLocalBase_THIS_LOCK) + + //remove all the thread local data for this object + locals.clear(); + + //remove this object from the ThreadLocalBase threadLocal list + ThreadLocalsType::iterator itr = threadLocals.lower_bound(id); + ThreadLocalsType::iterator end = threadLocals.upper_bound(id); + while ( itr != end ){ + if ( itr->second == this){ + threadLocals.erase(itr); + break; + } + ++itr; + } +} + +template<typename T,typename _deletor> +T ThreadLocal<T,_deletor>::get(){ + return locals.get(_LUCENE_CURRTHREADID); +} + +template<typename T,typename _deletor> +void ThreadLocal<T,_deletor>::setNull(){ + set(NULL); +} + +template<typename T,typename _deletor> +void ThreadLocal<T,_deletor>::set(T t){ + _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; + locals.remove(id); + if ( t != NULL ) + locals.insert( CL_NS_STD(pair)<const _LUCENE_THREADID_TYPE,T>(id, t) ); +} + +CL_NS_END +#endif 
diff --git a/3rdparty/clucene/src/CLucene/util/VoidList.h b/3rdparty/clucene/src/CLucene/util/VoidList.h new file mode 100644 index 000000000..cd6908876 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/VoidList.h @@ -0,0 +1,175 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_util_VoidList_ +#define _lucene_util_VoidList_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "Equators.h" + +CL_NS_DEF(util) + +/** +* A template to encapsulate various list type classes +* @internal +*/ +template<typename _kt,typename _base,typename _valueDeletor> +class __CLList:public _base,LUCENE_BASE { +private: + bool dv; + typedef _base base; +public: + DEFINE_MUTEX(THIS_LOCK) + + typedef typename _base::const_iterator const_iterator; + typedef typename _base::iterator iterator; + + virtual ~__CLList(){ + clear(); + } + + __CLList ( const bool deleteValue ): + dv(deleteValue) + { + } + + void setDoDelete(bool val){ dv=val; } + + //sets array to the contents of this array. 
+ //array must be size+1, otherwise memory may be overwritten + void toArray(_kt* into) const{ + int i=0; + for ( const_iterator itr=base::begin();itr!=base::end();itr++ ){ + into[i] = *itr; + i++; + } + into[i] = NULL; + } + + void set(int32_t i, _kt val) { + if ( dv ) + _valueDeletor::doDelete((*this)[i]); + (*this)[i] = val; + } + + //todo: check this + void delete_back(){ + if ( base::size() > 0 ){ + iterator itr = base::end(); + if ( itr != base::begin()) + itr --; + _kt key = *itr; + base::erase(itr); + if ( dv ) + _valueDeletor::doDelete(key); + } + } + + void delete_front(){ + if ( base::size() > 0 ){ + iterator itr = base::begin(); + _kt key = *itr; + base::erase(itr); + if ( dv ) + _valueDeletor::doDelete(key); + } + } + + void clear(){ + if ( dv ){ + iterator itr = base::begin(); + while ( itr != base::end() ){ + _valueDeletor::doDelete(*itr); + ++itr; + } + } + base::clear(); + } + + void remove(int32_t i, bool dontDelete=false){ + iterator itr=base::begin(); + itr+=i; + _kt key = *itr; + base::erase( itr ); + if ( dv && !dontDelete ) + _valueDeletor::doDelete(key); + } + void remove(iterator itr, bool dontDelete=false){ + _kt key = *itr; + base::erase( itr ); + if ( dv && !dontDelete ) + _valueDeletor::doDelete(key); + } + +}; + +//growable arrays of Objects (like a collection or list) +//a list, so can contain duplicates +//it grows in chunks... todo: check jlucene for initial size of array, and growfactors +template<typename _kt, typename _valueDeletor=CL_NS(util)::Deletor::Dummy> +class CLVector:public __CLList<_kt, CL_NS_STD(vector)<_kt> , _valueDeletor> +{ +public: + CLVector ( const bool deleteValue=true ): + __CLList<_kt, CL_NS_STD(vector)<_kt> , _valueDeletor>(deleteValue) + { + } +}; + +//An array-backed implementation of the List interface +//a list, so can contain duplicates +//*** a very simple list - use <valarray> +//(This class is roughly equivalent to Vector, except that it is unsynchronized.) 
+#define CLArrayList CLVector +#define CLHashSet CLHashList + +//implementation of the List interface, provides access to the first and last list elements in O(1) +//no comparator is required... and so can contain duplicates +//a simple list with no comparator +//*** a very simple list - use <list> +#ifdef LUCENE_DISABLE_HASHING + #define CLHashList CLSetList +#else + +template<typename _kt, + typename _Comparator=CL_NS(util)::Compare::TChar, + typename _valueDeletor=CL_NS(util)::Deletor::Dummy> +class CLHashList:public __CLList<_kt, CL_NS_HASHING(hash_set)<_kt,_Comparator> , _valueDeletor> +{ +public: + CLHashList ( const bool deleteValue=true ): + __CLList<_kt, CL_NS_HASHING(hash_set)<_kt,_Comparator> , _valueDeletor>(deleteValue) + { + } +}; +#endif + +template<typename _kt, typename _valueDeletor=CL_NS(util)::Deletor::Dummy> +class CLLinkedList:public __CLList<_kt, CL_NS_STD(list)<_kt> , _valueDeletor> +{ +public: + CLLinkedList ( const bool deleteValue=true ): + __CLList<_kt, CL_NS_STD(list)<_kt> , _valueDeletor>(deleteValue) + { + } +}; +template<typename _kt, + typename _Comparator=CL_NS(util)::Compare::TChar, + typename _valueDeletor=CL_NS(util)::Deletor::Dummy> +class CLSetList:public __CLList<_kt, CL_NS_STD(set)<_kt,_Comparator> , _valueDeletor> +{ +public: + CLSetList ( const bool deleteValue=true ): + __CLList<_kt, CL_NS_STD(set)<_kt,_Comparator> , _valueDeletor>(deleteValue) + { + } +}; + +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/util/VoidMap.h b/3rdparty/clucene/src/CLucene/util/VoidMap.h new file mode 100644 index 000000000..b22b507e9 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/VoidMap.h @@ -0,0 +1,270 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#ifndef _lucene_util_VoidMap_ +#define _lucene_util_VoidMap_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + + +CL_NS_DEF(util) + +/** +* A template to encapsulate various map type classes +* @internal +*/ +template<typename _kt, typename _vt, + typename _base, + typename _KeyDeletor=CL_NS(util)::Deletor::Dummy, + typename _ValueDeletor=CL_NS(util)::Deletor::Dummy> +class __CLMap:public _base,LUCENE_BASE { +private: + bool dk; + bool dv; + typedef _base base; +public: + DEFINE_MUTEX(THIS_LOCK) + + typedef typename _base::iterator iterator; + typedef typename _base::const_iterator const_iterator; + typedef CL_NS_STD(pair)<_kt, _vt> _pair; + + ///Default constructor for the __CLMap + __CLMap (): + dk(true), + dv(true) + { + } + + ///Deconstructor for the __CLMap + ~__CLMap (){ + clear(); + } + + void setDeleteKey(bool val){ dk = val; } + void setDeleteValue(bool val){ dv = val; } + + ///Construct the VoidMap and set the deleteTypes to the specified values + ///\param deleteKey if true then the key variable is deleted when an object is deleted + ///\param keyDelType delete the key variable using the specified type + ///\param deleteValue if true then the value variable is deleted when an object is deleted + ///\param valueDelType delete the value variable using the specified type + /*__CLMap ( const bool deleteKey, const bool deleteValue ): + dk(deleteKey), + dv(deleteValue) + { + }*/ + + ///checks to see if the specified key exists + ///\param k the key to check for + ///\returns true if the key exists + bool exists(_kt k)const{ + const_iterator itr = base::find(k); + bool ret = itr!=base::end(); + return ret; + } + + ///put the specified pair into the map. remove any old items first + ///\param k the key + ///\param v the value + void put(_kt k,_vt v){ + //todo: check if this is always right! + //must should look through code, for + //cases where map is not unique!!! 
+ if ( dk || dv ) + remove(k); + + //todo: replacing the old item might be quicker... + + base::insert(_pair(k,v)); + } + + + ///using a non-const key, get a non-const value + _vt get( _kt k) const { + const_iterator itr = base::find(k); + if ( itr==base::end() ) + return _vt(); + else + return itr->second; + } + ///using a non-const key, get the actual key + _kt getKey( _kt k) const { + const_iterator itr = base::find(k); + if ( itr==base::end() ) + return _kt(); + else + return itr->first; + } + + void removeitr (iterator itr, const bool dontDeleteKey = false, const bool dontDeleteValue = false){ + //delete key&val first. This prevents potential loops (deleting object removes itself) + _kt key = itr->first; + _vt val = itr->second; + base::erase(itr); + + //keys & vals need to be deleted after erase, because the hashvalue is still needed + if ( dk && !dontDeleteKey ) + _KeyDeletor::doDelete(key); + if ( dv && !dontDeleteValue ) + _ValueDeletor::doDelete(val); + } + ///delete and optionally delete the specified key and associated value + void remove(_kt key, const bool dontDeleteKey = false, const bool dontDeleteValue = false){ + iterator itr = base::find(key); + if ( itr!=base::end() ) + removeitr(itr,dontDeleteKey,dontDeleteValue); + } + + ///clear all keys and values in the map + void clear(){ + if ( dk || dv ){ + iterator itr = base::begin(); + while ( itr!=base::end() ){ + #ifdef _CL_HAVE_EXT_HASH_MAP + removeitr(itr); + itr = base::begin(); + + #else + if ( dk ) + _KeyDeletor::doDelete(itr->first); + if ( dv ) + _ValueDeletor::doDelete(itr->second); + ++itr; + + #endif + } + } + base::clear(); + } +}; + +// makes no guarantees as to the order of the map +// cannot contain duplicate keys; each key can map to at most one value +#define CLHashtable CLHashMap + +#if defined(_CL_HAVE_GOOGLE_DENSE_HASH_MAP) +//do nothing +#elif defined(LUCENE_DISABLE_HASHING) + + //a CLSet with CLHashMap traits +template<typename _kt, typename _vt, + typename _Compare, + typename 
_EqualDummy, + typename _KeyDeletor=CL_NS(util)::Deletor::Dummy, + typename _ValueDeletor=CL_NS(util)::Deletor::Dummy> +class CLHashMap:public __CLMap<_kt,_vt, + CL_NS_STD(map)<_kt,_vt, _Compare>, + _KeyDeletor,_ValueDeletor> +{ + typedef typename CL_NS_STD(map)<_kt,_vt,_Compare> _base; + typedef __CLMap<_kt, _vt, CL_NS_STD(map)<_kt,_vt, _Compare>, + _KeyDeletor,_ValueDeletor> _this; +public: + CLHashMap ( const bool deleteKey=false, const bool deleteValue=false ) + { + _this::setDeleteKey(deleteKey); + _this::setDeleteValue(deleteValue); + } +}; +#elif defined(_CL_HAVE_EXT_HASH_MAP) + //ext/hash_map syntax +//HashMap class is roughly equivalent to Hashtable, except that it is unsynchronized +template<typename _kt, typename _vt, + typename _Hasher, + typename _Equals, + typename _KeyDeletor=CL_NS(util)::Deletor::Dummy, + typename _ValueDeletor=CL_NS(util)::Deletor::Dummy> +class CLHashMap:public __CLMap<_kt,_vt, + CL_NS_HASHING(hash_map)<_kt,_vt, _Hasher,_Equals>, + _KeyDeletor,_ValueDeletor> +{ + typedef __CLMap<_kt,_vt, CL_NS_HASHING(hash_map)<_kt,_vt, _Hasher,_Equals>, + _KeyDeletor,_ValueDeletor> _this; +public: + CLHashMap ( const bool deleteKey=false, const bool deleteValue=false ) + { + _this::setDeleteKey(deleteKey); + _this::setDeleteValue(deleteValue); + } +}; + +#else +//HashMap class is roughly equivalent to Hashtable, except that it is unsynchronized +template<typename _kt, typename _vt, + typename _Hasher, + typename _Equals, + typename _KeyDeletor=CL_NS(util)::Deletor::Dummy, + typename _ValueDeletor=CL_NS(util)::Deletor::Dummy> +class CLHashMap:public __CLMap<_kt,_vt, + CL_NS_HASHING(hash_map)<_kt,_vt, _Hasher>, + _KeyDeletor,_ValueDeletor> +{ + typedef __CLMap<_kt,_vt, CL_NS_HASHING(hash_map)<_kt,_vt, _Hasher>, + _KeyDeletor,_ValueDeletor> _this; +public: + CLHashMap ( const bool deleteKey=false, const bool deleteValue=false ) + { + _this::setDeleteKey(deleteKey); + _this::setDeleteValue(deleteValue); + } +}; +#endif + +//A collection that contains 
no duplicates +//does not guarantee that the order will remain constant over time +template<typename _kt, typename _vt, + typename _Compare, + typename _KeyDeletor=CL_NS(util)::Deletor::Dummy, + typename _ValueDeletor=CL_NS(util)::Deletor::Dummy> +class CLSet:public __CLMap<_kt,_vt, + CL_NS_STD(map)<_kt,_vt, _Compare>, + _KeyDeletor,_ValueDeletor> +{ + typedef typename CL_NS_STD(map)<_kt,_vt,_Compare> _base; + typedef __CLMap<_kt, _vt, CL_NS_STD(map)<_kt,_vt, _Compare>, + _KeyDeletor,_ValueDeletor> _this; +public: + CLSet ( const bool deleteKey=false, const bool deleteValue=false ) + { + _this::setDeleteKey(deleteKey); + _this::setDeleteValue(deleteValue); + } +}; + + +//A collection that can contain duplicate keys +//_Compare is forwarded to the underlying multimap (as CLSet does for its map), +//so the key ordering requested by the caller is the one actually used +template<typename _kt, typename _vt, + typename _Compare, + typename _KeyDeletor=CL_NS(util)::Deletor::Dummy, + typename _ValueDeletor=CL_NS(util)::Deletor::Dummy> +class CLMultiMap:public __CLMap<_kt,_vt, + CL_NS_STD(multimap)<_kt,_vt, _Compare>, + _KeyDeletor,_ValueDeletor> +{ + typedef typename CL_NS_STD(multimap)<_kt,_vt,_Compare> _base; + typedef __CLMap<_kt, _vt, CL_NS_STD(multimap)<_kt,_vt, _Compare>, + _KeyDeletor,_ValueDeletor> _this; +public: + CLMultiMap ( const bool deleteKey=false, const bool deleteValue=false ) + { + _this::setDeleteKey(deleteKey); + _this::setDeleteValue(deleteValue); + } +}; + + +//*** need to create a class that allows duplicates - use <set> +//#define CLSet __CLMap +CL_NS_END + +#ifdef _CL_HAVE_GOOGLE_DENSE_HASH_MAP +#include "GoogleSparseMap.h" +#endif + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/bufferedstream.h b/3rdparty/clucene/src/CLucene/util/bufferedstream.h new file mode 100644 index 000000000..d905955b1 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/bufferedstream.h @@ -0,0 +1,157 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Jos van den Oever +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public 
License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#ifndef BUFFEREDSTREAM_H +#define BUFFEREDSTREAM_H + +#include "streambase.h" +#include "inputstreambuffer.h" + +#include <cassert> +#include <stdio.h> + +namespace jstreams { + +template <class T> +class BufferedInputStream : public StreamBase<T> { +private: + bool finishedWritingToBuffer; + InputStreamBuffer<T> buffer; + + void writeToBuffer(int32_t minsize); + int32_t read_(const T*& start, int32_t min, int32_t max); +protected: + /** + * This function must be implemented by the subclasses. + * It should write a maximum of @p space characters at the buffer + * position pointed to by @p start. If no more data is available due to + * end of file, -1 should be returned. If an error occurs, the status + * should be set to Error, an error message should be set and the function + * must return -1. 
+ **/ + virtual int32_t fillBuffer(T* start, int32_t space) = 0; + // this function might be useful if you want to reuse a bufferedstream + void resetBuffer() {printf("implement 'resetBuffer'\n");} + BufferedInputStream<T>(); +public: + int32_t read(const T*& start, int32_t min, int32_t max); + int64_t reset(int64_t); + virtual int64_t skip(int64_t ntoskip); +}; + +template <class T> +BufferedInputStream<T>::BufferedInputStream() { + finishedWritingToBuffer = false; +} + +template <class T> +void +BufferedInputStream<T>::writeToBuffer(int32_t ntoread) { + int32_t missing = ntoread - buffer.avail; + int32_t nwritten = 0; + while (missing > 0 && nwritten >= 0) { + int32_t space; + space = buffer.makeSpace(missing); + T* start = buffer.readPos + buffer.avail; + nwritten = fillBuffer(start, space); + assert(StreamBase<T>::status != Eof); + if (nwritten > 0) { + buffer.avail += nwritten; + missing = ntoread - buffer.avail; + } + } + if (nwritten < 0) { + finishedWritingToBuffer = true; + } +} +template <class T> +int32_t +BufferedInputStream<T>::read(const T*& start, int32_t min, int32_t max) { + if (StreamBase<T>::status == Error) return -2; + if (StreamBase<T>::status == Eof) return -1; + + // do we need to read data into the buffer? + if (!finishedWritingToBuffer && min > buffer.avail) { + // do we have enough space in the buffer? 
+ writeToBuffer(min); + if (StreamBase<T>::status == Error) return -2; + } + + int32_t nread = buffer.read(start, max); + + BufferedInputStream<T>::position += nread; + if (BufferedInputStream<T>::position > BufferedInputStream<T>::size + && BufferedInputStream<T>::size > 0) { + // error: we read more than was specified in size + // this is an error because all dependent code might have been labouring + // under a misapprehension + BufferedInputStream<T>::status = Error; + BufferedInputStream<T>::error = "Stream is longer than specified."; + nread = -2; + } else if (BufferedInputStream<T>::status == Ok && buffer.avail == 0 + && finishedWritingToBuffer) { + BufferedInputStream<T>::status = Eof; + if (BufferedInputStream<T>::size == -1) { + BufferedInputStream<T>::size = BufferedInputStream<T>::position; + } + // save one call to read() by already returning -1 if no data is there + if (nread == 0) nread = -1; + } + return nread; +} +template <class T> +int64_t +BufferedInputStream<T>::reset(int64_t newpos) { + if (StreamBase<T>::status == Error) return -2; + // check to see if we have this position + int64_t d = BufferedInputStream<T>::position - newpos; + if (buffer.readPos - d >= buffer.start && -d < buffer.avail) { + BufferedInputStream<T>::position -= d; + buffer.avail += (int32_t)d; + buffer.readPos -= d; + StreamBase<T>::status = Ok; + } + return StreamBase<T>::position; +} +template <class T> +int64_t +BufferedInputStream<T>::skip(int64_t ntoskip) { + const T *begin; + int32_t nread; + int64_t skipped = 0; + while (ntoskip) { + int32_t step = (int32_t)((ntoskip > buffer.size) ?buffer.size :ntoskip); + nread = read(begin, 1, step); + if (nread <= 0) { + return skipped; + } + ntoskip -= nread; + skipped += nread; + } + return skipped; +} +} + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/dirent.cpp b/3rdparty/clucene/src/CLucene/util/dirent.cpp new file mode 100644 index 000000000..3c5c54200 --- /dev/null +++ 
b/3rdparty/clucene/src/CLucene/util/dirent.cpp @@ -0,0 +1,221 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Matt J. Weinstein +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" + +#if !defined(_CL_HAVE_DIRENT_H) && !defined(_CL_HAVE_SYS_NDIR_H) && !defined(_CL_HAVE_SYS_DIR_H) && !defined(_CL_HAVE_NDIR_H) +#include "dirent.h" + +DIR * +opendir (const char *szPath) +{ + DIR *nd; + char szFullPath[CL_MAX_PATH]; + + errno = 0; + + if (!szPath) + { + errno = EFAULT; + return NULL; + } + + if (szPath[0] == '\0') + { + errno = ENOTDIR; + return NULL; + } + + /* Attempt to determine if the given path really is a directory. */ + struct _stat rcs; + if ( _stat(szPath,&rcs) == -1) + { + /* call GetLastError for more error info */ + errno = ENOENT; + return NULL; + } + if (!(rcs.st_mode & _S_IFDIR)) + { + /* Error, entry exists but not a directory. */ + errno = ENOTDIR; + return NULL; + } + + /* Make an absolute pathname. */ + _realpath(szPath,szFullPath); + + /* Allocate enough space to store DIR structure and the complete + * directory path given. */ + //nd = (DIR *) malloc (sizeof (DIR) + _tcslen (szFullPath) + _tcslen (DIRENT_SLASH) + + // _tcslen (DIRENT_SEARCH_SUFFIX)+1); + nd = new DIR; + + if (!nd) + { + /* Error, out of memory. */ + errno = ENOMEM; + return NULL; + } + + /* Create the search expression. */ + strcpy (nd->dd_name, szFullPath); + + /* Add on a slash if the path does not end with one. 
*/ + if (nd->dd_name[0] != '\0' && + nd->dd_name[strlen (nd->dd_name) - 1] != '/' && + nd->dd_name[strlen (nd->dd_name) - 1] != '\\') + { + strcat (nd->dd_name, DIRENT_SLASH); + } + + /* Add on the search pattern */ + strcat (nd->dd_name, DIRENT_SEARCH_SUFFIX); + + /* Initialize handle to -1 so that a premature closedir doesn't try + * to call _findclose on it. */ + nd->dd_handle = -1; + + /* Initialize the status. */ + nd->dd_stat = 0; + + /* Initialize the dirent structure. ino and reclen are invalid under + * Win32, and name simply points at the appropriate part of the + * findfirst_t structure. */ + //nd->dd_dir.d_ino = 0; + //nd->dd_dir.d_reclen = 0; + nd->dd_dir.d_namlen = 0; + nd->dd_dir.d_name = nd->dd_dta.name; + + return nd; +} + + +struct dirent * readdir (DIR * dirp) +{ + errno = 0; + + /* Check for valid DIR struct. */ + if (!dirp) + { + errno = EFAULT; + return NULL; + } + + if (dirp->dd_dir.d_name != dirp->dd_dta.name) + { + /* The structure does not seem to be set up correctly. */ + errno = EINVAL; + return NULL; + } + + bool bCallFindNext = true; + + if (dirp->dd_stat < 0) + { + /* We have already returned all files in the directory + * (or the structure has an invalid dd_stat). */ + return NULL; + } + else if (dirp->dd_stat == 0) + { + /* We haven't started the search yet. */ + /* Start the search */ + dirp->dd_handle = _findfirst (dirp->dd_name, &(dirp->dd_dta)); + + if (dirp->dd_handle == -1) + { + /* Whoops! Seems there are no files in that + * directory. */ + dirp->dd_stat = -1; + } + else + { + dirp->dd_stat = 1; + } + + /* Dont call _findnext first time. */ + bCallFindNext = false; + } + + while (dirp->dd_stat > 0) + { + if (bCallFindNext) + { + /* Get the next search entry. */ + if (_findnext (dirp->dd_handle, &(dirp->dd_dta))) + { + /* We are off the end or otherwise error. 
*/ + _findclose (dirp->dd_handle); + dirp->dd_handle = -1; + dirp->dd_stat = -1; + return NULL; + } + else + { + /* Update the status to indicate the correct + * number. */ + dirp->dd_stat++; + } + } + + /* Successfully got an entry. Everything about the file is + * already appropriately filled in except the length of the + * file name. */ + dirp->dd_dir.d_namlen = strlen (dirp->dd_dir.d_name); + + bool bThisFolderOrUpFolder = dirp->dd_dir.d_name[0] == '.' && + (dirp->dd_dir.d_name[1] == 0 || (dirp->dd_dir.d_name[1] == '.' && dirp->dd_dir.d_name[2] == 0)); + + if (!bThisFolderOrUpFolder) + { + struct _stat buf; + char buffer[CL_MAX_DIR]; + size_t bl = strlen(dirp->dd_name)-strlen(DIRENT_SEARCH_SUFFIX); + strncpy(buffer,dirp->dd_name,bl); + buffer[bl]=0; + strcat(buffer, dirp->dd_dir.d_name); + if ( _stat(buffer,&buf) == 0 ) + { + /* Finally we have a valid entry. */ + return &dirp->dd_dir; + } + } + + /* Allow to find next file. */ + bCallFindNext = true; + } + + return NULL; +} + + + +int32_t +closedir (DIR * dirp) +{ + int32_t rc; + + errno = 0; + rc = 0; + + if (!dirp) + { + errno = EFAULT; + return -1; + } + + if (dirp->dd_handle != -1) + { + rc = _findclose (dirp->dd_handle); + } + + /* Delete the dir structure. */ + _CLVDELETE(dirp); + + return rc; +} +#endif //HAVE_DIRENT_H + diff --git a/3rdparty/clucene/src/CLucene/util/dirent.h b/3rdparty/clucene/src/CLucene/util/dirent.h new file mode 100644 index 000000000..71cd34c0a --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/dirent.h @@ -0,0 +1,105 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Matt J. Weinstein +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#ifndef lucene_util_dirent_H +#define lucene_util_dirent_H + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#if !defined(_CL_HAVE_DIRENT_H) && !defined(_CL_HAVE_SYS_NDIR_H) && !defined(_CL_HAVE_SYS_DIR_H) && !defined(_CL_HAVE_NDIR_H) + +/** +\unit + * dirent.c + * + * Derived from DIRLIB.C by Matt J. Weinstein + * This note appears in the DIRLIB.H + * DIRLIB.H by M. J. Weinstein Released to public domain 1-Jan-89 + * + * Updated by Jeremy Bettis <jeremy@hksys.com> + * Significantly revised and rewinddir, seekdir and telldir added by Colin + * Cut down again & changed by Ben van Klinken + * Peters <colin@fu.is.saga-u.ac.jp> + * + */ + +/** dirent structure - used by the dirent.h directory iteration functions */ +struct dirent +{ + unsigned short d_namlen; /* Length of name in d_name. */ + char *d_name; /* File name. */ +}; + +/** DIR structure - used by the dirent.h directory iteration functions*/ +struct DIR +{ + /** disk transfer area for this dir */ + struct _finddata_t dd_dta; + + /* dirent struct to return from dir (NOTE: this makes this thread + * safe as long as only one thread uses a particular DIR struct at + * a time) */ + struct dirent dd_dir; + + /** _findnext handle */ + intptr_t dd_handle; + + /** + * Status of search: + * 0 = not started yet (next entry to read is first entry) + * -1 = off the end + * positive = 0 based index of next entry + */ + int32_t dd_stat; + + /** given path for dir with search pattern (struct is extended) */ + char dd_name[CL_MAX_DIR]; + +}; + +#define DIRENT_SEARCH_SUFFIX "*" +#define DIRENT_SLASH PATH_DELIMITERA + + +/** +* Returns a pointer to a DIR structure appropriately filled in to begin +* searching a directory. +*/ +DIR* opendir (const char* filespec); + +/** +* Return a pointer to a dirent structure filled with the information on the +* next entry in the directory. 
+*/ +struct dirent* readdir (DIR* dir); + +/** +* Frees up resources allocated by opendir. +*/ +int32_t closedir (DIR* dir); + + +#elif defined (_CL_HAVE_DIRENT_H) +# include <dirent.h> +# define NAMLEN(dirent) strlen((dirent)->d_name) + +#else +# define dirent direct +# define NAMLEN(dirent) (dirent)->d_namlen +# if defined(_CL_HAVE_SYS_NDIR_H) +# include <sys/ndir.h> +# endif +# if defined(_CL_HHAVE_SYS_DIR_H) +# include <sys/dir.h> +# endif +# if defined(_CL_HHAVE_NDIR_H) +# include <ndir.h> +# endif + +#endif //HAVE_DIRENT_H +#endif diff --git a/3rdparty/clucene/src/CLucene/util/fileinputstream.cpp b/3rdparty/clucene/src/CLucene/util/fileinputstream.cpp new file mode 100644 index 000000000..9125d8478 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/fileinputstream.cpp @@ -0,0 +1,103 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Jos van den Oever +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. 
If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#include "jstreamsconfig.h" +#include "fileinputstream.h" + +#ifndef UNDER_CE +#include <cerrno> +#endif +#include <cstring> +namespace jstreams { + +const int32_t FileInputStream::defaultBufferSize = 1048576; +FileInputStream::FileInputStream(const char *filepath, int32_t buffersize) { + // try to open the file for reading + file = fopen(filepath, "rb"); + this->filepath = filepath; + if (file == 0) { + // handle error + error = "Could not read file '"; + error += filepath; + error += "': "; +#ifndef UNDER_CE + error += strerror(errno); +#endif + status = Error; + return; + } + // determine file size. if the stream is not seekable, the size will be -1 + fseek(file, 0, SEEK_END); + size = ftell(file); + fseek(file, 0, SEEK_SET); + + // if the file has size 0, make sure that it's really empty + // this is useful for filesystems like /proc that report files as size 0 + // for files that do contain content + if (size == 0) { + char dummy[1]; + size_t n = fread(dummy, 1, 1, file); + if (n == 1) { + size = -1; + fseek(file, 0, SEEK_SET); + } else { + fclose(file); + file = 0; + return; + } + } + + // allocate memory in the buffer + int32_t bufsize = (size <= buffersize) ?size+1 :buffersize; + mark(bufsize); +} +FileInputStream::~FileInputStream() { + if (file) { + if (fclose(file)) { + // handle error + error = "Could not close file '" + filepath + "'."; + } + } +} +int32_t +FileInputStream::fillBuffer(char* start, int32_t space) { + if (file == 0) return -1; + // read into the buffer + int32_t nwritten = fread(start, 1, space, file); + // check the file stream status + if (ferror(file)) { + error = "Could not read from file '" + filepath + "'."; + fclose(file); + file = 0; + status = Error; + return -1; + } + if (feof(file)) { + fclose(file); + file = 0; + } + return nwritten; +} +} diff --git 
a/3rdparty/clucene/src/CLucene/util/fileinputstream.h b/3rdparty/clucene/src/CLucene/util/fileinputstream.h new file mode 100644 index 000000000..144423da8 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/fileinputstream.h @@ -0,0 +1,38 @@ +/** + * Copyright 2003-2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef FILEINPUTSTREAM_H +#define FILEINPUTSTREAM_H + +#include "bufferedstream.h" + +namespace jstreams { + +class FileInputStream : public BufferedInputStream<char> { +private: + FILE *file; + std::string filepath; + +public: + static const int32_t defaultBufferSize; + FileInputStream(const char *filepath, int32_t buffersize=defaultBufferSize); + ~FileInputStream(); + int32_t fillBuffer(char* start, int32_t space); +}; + +} // end namespace jstreams + +#endif + diff --git a/3rdparty/clucene/src/CLucene/util/inputstreambuffer.h b/3rdparty/clucene/src/CLucene/util/inputstreambuffer.h new file mode 100644 index 000000000..873e811cd --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/inputstreambuffer.h @@ -0,0 +1,126 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Jos van den Oever +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#ifndef INPUTSTREAMBUFFER_H +#define INPUTSTREAMBUFFER_H + +#include <cstdlib> + +namespace jstreams { + +template <class T> +class InputStreamBuffer { +private: +public: + T* start; + int32_t size; + T* readPos; + int32_t avail; + + InputStreamBuffer(); + ~InputStreamBuffer(); + void setSize(int32_t size); + int32_t read(const T*& start, int32_t max=0); + + /** + * This function prepares the buffer for a new write. + * returns the number of available places. 
+ **/ + int32_t makeSpace(int32_t needed); +}; + +template <class T> +InputStreamBuffer<T>::InputStreamBuffer() { + readPos = start = 0; + size = avail = 0; +} +template <class T> +InputStreamBuffer<T>::~InputStreamBuffer() { + free(start); +} +template <class T> +void +InputStreamBuffer<T>::setSize(int32_t size) { + // store pointer information + int32_t offset = (int32_t)(readPos - start); + + // allocate memory in the buffer + if ( start == 0 ) + start = (T*)malloc(size*sizeof(T)); + else + start = (T*)realloc(start, size*sizeof(T)); + this->size = size; + + // restore pointer information + readPos = start + offset; +} +template <class T> +int32_t +InputStreamBuffer<T>::makeSpace(int32_t needed) { + // determine how much space is available for writing + int32_t space = size - ((int32_t)(readPos - start)) - avail; + if (space >= needed) { + // there's enough space + return space; + } + + if (avail) { + if (readPos != start) { +// printf("moving\n"); + // move data to the start of the buffer + memmove(start, readPos, avail*sizeof(T)); + space += (int32_t)(readPos - start); + readPos = start; + } + } else { + // we may start writing at the start of the buffer + readPos = start; + space = size; + } + if (space >= needed) { + // there's enough space now + return space; + } + + // still not enough space, we have to allocate more +// printf("resize %i %i %i %i %i\n", avail, needed, space, size + needed - space, size); + setSize(size + needed - space); + return needed; +} +template <class T> +int32_t +InputStreamBuffer<T>::read(const T*& start, int32_t max) { + start = readPos; + if (max <= 0 || max > avail) { + max = avail; + } + readPos += max; + avail -= max; + return max; +} + +} // end namespace jstreams + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/jstreamsconfig.h b/3rdparty/clucene/src/CLucene/util/jstreamsconfig.h new file mode 100644 index 000000000..2a6ce9f8d --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/jstreamsconfig.h @@ -0,0 +1,9 @@ 
+/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ + +//this is just a compatibility header for jstreams +#include "CLucene/StdHeader.h" diff --git a/3rdparty/clucene/src/CLucene/util/streambase.h b/3rdparty/clucene/src/CLucene/util/streambase.h new file mode 100644 index 000000000..b0d9dc167 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/streambase.h @@ -0,0 +1,148 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Jos van den Oever +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. 
If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#ifndef STREAMBASE_H +#define STREAMBASE_H + +#include <string> + +#if defined(_BUILD_FOR_QT_) + #include "StdHeader.h" +#endif + +#define INT32MAX 0x7FFFFFFFL + +namespace jstreams { + +enum StreamStatus { Ok, Eof, Error }; + +/** + * @short Base class for stream read access to many different file types. + * + * This class is based on the interface java.io.InputStream. It allows + * for uniform access to streamed resources. + * The main difference with the java equivalent is a performance improvement. + * When reading data, data is not copied into a buffer provided by the caller, + * but a pointer to the read data is provided. This makes this interface + * especially useful for deriving from it and implementing filterers or + * transformers. + */ +// java mapping: long=int64, int=int32, byte=uint8_t +template <class T> +class StreamBase { +protected: + int64_t size; + int64_t position; + std::string error; + StreamStatus status; +public: + StreamBase() :size(-1), position(0), status(Ok){ } + virtual ~StreamBase(){} + /** + * @brief Return a string representation of the last error. + * If no error has occurred, an empty string is returned. + **/ + const char* getError() const { return error.c_str(); } + StreamStatus getStatus() const { return status; } + /** + * @brief Get the current position in the stream. + * The value obtained from this function can be used to reset the stream. + **/ + int64_t getPosition() const { return position; } + /** + * @brief Return the size of the stream. + * If the size of the stream is unknown, -1 + * is returned. If the end of the stream has been reached the size is + * always known. + **/ + int64_t getSize() const { return size; } + /** + * @brief Reads characters from the stream and sets \a start to + * the first character that was read. 
+ * + * If @p ntoread is @c 0, then at least one character will be read. + * + * @param start Pointer passed by reference that will be set to point to + * the retrieved array of characters. If the end of the stream + * is encountered or an error occurs, the value of @p start + * is undefined. + * @param min The number of characters to read from the stream. + * @param max The maximum number of characters to read from the stream. + * @return the number of characters that were read. If -1 is returned, the + * end of the stream has been reached. If -2 is returned, an error + * has occurred. + **/ + virtual int32_t read(const T*& start, int32_t min, int32_t max) = 0; + /** + * Skip @param ntoskip bytes. Unless an error occurs or the end of file is + * encountered, this amount of bytes is skipped. + * This function returns new position in the stream. + **/ + virtual int64_t skip(int64_t ntoskip); + /** + * @brief Repositions this stream to given requested position. + * Reset is guaranteed to work after a successful call to read(), + * when the new position is in the range of the data returned by read(). + * This means that @p pos must lie between than the position + * corresponding to the start parameter (x) of the read function + * and the position corresponding to the last position in the returned + * buffer (x + nread). 
+ **/ + virtual int64_t reset(int64_t pos) = 0; + int64_t mark(int32_t readlimit) { + int64_t p = getPosition(); + const T* ptr; + read(ptr, readlimit, -1); + return reset(p); + } +}; +#define SKIPSTEP 1024 +template <class T> +int64_t +StreamBase<T>::skip(int64_t ntoskip) { + const T *begin; + int32_t nread; + int64_t skipped = 0; + while (ntoskip) { + int32_t step = (int32_t)((ntoskip > SKIPSTEP) ?SKIPSTEP :ntoskip); + nread = read(begin, 1, step); + if (nread < -1 ) { + // an error occurred + return nread; + } else if (nread < 1) { + ntoskip = 0; + } else { + skipped += nread; + ntoskip -= nread; + } + } + return skipped; +} + +} // end namespace jstreams + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/stringreader.h b/3rdparty/clucene/src/CLucene/util/stringreader.h new file mode 100644 index 000000000..698d07e37 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/stringreader.h @@ -0,0 +1,124 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Jos van den Oever +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#ifndef STRINGREADER_H +#define STRINGREADER_H + +/** + * Author: Jos van den Oever + * Ben van Klinken + **/ + + +#include "streambase.h" + +namespace jstreams { + +template <class T> +class StringReader : public StreamBase<T> { +private: + int64_t markpt; + T* data; + bool dataowner; + StringReader(const StringReader<T>&); + void operator=(const StringReader<T>&); +public: + StringReader(const T* value, int32_t length = -1, bool copy = true); + ~StringReader(); + int32_t read(const T*& start, int32_t min, int32_t max); + int64_t skip(int64_t ntoskip); + int64_t reset(int64_t pos); +}; + +typedef StringReader<char> StringInputStream; + +template <class T> +StringReader<T>::StringReader(const T* value, int32_t length, bool copy) + : markpt(0), dataowner(copy) { + if (length < 0) { + length = 0; + while (value[length] != '\0') { + length++; + } + } + StreamBase<T>::size = length; + if (copy) { + data = new T[length+1]; + size_t s = (size_t)(length*sizeof(T)); + memcpy(data, value, s); + data[length] = 0; + } else { + // casting away const is ok, because we don't write anyway + data = (T*)value; + } +} +template <class T> +StringReader<T>::~StringReader() { + if (dataowner) { + delete [] data; + } +} +template <class T> +int32_t +StringReader<T>::read(const T*& start, int32_t min, int32_t max) { + int64_t left = StreamBase<T>::size - StreamBase<T>::position; + if (left == 0) { + StreamBase<T>::status = Eof; + return -1; + } + if (min < 0) min = 0; + int32_t nread = (int32_t)((max > left || max < 1) ?left :max); + start = data + StreamBase<T>::position; + StreamBase<T>::position += nread; + if (StreamBase<T>::position == StreamBase<T>::size) { + StreamBase<T>::status = Eof; + } + return nread; +} +template 
<class T> +int64_t +StringReader<T>::skip(int64_t ntoskip) { + const T* start; + return read(start, ntoskip, ntoskip); +} +template <class T> +int64_t +StringReader<T>::reset(int64_t newpos) { + if (newpos < 0) { + StreamBase<T>::status = Ok; + StreamBase<T>::position = 0; + } else if (newpos < StreamBase<T>::size) { + StreamBase<T>::status = Ok; + StreamBase<T>::position = newpos; + } else { + StreamBase<T>::position = StreamBase<T>::size; + StreamBase<T>::status = Eof; + } + return StreamBase<T>::position; +} + +} // end namespace jstreams + +#endif diff --git a/3rdparty/clucene/src/CLucene/util/subinputstream.h b/3rdparty/clucene/src/CLucene/util/subinputstream.h new file mode 100644 index 000000000..8ae3e33c7 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/util/subinputstream.h @@ -0,0 +1,141 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Jos van den Oever +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. 
If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#ifndef SUBINPUTSTREAM_H +#define SUBINPUTSTREAM_H + +#include "streambase.h" + +namespace jstreams { + +template<class T> +class SubInputStream : public StreamBase<T> { +private: + const int64_t offset; + StreamBase<T> *input; +public: + SubInputStream(StreamBase<T> *input, int64_t size=-1); + int32_t read(const T*& start, int32_t min, int32_t max); + int64_t reset(int64_t newpos); + int64_t skip(int64_t ntoskip); +}; +template<class T> +SubInputStream<T>::SubInputStream(StreamBase<T> *i, int64_t length) + : offset(i->getPosition()), input(i) { + assert(length >= -1); +// printf("substream offset: %lli\n", offset); + StreamBase<T>::size = length; +} + +template<class T> +int32_t SubInputStream<T>::read(const T*& start, int32_t min, int32_t max) { + if (StreamBase<T>::size != -1) { + const int64_t left = StreamBase<T>::size - StreamBase<T>::position; + if (left == 0) { + return -1; + } + // restrict the amount of data that can be read + if (max <= 0 || max > left) { + max = (int32_t)left; + } + if (min > max) min = max; + if (left < min) min = (int32_t)left; + } + int32_t nread = input->read(start, min, max); + if (nread < -1) { + fprintf(stderr, "substream too short.\n"); + StreamBase<T>::status = Error; + StreamBase<T>::error = input->getError(); + } else if (nread < min) { + if (StreamBase<T>::size == -1) { + StreamBase<T>::status = Eof; + if (nread > 0) { + StreamBase<T>::position += nread; + StreamBase<T>::size = StreamBase<T>::position; + } + } else { +// fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
nread %i min %i max %i size %lli\n", nread, min, max, size); +// fprintf(stderr, "pos %lli parentpos %lli\n", position, input->getPosition()); +// fprintf(stderr, "status: %i error: %s\n", input->getStatus(), input->getError()); + // we expected data but didn't get enough so that's an error + StreamBase<T>::status = Error; + StreamBase<T>::error = "Premature end of stream\n"; + nread = -2; + } + } else { + StreamBase<T>::position += nread; + if (StreamBase<T>::position == StreamBase<T>::size) { + StreamBase<T>::status = Eof; + } + } + return nread; +} + +template<class T> +int64_t SubInputStream<T>::reset(int64_t newpos) { +// fprintf(stderr, "subreset pos: %lli newpos: %lli offset: %lli\n", position, +// newpos, offset); + StreamBase<T>::position = input->reset(newpos + offset); + if (StreamBase<T>::position < offset) { + printf("###########\n"); + StreamBase<T>::status = Error; + StreamBase<T>::error = input->getError(); + } else { + StreamBase<T>::position -= offset; + StreamBase<T>::status = input->getStatus(); + } + return StreamBase<T>::position; +} + +template<class T> +int64_t SubInputStream<T>::skip(int64_t ntoskip) { +// printf("subskip pos: %lli ntoskip: %lli offset: %lli\n", position, ntoskip, offset); + if (StreamBase<T>::size == StreamBase<T>::position) { + StreamBase<T>::status = Eof; + return -1; + } + if (StreamBase<T>::size != -1) { + const int64_t left = StreamBase<T>::size - StreamBase<T>::position; + // restrict the amount of data that can be skipped + if (ntoskip > left) { + ntoskip = left; + } + } + int64_t skipped = input->skip(ntoskip); + if (input->getStatus() == Error) { + StreamBase<T>::status = Error; + StreamBase<T>::error = input->getError(); + } else { + StreamBase<T>::position += skipped; + if (StreamBase<T>::position == StreamBase<T>::size) { + StreamBase<T>::status = Eof; + } + } + return skipped; +} + +} //end namespace jstreams + +#endif |