summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp150
1 files changed, 150 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp b/3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp
new file mode 100644
index 000000000..bed9e6e0c
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp
@@ -0,0 +1,150 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "WildcardTermEnum.h"
+
+CL_NS_USE(index)
+CL_NS_DEF(search)
+
+ bool WildcardTermEnum::termCompare(Term* term) {
+ if ( term!=NULL && __term->field() == term->field() ) {
+ const TCHAR* searchText = term->text();
+ const TCHAR* patternText = __term->text();
+ if ( _tcsncmp( searchText, pre, preLen ) == 0 ){
+ return wildcardEquals(patternText+preLen, __term->textLength()-preLen, 0, searchText, term->textLength(), preLen);
+ }
+ }
+ _endEnum = true;
+ return false;
+ }
+
+ /** Creates new WildcardTermEnum */
+ WildcardTermEnum::WildcardTermEnum(IndexReader* reader, Term* term):
+ FilteredTermEnum(),
+ __term(_CL_POINTER(term)),
+ fieldMatch(false),
+ _endEnum(false)
+ {
+
+ pre = stringDuplicate(term->text());
+
+ const TCHAR* sidx = _tcschr( pre, LUCENE_WILDCARDTERMENUM_WILDCARD_STRING );
+ const TCHAR* cidx = _tcschr( pre, LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR );
+ const TCHAR* tidx = sidx;
+ if (tidx == NULL)
+ tidx = cidx;
+ else if ( cidx && cidx > pre)
+ tidx = min(sidx, cidx);
+ CND_PRECONDITION(tidx != NULL, "tidx==NULL");
+ int32_t idx = (int32_t)(tidx - pre);
+ preLen = idx;
+ CND_PRECONDITION(preLen<term->textLength(), "preLen >= term->textLength()");
+ pre[preLen]=0; //trim end
+
+ Term* t = _CLNEW Term(__term, pre);
+ setEnum( reader->terms(t) );
+ _CLDECDELETE(t);
+ }
+
+ void WildcardTermEnum::close()
+ {
+ if ( __term != NULL ){
+ FilteredTermEnum::close();
+
+ _CLDECDELETE(__term);
+ __term = NULL;
+
+ _CLDELETE_CARRAY( pre );
+ }
+ }
+ WildcardTermEnum::~WildcardTermEnum() {
+ close();
+ }
+
+ qreal WildcardTermEnum::difference() {
+ return 1.0f;
+ }
+
+ bool WildcardTermEnum::endEnum() {
+ return _endEnum;
+ }
+
+ bool WildcardTermEnum::wildcardEquals(const TCHAR* pattern, int32_t patternLen, int32_t patternIdx, const TCHAR* str, int32_t strLen, int32_t stringIdx)
+ {
+ for (int32_t p = patternIdx; ; ++p)
+ {
+ for (int32_t s = stringIdx; ; ++p, ++s)
+ {
+ // End of str yet?
+ bool sEnd = (s >= strLen);
+ // End of pattern yet?
+ bool pEnd = (p >= patternLen);
+
+ // If we're looking at the end of the str...
+ if (sEnd)
+ {
+ // Assume the only thing left on the pattern is/are wildcards
+ bool justWildcardsLeft = true;
+
+ // Current wildcard position
+ int32_t wildcardSearchPos = p;
+ // While we haven't found the end of the pattern,
+ // and haven't encountered any non-wildcard characters
+ while (wildcardSearchPos < patternLen && justWildcardsLeft)
+ {
+ // Check the character at the current position
+ TCHAR wildchar = pattern[wildcardSearchPos];
+ // If it's not a wildcard character, then there is more
+ // pattern information after this/these wildcards.
+
+ if (wildchar != LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR &&
+ wildchar != LUCENE_WILDCARDTERMENUM_WILDCARD_STRING){
+ justWildcardsLeft = false;
+ }else{
+ // to prevent "cat" matches "ca??"
+ if (wildchar == LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR)
+ return false;
+
+ wildcardSearchPos++; // Look at the next character
+ }
+ }
+
+ // This was a prefix wildcard search, and we've matched, so
+ // return true.
+ if (justWildcardsLeft)
+ return true;
+ }
+
+ // If we've gone past the end of the str, or the pattern,
+ // return false.
+ if (sEnd || pEnd)
+ break;
+
+ // Match a single character, so continue.
+ if (pattern[p] == LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR)
+ continue;
+
+ if (pattern[p] == LUCENE_WILDCARDTERMENUM_WILDCARD_STRING)
+ {
+ // Look at the character beyond the '*'.
+ ++p;
+ // Examine the str, starting at the last character.
+ for (int32_t i = strLen; i >= s; --i)
+ {
+ if (wildcardEquals(pattern, patternLen, p, str, strLen, i))
+ return true;
+ }
+ break;
+ }
+ if (pattern[p] != str[s])
+ break;
+ }
+ return false;
+ }
+ }
+
+CL_NS_END