diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/queryParser')
12 files changed, 2302 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/queryParser/Lexer.cpp b/3rdparty/clucene/src/CLucene/queryParser/Lexer.cpp new file mode 100644 index 000000000..861c5d3cb --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/Lexer.cpp @@ -0,0 +1,371 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "Lexer.h" + +#include "CLucene/util/FastCharStream.h" +#include "CLucene/util/Reader.h" +#include "CLucene/util/StringBuffer.h" +#include "TokenList.h" +#include "QueryToken.h" +#include "QueryParserBase.h" + +CL_NS_USE(util) + +CL_NS_DEF(queryParser) +Lexer::Lexer(QueryParserBase* queryparser, const TCHAR* query) { + //Func - Constructor + //Pre - query != NULL and contains the query string + //Post - An instance of Lexer has been created + + this->queryparser = queryparser; + + CND_PRECONDITION(query != NULL, "query is NULL"); + + //The InputStream of Reader must be destroyed in the destructor + delSR = true; + + StringReader *r = _CLNEW StringReader(query); + + //Check to see if r has been created properly + CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r"); + + //Instantie a FastCharStream instance using r and assign it to reader + reader = _CLNEW FastCharStream(r); + + //Check to see if reader has been created properly + CND_CONDITION(reader != NULL, "Could not allocate memory for FastCharStream reader"); + + //The InputStream of Reader must be destroyed in the destructor + delSR = true; + +} + + +Lexer::Lexer(QueryParserBase* queryparser, Reader* source) { + //Func - Constructor + // Initializes a new instance of the Lexer class with the specified + // TextReader to lex. + //Pre - Source contains a valid reference to a Reader + //Post - An instance of Lexer has been created using source as the reader + + this->queryparser = queryparser; + + //Instantie a FastCharStream instance using r and assign it to reader + reader = _CLNEW FastCharStream(source); + + //Check to see if reader has been created properly + CND_CONDITION(reader != NULL, "Could not allocate memory for FastCharStream reader"); + + //The InputStream of Reader must not be destroyed in the destructor + delSR = false; +} + + +Lexer::~Lexer() { + //Func - Destructor + //Pre - true + //Post - if delSR was true the InputStream input of reader has been deleted + // The instance of Lexer has been destroyed + + if (delSR) { + _CLDELETE(reader->input); + } + + _CLDELETE(reader); +} + + +void Lexer::Lex(TokenList *tokenList) { + //Func - Breaks the input stream onto the tokens list tokens + //Pre - tokens != NULL and contains a TokenList in which the tokens can be stored + //Post - The tokens have been added to the TokenList tokens + + CND_PRECONDITION(tokenList != NULL, "tokens is NULL"); + + //Get all the tokens + while(true) { + //Add the token to the tokens list + + //Get the next token + QueryToken* token = _CLNEW QueryToken; + if ( !GetNextToken(token) ){ + _CLDELETE(token); + break; + } + tokenList->add(token); + } + + //The end has been reached so create an EOF_ token + //Add the final token to the TokenList _tokens + tokenList->add(_CLNEW QueryToken( QueryToken::EOF_)); +} + + +bool Lexer::GetNextToken(QueryToken* token) { + while(!reader->Eos()) { + int ch = reader->GetNext(); + + if ( ch == -1 ) + break; + + // skipping whitespaces + if( _istspace(ch)!=0 ) { + continue; + } + TCHAR buf[2] = {ch,'\0'}; + switch(ch) { + case '+': + token->set(buf, QueryToken::PLUS); + return true; + case '-': + token->set(buf, QueryToken::MINUS); + return true; + case '(': + token->set(buf, QueryToken::LPAREN); + return true; + case ')': + token->set(buf, QueryToken::RPAREN); + return true; + case ':': + token->set(buf, QueryToken::COLON); + return true; + case '!': + token->set(buf, QueryToken::NOT); + return true; + case '^': + token->set(buf, QueryToken::CARAT); + return true; + case '~': + if( _istdigit( reader->Peek() )!=0 ) { + TCHAR number[LUCENE_MAX_FIELD_LEN]; + ReadIntegerNumber(ch, number,LUCENE_MAX_FIELD_LEN); + token->set(number, QueryToken::SLOP); + return true; + }else{ + token->set(buf, QueryToken::FUZZY); + return true; + } + break; + case '"': + return ReadQuoted(ch, token); + case '[': + return ReadInclusiveRange(ch, token); + case '{': + return ReadExclusiveRange(ch, token); + case ']': + case '}': + case '*': + queryparser->throwParserException( _T("Unrecognized TCHAR %d at %d::%d."), + ch, reader->Column(), reader->Line() ); + return false; + default: + return ReadTerm(ch, token); + + // end of swith + } + + } + return false; +} + + +void Lexer::ReadIntegerNumber(const TCHAR ch, TCHAR* buf, int buflen) { + int bp=0; + buf[bp++] = ch; + + int c = reader->Peek(); + while( c!=-1 && _istdigit(c)!=0 && bp<buflen-1 ) { + buf[bp++] = reader->GetNext(); + c = reader->Peek(); + } + buf[bp++] = 0; +} + + +bool Lexer::ReadInclusiveRange(const TCHAR prev, QueryToken* token) { + int ch = prev; + StringBuffer range; + range.appendChar(ch); + + while(!reader->Eos()) { + ch = reader->GetNext(); + if ( ch == -1 ) + break; + range.appendChar(ch); + + if(ch == ']'){ + token->set(range.getBuffer(), QueryToken::RANGEIN); + return true; + } + } + queryparser->throwParserException(_T("Unterminated inclusive range! %d %d::%d"),' ', + reader->Column(),reader->Column()); + return false; +} + + +bool Lexer::ReadExclusiveRange(const TCHAR prev, QueryToken* token) { + int ch = prev; + StringBuffer range; + range.appendChar(ch); + + while(!reader->Eos()) { + ch = reader->GetNext(); + + if (ch==-1) + break; + range.appendChar(ch); + + if(ch == '}'){ + token->set(range.getBuffer(), QueryToken::RANGEEX); + return true; + } + } + queryparser->throwParserException(_T("Unterminated exclusive range! %d %d::%d"),' ', + reader->Column(),reader->Column() ); + return false; +} + +bool Lexer::ReadQuoted(const TCHAR prev, QueryToken* token) { + int ch = prev; + StringBuffer quoted; + quoted.appendChar(ch); + + while(!reader->Eos()) { + ch = reader->GetNext(); + + if (ch==-1) + break; + + quoted.appendChar(ch); + + if(ch == '"'){ + token->set(quoted.getBuffer(), QueryToken::QUOTED); + return true; + } + } + queryparser->throwParserException(_T("Unterminated string! %d %d::%d"),' ', + reader->Column(),reader->Column()); + return false; +} + + +bool Lexer::ReadTerm(const TCHAR prev, QueryToken* token) { + int ch = prev; + bool completed = false; + int32_t asteriskCount = 0; + bool hasQuestion = false; + + StringBuffer val; + TCHAR buf[3]; //used for readescaped + + while(true) { + switch(ch) { + case -1: + break; + case '\\': + { + if ( ReadEscape(ch, buf) ) + val.append( buf ); + else + return false; + } + break; + + case LUCENE_WILDCARDTERMENUM_WILDCARD_STRING: + asteriskCount++; + val.appendChar(ch); + break; + case LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR: + hasQuestion = true; + val.appendChar(ch); + break; + case '\n': + case '\t': + case ' ': + case '+': + case '-': + case '!': + case '(': + case ')': + case ':': + case '^': + case '[': + case ']': + case '{': + case '}': + case '~': + case '"': + // create new QueryToken + reader->UnGet(); + completed = true; + break; + default: + val.appendChar(ch); + break; + // end of switch + } + + if(completed || ch==-1 || reader->Eos() ) + break; + else + ch = reader->GetNext(); + } + + // create new QueryToken + if(hasQuestion) + token->set(val.getBuffer(), QueryToken::WILDTERM); + else if(asteriskCount == 1 && val.getBuffer()[val.length() - 1] == '*') + token->set(val.getBuffer(), QueryToken::PREFIXTERM); + else if(asteriskCount > 0) + token->set(val.getBuffer(), QueryToken::WILDTERM); + else if( _tcsicmp(val.getBuffer(), _T("AND"))==0 || _tcscmp(val.getBuffer(), _T("&&"))==0 ) + token->set(val.getBuffer(), QueryToken::AND_); + else if( _tcsicmp(val.getBuffer(), _T("OR"))==0 || _tcscmp(val.getBuffer(), _T("||"))==0) + token->set(val.getBuffer(), QueryToken::OR); + else if( _tcsicmp(val.getBuffer(), _T("NOT"))==0 ) + token->set(val.getBuffer(), QueryToken::NOT); + else { + bool isnum = true; + int32_t nlen=val.length(); + for (int32_t i=0;i<nlen;++i) { + TCHAR ch=val.getBuffer()[i]; + if ( _istalpha(ch) ) { + isnum=false; + break; + } + } + + if ( isnum ) + token->set(val.getBuffer(), QueryToken::NUMBER); + else + token->set(val.getBuffer(), QueryToken::TERM); + } + return true; +} + + +bool Lexer::ReadEscape(TCHAR prev, TCHAR* buf) { + TCHAR ch = prev; + int bp=0; + buf[bp++] = ch; + + ch = reader->GetNext(); + int32_t idx = _tcscspn( buf, _T("\\+-!():^[]{}\"~*") ); + if(idx == 0) { + buf[bp++] = ch; + buf[bp++]=0; + return true; + } + queryparser->throwParserException(_T("Unrecognized escape sequence at %d %d::%d"), ' ', + reader->Column(),reader->Line()); + return false; +} + + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/queryParser/Lexer.h b/3rdparty/clucene/src/CLucene/queryParser/Lexer.h new file mode 100644 index 000000000..b3b55523e --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/Lexer.h @@ -0,0 +1,67 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +#ifndef _lucene_queryParser_Lexer_ +#define _lucene_queryParser_Lexer_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/util/FastCharStream.h" +#include "CLucene/util/Reader.h" +#include "CLucene/util/StringBuffer.h" + +#include "TokenList.h" + +CL_NS_DEF(queryParser) +class QueryParserBase; + // A simple Lexer that is used by QueryParser. + class Lexer:LUCENE_BASE + { + private: + CL_NS(util)::FastCharStream* reader; + QueryParserBase* queryparser; //holds the queryparser so that we can do callbacks + bool delSR; //Indicates if the reader must be deleted or not + + public: + // Initializes a new instance of the Lexer class with the specified + // query to lex. + Lexer(QueryParserBase* queryparser, const TCHAR* query); + + // Initializes a new instance of the Lexer class with the specified + // TextReader to lex. + Lexer(QueryParserBase* queryparser, CL_NS(util)::Reader* source); + + //Breaks the input stream onto the tokens list tokens + void Lex(TokenList *tokens); + + ~Lexer(); + + private: + bool GetNextToken(QueryToken* token); + + // Reads an integer number. buf should quite large, probably as large as a field should ever be + void ReadIntegerNumber(const TCHAR ch, TCHAR* buf, int buflen); + + // Reads an inclusive range like [some words] + bool ReadInclusiveRange(const TCHAR prev, QueryToken* token); + + // Reads an exclusive range like {some words} + bool ReadExclusiveRange(const TCHAR prev, QueryToken* token); + + // Reads quoted string like "something else" + bool ReadQuoted(const TCHAR prev, QueryToken* token); + + bool ReadTerm(const TCHAR prev, QueryToken* token); + + //reads an escaped character into the buf. Buf requires at least 3 characters + bool ReadEscape(const TCHAR prev, TCHAR* buf); + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/queryParser/MultiFieldQueryParser.cpp b/3rdparty/clucene/src/CLucene/queryParser/MultiFieldQueryParser.cpp new file mode 100644 index 000000000..b57896b66 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/MultiFieldQueryParser.cpp @@ -0,0 +1,215 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "MultiFieldQueryParser.h" +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/search/BooleanQuery.h" +#include "CLucene/search/PhraseQuery.h" +#include "CLucene/search/SearchHeader.h" +#include "QueryParser.h" + +CL_NS_USE(index) +CL_NS_USE(util) +CL_NS_USE(search) +CL_NS_USE(analysis) + +CL_NS_DEF(queryParser) + +MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** fields, + CL_NS(analysis)::Analyzer* analyzer, BoostMap* boosts) + : QueryParser(NULL, analyzer) +{ + this->fields = fields; + this->boosts = boosts; +} + +MultiFieldQueryParser::~MultiFieldQueryParser() +{ +} + +//static +Query* MultiFieldQueryParser::parse(const TCHAR* query, const TCHAR** fields, + Analyzer* analyzer) +{ + BooleanQuery* bQuery = _CLNEW BooleanQuery(); + int32_t i = 0; + while (fields[i] != NULL){ + Query* q = QueryParser::parse(query, fields[i], analyzer); + if (q && (q->getQueryName() != _T("BooleanQuery") + || ((BooleanQuery*)q)->getClauseCount() > 0)) { + bQuery->add(q , true, false, false); + } else { + _CLDELETE(q); + } + i++; + } + return bQuery; +} + +//static +Query* MultiFieldQueryParser::parse(const TCHAR* query, const TCHAR** fields, + const uint8_t* flags, Analyzer* analyzer) +{ + BooleanQuery* bQuery = _CLNEW BooleanQuery(); + int32_t i = 0; + while ( fields[i] != NULL ) { + Query* q = QueryParser::parse(query, fields[i], analyzer); + if (q && (q->getQueryName() != _T("BooleanQuery") + || ((BooleanQuery*)q)->getClauseCount() > 0)) { + uint8_t flag = flags[i]; + switch (flag) { + case MultiFieldQueryParser::REQUIRED_FIELD: + bQuery->add(q, true, true, false); + break; + case MultiFieldQueryParser::PROHIBITED_FIELD: + bQuery->add(q, true, false, true); + break; + default: + bQuery->add(q, true, false, false); + break; + } + } else { + _CLDELETE(q); + } + i++; + } + return bQuery; +} + + +Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop){ + if (field == NULL) { + CL_NS_STD(vector)<BooleanClause*> clauses; + for (int i = 0; fields[i]!=NULL; ++i) { + Query* q = QueryParser::GetFieldQuery(fields[i], queryText); + if (q != NULL) { + //If the user passes a map of boosts + if (boosts != NULL) { + //Get the boost from the map and apply them + BoostMap::const_iterator itr = boosts->find(fields[i]); + if (itr != boosts->end()) { + q->setBoost(itr->second); + } + } + if (q->getQueryName() == PhraseQuery::getClassName()) { + ((PhraseQuery*)q)->setSlop(slop); + } + //if (q instanceof MultiPhraseQuery) { + // ((MultiPhraseQuery) q).setSlop(slop); + //} + q = QueryAddedCallback(fields[i], q); + if ( q ) + clauses.push_back(_CLNEW BooleanClause(q, true, false,false)); + } + } + if (clauses.size() == 0) // happens for stopwords + return NULL; + Query* q = QueryParser::GetBooleanQuery(clauses); + return q; + }else{ + Query* q = QueryParser::GetFieldQuery(field, queryText); + if ( q ) + q = QueryAddedCallback(field,q); + return q; + } +} + + +Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, TCHAR* queryText){ + return GetFieldQuery(field, queryText, 0); +} + + +CL_NS(search)::Query* MultiFieldQueryParser::GetFuzzyQuery(const TCHAR* field, TCHAR* termStr){ + if (field == NULL) { + CL_NS_STD(vector)<BooleanClause*> clauses; + for (int i = 0; fields[i]!=NULL; ++i) { + Query* q = QueryParser::GetFuzzyQuery(fields[i], termStr); //todo: , minSimilarity + if ( q ){ + q = QueryAddedCallback(fields[i], q); + if ( q ){ + clauses.push_back(_CLNEW BooleanClause(q,true,false,false) ); + } + } + } + return QueryParser::GetBooleanQuery(clauses); + }else{ + Query* q = QueryParser::GetFuzzyQuery(field, termStr);//todo: , minSimilarity + if ( q ) + q = QueryAddedCallback(field,q); + return q; + } +} + +Query* MultiFieldQueryParser::GetPrefixQuery(const TCHAR* field, TCHAR* termStr){ + if (field == NULL) { + CL_NS_STD(vector)<BooleanClause*> clauses; + for (int i = 0; fields[i]!=NULL; ++i) { + Query* q = QueryParser::GetPrefixQuery(fields[i], termStr); + if ( q ){ + q = QueryAddedCallback(fields[i],q); + if ( q ){ + clauses.push_back(_CLNEW BooleanClause(q,true,false,false)); + } + } + } + return QueryParser::GetBooleanQuery(clauses); + }else{ + Query* q = QueryParser::GetPrefixQuery(field, termStr); + if ( q ) + q = QueryAddedCallback(field,q); + return q; + } +} + +Query* MultiFieldQueryParser::GetWildcardQuery(const TCHAR* field, TCHAR* termStr){ + if (field == NULL) { + CL_NS_STD(vector)<BooleanClause*> clauses; + for (int i = 0; fields[i]!=NULL; ++i) { + Query* q = QueryParser::GetWildcardQuery(fields[i], termStr); + if ( q ){ + q = QueryAddedCallback(fields[i],q); + if ( q ){ + clauses.push_back(_CLNEW BooleanClause(q,true,false,false)); + } + } + } + return QueryParser::GetBooleanQuery(clauses); + }else{ + Query* q = QueryParser::GetWildcardQuery(field, termStr); + if ( q ) + q = QueryAddedCallback(field,q); + return q; + } +} + + +Query* MultiFieldQueryParser::GetRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, bool inclusive){ + if (field == NULL) { + CL_NS_STD(vector)<BooleanClause*> clauses; + for (int i = 0; fields[i]!=NULL; ++i) { + Query* q = QueryParser::GetRangeQuery(fields[i], part1, part2, inclusive); + if ( q ){ + q = QueryAddedCallback(fields[i],q); + if ( q ){ + clauses.push_back(_CLNEW BooleanClause(q,true,false,false)); + } + } + } + return QueryParser::GetBooleanQuery(clauses); + }else{ + Query* q = QueryParser::GetRangeQuery(field, part1, part2, inclusive); + if ( q ) + q = QueryAddedCallback(field,q); + return q; + } +} + + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/queryParser/MultiFieldQueryParser.h b/3rdparty/clucene/src/CLucene/queryParser/MultiFieldQueryParser.h new file mode 100644 index 000000000..bf7d652a7 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/MultiFieldQueryParser.h @@ -0,0 +1,136 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef MultiFieldQueryParser_H +#define MultiFieldQueryParser_H + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/search/SearchHeader.h" +#include "QueryParser.h" + + +CL_NS_DEF(queryParser) + +typedef CL_NS(util)::CLHashMap<const TCHAR*, + qreal, + CL_NS(util)::Compare::TChar, + CL_NS(util)::Equals::TChar, + CL_NS(util)::Deletor::tcArray, + CL_NS(util)::Deletor::DummyFloat + > BoostMap; + + /** + * A QueryParser which constructs queries to search multiple fields. + * + */ + class MultiFieldQueryParser: public QueryParser + { + protected: + const TCHAR** fields; + BoostMap* boosts; + public: + LUCENE_STATIC_CONSTANT(uint8_t, NORMAL_FIELD=0); + LUCENE_STATIC_CONSTANT(uint8_t, REQUIRED_FIELD=1); + LUCENE_STATIC_CONSTANT(uint8_t, PROHIBITED_FIELD=2); + + /** + * Creates a MultiFieldQueryParser. + * + * <p>It will, when parse(String query) + * is called, construct a query like this (assuming the query consists of + * two terms and you specify the two fields <code>title</code> and <code>body</code>):</p> + * + * <code> + * (title:term1 body:term1) (title:term2 body:term2) + * </code> + * + * <p>When setDefaultOperator(AND_OPERATOR) is set, the result will be:</p> + * + * <code> + * +(title:term1 body:term1) +(title:term2 body:term2) + * </code> + * + * <p>In other words, all the query's terms must appear, but it doesn't matter in + * what fields they appear.</p> + */ + MultiFieldQueryParser(const TCHAR** fields, CL_NS(analysis)::Analyzer* a, BoostMap* boosts = NULL); + virtual ~MultiFieldQueryParser(); + + /** + * <p> + * Parses a query which searches on the fields specified. + * <p> + * If x fields are specified, this effectively constructs: + * <pre> + * <code> + * (field1:query) (field2:query) (field3:query)...(fieldx:query) + * </code> + * </pre> + * + * @param query Query string to parse + * @param fields Fields to search on + * @param analyzer Analyzer to use + * @throws ParserException if query parsing fails + * @throws TokenMgrError if query parsing fails + */ + static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR** fields, CL_NS(analysis)::Analyzer* analyzer); + + /** + * <p> + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. + * <p><pre> + * Usage: + * <code> + * String[] fields = {"filename", "contents", "description"}; + * int32_t[] flags = {MultiFieldQueryParser.NORMAL FIELD, + * MultiFieldQueryParser.REQUIRED FIELD, + * MultiFieldQueryParser.PROHIBITED FIELD,}; + * parse(query, fields, flags, analyzer); + * </code> + * </pre> + *<p> + * The code above would construct a query: + * <pre> + * <code> + * (filename:query) +(contents:query) -(description:query) + * </code> + * </pre> + * + * @param query Query string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParserException if query parsing fails + * @throws TokenMgrError if query parsing fails + */ + static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR** fields, const uint8_t* flags, CL_NS(analysis)::Analyzer* analyzer); + + + + protected: + CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText); + CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop); + CL_NS(search)::Query* GetFuzzyQuery(const TCHAR* field, TCHAR* termStr); + CL_NS(search)::Query* GetRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, bool inclusive); + CL_NS(search)::Query* GetPrefixQuery(const TCHAR* field, TCHAR* termStr); + CL_NS(search)::Query* GetWildcardQuery(const TCHAR* field, TCHAR* termStr); + + /** + * A special virtual function for the MultiFieldQueryParser which can be used + * to clean up queries. Once the field name is known and the query has been + * created, its passed to this function. + * An example of this usage is to set boosts. + */ + virtual CL_NS(search)::Query* QueryAddedCallback(const TCHAR* field, CL_NS(search)::Query* query){ return query; } + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp new file mode 100644 index 000000000..b11eec0bb --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp @@ -0,0 +1,509 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "QueryParser.h" + +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/util/Reader.h" +#include "CLucene/search/SearchHeader.h" +#include "CLucene/index/Term.h" + +#include "TokenList.h" +#include "QueryToken.h" +#include "QueryParserBase.h" +#include "Lexer.h" + +CL_NS_USE(util) +CL_NS_USE(index) +CL_NS_USE(analysis) +CL_NS_USE(search) + +CL_NS_DEF(queryParser) + + QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){ + //Func - Constructor. + // Instantiates a QueryParser for the named field _field + //Pre - _field != NULL + //Post - An instance has been created + + if ( _field ) + field = STRDUP_TtoT(_field); + else + field = NULL; + tokens = NULL; + lowercaseExpandedTerms = true; + } + + QueryParser::~QueryParser() { + //Func - Destructor + //Pre - true + //Post - The instance has been destroyed + + _CLDELETE_CARRAY(field); + } + + //static + Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){ + //Func - Returns a new instance of the Query class with a specified query, field and + // analyzer values. + //Pre - query != NULL and holds the query to parse + // field != NULL and holds the default field for query terms + // analyzer holds a valid reference to an Analyzer and is used to + // find terms in the query text + //Post - query has been parsed and an instance of Query has been returned + + CND_PRECONDITION(query != NULL, "query is NULL"); + CND_PRECONDITION(field != NULL, "field is NULL"); + + QueryParser parser(field, analyzer); + return parser.parse(query); + } + + Query* QueryParser::parse(const TCHAR* query){ + //Func - Returns a parsed Query instance + //Pre - query != NULL and contains the query value to be parsed + //Post - Returns a parsed Query Instance + + CND_PRECONDITION(query != NULL, "query is NULL"); + + //Instantie a Stringer that can read the query string + Reader* r = _CLNEW StringReader(query); + + //Check to see if r has been created properly + CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r"); + + //Pointer for the return value + Query* ret = NULL; + + try{ + //Parse the query managed by the StringReader R and return a parsed Query instance + //into ret + ret = parse(r); + }_CLFINALLY ( + _CLDELETE(r); + ); + + return ret; + } + + Query* QueryParser::parse(Reader* reader){ + //Func - Returns a parsed Query instance + //Pre - reader contains a valid reference to a Reader and manages the query string + //Post - A parsed Query instance has been returned or + + //instantiate the TokenList tokens + TokenList _tokens; + this->tokens = &_tokens; + + //Instantiate a lexer + Lexer lexer(this, reader); + + //tokens = lexer.Lex(); + //Lex the tokens + lexer.Lex(tokens); + + //Peek to the first token and check if is an EOF + if (tokens->peek()->Type == QueryToken::EOF_){ + // The query string failed to yield any tokens. We discard the + // TokenList tokens and raise an exceptioin. + QueryToken* token = this->tokens->extract(); + _CLDELETE(token); + _CLTHROWA(CL_ERR_Parse, "No query given."); + } + + //Return the parsed Query instance + Query* ret = MatchQuery(field); + this->tokens = NULL; + return ret; + } + + int32_t QueryParser::MatchConjunction(){ + //Func - matches for CONJUNCTION + // CONJUNCTION ::= <AND> | <OR> + //Pre - tokens != NULL + //Post - if the first token is an AND or an OR then + // the token is extracted and deleted and CONJ_AND or CONJ_OR is returned + // otherwise CONJ_NONE is returned + + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + switch(tokens->peek()->Type){ + case QueryToken::AND_ : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return CONJ_AND; + case QueryToken::OR : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return CONJ_OR; + default : + return CONJ_NONE; + } + } + + int32_t QueryParser::MatchModifier(){ + //Func - matches for MODIFIER + // MODIFIER ::= <PLUS> | <MINUS> | <NOT> + //Pre - tokens != NULL + //Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned + // if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned + // otherwise MOD_NONE is returned + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + switch(tokens->peek()->Type){ + case QueryToken::PLUS : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return MOD_REQ; + case QueryToken::MINUS : + case QueryToken::NOT : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return MOD_NOT; + default : + return MOD_NONE; + } + } + + Query* QueryParser::MatchQuery(const TCHAR* field){ + //Func - matches for QUERY + // QUERY ::= [MODIFIER] QueryParser::CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)* + //Pre - field != NULL + //Post - + + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + CL_NS_STD(vector)<BooleanClause*> clauses; + + Query* q = NULL; + + int32_t mods = MOD_NONE; + int32_t conj = CONJ_NONE; + + //match for MODIFIER + mods = MatchModifier(); + + //match for CLAUSE + q = MatchClause(field); + AddClause(clauses, CONJ_NONE, mods, q); + + // match for CLAUSE* + while(true){ + QueryToken* p = tokens->peek(); + if(p->Type == QueryToken::EOF_){ + QueryToken* qt = MatchQueryToken(QueryToken::EOF_); + _CLDELETE(qt); + break; + } + + if(p->Type == QueryToken::RPAREN){ + //MatchQueryToken(QueryToken::RPAREN); + break; + } + + //match for a conjuction (AND OR NOT) + conj = MatchConjunction(); + //match for a modifier + mods = MatchModifier(); + + q = MatchClause(field); + if ( q != NULL ) + AddClause(clauses, conj, mods, q); + } + + // finalize query + if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL + BooleanClause* c = clauses[0]; + Query* q = c->query; + + //Condition check to be sure clauses[0] is valid + CND_CONDITION(c != NULL, "c is NULL"); + + //Tell the boolean clause not to delete its query + c->deleteQuery=false; + //Clear the clauses list + clauses.clear(); + _CLDELETE(c); + + return q; + }else{ + return GetBooleanQuery(clauses); + } + } + + Query* QueryParser::MatchClause(const TCHAR* field){ + //Func - matches for CLAUSE + // CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>)) + //Pre - field != NULL + //Post - + + Query* q = NULL; + const TCHAR* sfield = field; + bool delField = false; + + QueryToken *DelToken = NULL; + + //match for [TERM <COLON>] + QueryToken* term = tokens->extract(); + if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){ + DelToken = MatchQueryToken(QueryToken::COLON); + + CND_CONDITION(DelToken != NULL,"DelToken is NULL"); + _CLDELETE(DelToken); + + TCHAR* tmp = STRDUP_TtoT(term->Value); + discardEscapeChar(tmp); + delField = true; + sfield = tmp; + _CLDELETE(term); + }else{ + tokens->push(term); + term = NULL; + } + + // match for + // TERM | (<LPAREN> QUERY <RPAREN>) + if(tokens->peek()->Type == QueryToken::LPAREN){ + DelToken = MatchQueryToken(QueryToken::LPAREN); + + CND_CONDITION(DelToken != NULL,"DelToken is NULL"); + _CLDELETE(DelToken); + + q = MatchQuery(sfield); + //DSR:2004.11.01: + //If exception is thrown while trying to match trailing parenthesis, + //need to prevent q from leaking. + + try{ + DelToken = MatchQueryToken(QueryToken::RPAREN); + + CND_CONDITION(DelToken != NULL,"DelToken is NULL"); + _CLDELETE(DelToken); + + }catch(...) { + _CLDELETE(q); + throw; + } + }else{ + q = MatchTerm(sfield); + } + + if ( delField ) + _CLDELETE_CARRAY(sfield); + return q; + } + + + Query* QueryParser::MatchTerm(const TCHAR* field){ + //Func - matches for TERM + // TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER + // [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]] + // | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>] + // | <QUOTED> [SLOP] [<CARAT> <NUMBER>] + //Pre - field != NULL + //Post - + + QueryToken* term = NULL; + QueryToken* slop = NULL; + QueryToken* boost = NULL; + + bool prefix = false; + bool wildcard = false; + bool fuzzy = false; + bool rangein = false; + Query* q = NULL; + + term = tokens->extract(); + QueryToken* DelToken = NULL; //Token that is about to be deleted + + switch(term->Type){ + case QueryToken::TERM: + case QueryToken::NUMBER: + case QueryToken::PREFIXTERM: + case QueryToken::WILDTERM: + { //start case + //Check if type of QueryToken term is a prefix term + if(term->Type == QueryToken::PREFIXTERM){ + prefix = true; + } + //Check if type of QueryToken term is a wildcard term + if(term->Type == QueryToken::WILDTERM){ + wildcard = true; + } + //Peek to see if the type of the next token is fuzzy term + if(tokens->peek()->Type == QueryToken::FUZZY){ + DelToken = MatchQueryToken(QueryToken::FUZZY); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + fuzzy = true; + } + if(tokens->peek()->Type == QueryToken::CARAT){ + DelToken = MatchQueryToken(QueryToken::CARAT); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + boost = MatchQueryToken(QueryToken::NUMBER); + + if(tokens->peek()->Type == QueryToken::FUZZY){ + DelToken = MatchQueryToken(QueryToken::FUZZY); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + fuzzy = true; + } + } //end if type==CARAT + + discardEscapeChar(term->Value); //clean up + if(wildcard){ + q = GetWildcardQuery(field,term->Value); + break; + }else if(prefix){ + //Create a PrefixQuery + term->Value[_tcslen(term->Value)-1] = 0; //discard the * + q = GetPrefixQuery(field,term->Value); + break; + }else if(fuzzy){ + //Create a FuzzyQuery + + //Check if the last char is a ~ + if(term->Value[_tcslen(term->Value)-1] == '~'){ + //remove the ~ + term->Value[_tcslen(term->Value)-1] = '\0'; + } + + q = GetFuzzyQuery(field,term->Value); + break; + }else{ + q = GetFieldQuery(field, term->Value); + break; + } + } + + + case QueryToken::RANGEIN: + case QueryToken::RANGEEX:{ + if(term->Type == QueryToken::RANGEIN){ + rangein = true; + } + + if(tokens->peek()->Type == QueryToken::CARAT){ + DelToken = MatchQueryToken(QueryToken::CARAT); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + boost = MatchQueryToken(QueryToken::NUMBER); + } + + TCHAR* noBrackets = term->Value + 1; + noBrackets[_tcslen(noBrackets)-1] = 0; + q = ParseRangeQuery(field, noBrackets, rangein); + break; + } + + + case QueryToken::QUOTED:{ + if(tokens->peek()->Type == QueryToken::SLOP){ + slop = MatchQueryToken(QueryToken::SLOP); + } + + if(tokens->peek()->Type == QueryToken::CARAT){ + DelToken = MatchQueryToken(QueryToken::CARAT); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + boost = MatchQueryToken(QueryToken::NUMBER); + } + + //remove the quotes + TCHAR* quotedValue = term->Value+1; + quotedValue[_tcslen(quotedValue)-1] = '\0'; + + int32_t islop = phraseSlop; + if(slop != NULL ){ + try { + TCHAR* end; //todo: should parse using float... + islop = (int32_t)_tcstoi64(slop->Value+1, &end, 10); + }catch(...){ + //ignored + } + } + + q = GetFieldQuery(field, quotedValue, islop); + _CLDELETE(slop); + } + } // end of switch + + _CLDELETE(term); + + + if( q!=NULL && boost != NULL ){ + qreal f = 1.0F; + try { + TCHAR* tmp; + f = _tcstod(boost->Value, &tmp); + }catch(...){ + //ignored + } + _CLDELETE(boost); + + q->setBoost( f); + } + + return q; + } + + QueryToken* QueryParser::MatchQueryToken(QueryToken::Types expectedType){ + //Func - matches for QueryToken of the specified type and returns it + // otherwise Exception throws + //Pre - tokens != NULL + //Post - + + CND_PRECONDITION(tokens != NULL,"tokens is NULL"); + + if(tokens->count() == 0){ + throwParserException(_T("Error: Unexpected end of program"),' ',0,0); + } + + //Extract a token form the TokenList tokens + QueryToken* t = tokens->extract(); + //Check if the type of the token t matches the expectedType + if (expectedType != t->Type){ + TCHAR buf[200]; + _sntprintf(buf,200,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType); + _CLDELETE(t); + throwParserException(buf,' ',0,0); + } + + //Return the matched token + return t; + } + + void QueryParser::ExtractAndDeleteToken(void){ + //Func - Extracts the first token from the Tokenlist tokenlist + // and destroys it + //Pre - true + //Post - The first token has been extracted and destroyed + + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + //Extract the token from the TokenList tokens + QueryToken* t = tokens->extract(); + //Condition Check Token may not be NULL + CND_CONDITION(t != NULL, "Token is NULL"); + //Delete Token + _CLDELETE(t); + } + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryParser.h b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.h new file mode 100644 index 000000000..a2fc85c89 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.h @@ -0,0 +1,165 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_queryParser_QueryParser_ +#define _lucene_queryParser_QueryParser_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/util/Reader.h" +#include "CLucene/search/SearchHeader.h" +#include "CLucene/index/Term.h" + +#include "TokenList.h" +#include "QueryToken.h" +#include "QueryParserBase.h" +#include "Lexer.h" + +CL_NS_DEF(queryParser) + +/** +* @brief CLucene's default query parser. +* +* <p>It's a query parser. +* The only method that clients should need to call is Parse(). +* The syntax for query const TCHAR*s is as follows: +* A Query is a series of clauses. A clause may be prefixed by:</p> +* <ul> +* <li>a plus (+) or a minus (-) sign, indicating that the +* clause is required or prohibited respectively; or</li> +* <li>a term followed by a colon, indicating the field to be searched. +* This enables one to construct queries which search multiple fields.</li> +* </ul> +* <p> +* A clause may be either:</p> +* <ul> +* <li>a term, indicating all the documents that contain this term; or</li> +* <li>a nested query, enclosed in parentheses. Note that this may be +* used with a +/- prefix to require any of a set of terms.</li> +* </ul> +* <p> +* Thus, in BNF, the query grammar is:</p> +* <code> +* Query ::= ( Clause )* +* Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) +* </code> +* <p> +* Examples of appropriately formatted queries can be found in the test cases. +* </p> +*/ +class QueryParser : public QueryParserBase +{ +private: + const TCHAR* field; + TokenList* tokens; +public: + /** + * Initializes a new instance of the QueryParser class with a specified field and + * analyzer values. + */ + QueryParser(const TCHAR* field, CL_NS(analysis)::Analyzer* analyzer); + ~QueryParser(); + + /** + * Returns a parsed Query instance. + * Note: this call is not threadsafe, either use a seperate QueryParser for each thread, or use a thread lock + * <param name="query">The query value to be parsed.</param> + * <returns>A parsed Query instance.</returns> + */ + virtual CL_NS(search)::Query* parse(const TCHAR* query); + + /** + * Returns a parsed Query instance. + * Note: this call is not threadsafe, either use a seperate QueryParser for each thread, or use a thread lock + * <param name="reader">The TextReader value to be parsed.</param> + * <returns>A parsed Query instance.</returns> + */ + virtual CL_NS(search)::Query* parse(CL_NS(util)::Reader* reader); + + /** + * Returns a new instance of the Query class with a specified query, field and + * analyzer values. + */ + static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR* field, CL_NS(analysis)::Analyzer* analyzer); + + CL_NS(analysis)::Analyzer* getAnalyzer() { return analyzer; } + + /** + * @return Returns the field. + */ + const TCHAR* getField() { return field; } + + //deprecated functions + _CL_DEPRECATED( setLowercaseExpandedTerms ) void setLowercaseWildcardTerms(bool lowercaseWildcardTerms){ setLowercaseExpandedTerms(lowercaseWildcardTerms); } + _CL_DEPRECATED( getLowercaseExpandedTerms ) bool getLowercaseWildcardTerms() const { return getLowercaseExpandedTerms(); } +protected: + //these functions may be defined under certain compilation conditions. + //note that this functionality is deprecated, you should create your own queryparser + //if you want to remove this functionality...it will be removed... be warned! +#ifdef NO_PREFIX_QUERY + virtual CL_NS(search)::Query* GetPrefixQuery(const TCHAR* field,const TCHAR* termStr){ return NULL; } +#endif +#ifdef NO_FUZZY_QUERY + virtual CL_NS(search)::Query* GetFuzzyQuery(const TCHAR* field,const TCHAR* termStr){ return NULL; } +#endif +#ifdef NO_RANGE_QUERY + virtual CL_NS(search)::Query* GetRangeQuery(const TCHAR* field, const TCHAR* part1, const TCHAR* part2, bool inclusive) { return NULL; } +#endif +#ifdef NO_WILDCARD_QUERY + virtual CL_NS(search)::Query* GetWildcardQuery(const TCHAR* field, TCHAR* termStr) { return NULL; } +#endif +private: + /** + * matches for CONJUNCTION + * CONJUNCTION ::= <AND> | <OR> + */ + int32_t MatchConjunction(); + + /** + * matches for MODIFIER + * MODIFIER ::= <PLUS> | <MINUS> | <NOT> + */ + int32_t MatchModifier(); + + /** + * matches for QUERY + * QUERY ::= [MODIFIER] CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)* + */ + CL_NS(search)::Query* MatchQuery(const TCHAR* field); + + /** + * matches for CLAUSE + * CLAUSE ::= [TERM <COLON>] ( TERM | (<LPAREN> QUERY <RPAREN>)) + */ + CL_NS(search)::Query* MatchClause(const TCHAR* field); + + /** + * matches for TERM + * TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER + * [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]] + * + * | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>] + * | <QUOTED> [SLOP] [<CARAT> <NUMBER>] + */ + CL_NS(search)::Query* MatchTerm(const TCHAR* field); + + /** + * matches for QueryToken of the specified type and returns it + * otherwise Exception throws + */ + QueryToken* MatchQueryToken(QueryToken::Types expectedType); + + /** + * Extracts the first token from the Tokenlist tokenlist + * and destroys it + */ + void ExtractAndDeleteToken(void); +}; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryParserBase.cpp b/3rdparty/clucene/src/CLucene/queryParser/QueryParserBase.cpp new file mode 100644 index 000000000..7b95b30f9 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryParserBase.cpp @@ -0,0 +1,369 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "QueryParserBase.h" + +#include "CLucene/search/TermQuery.h" +#include "CLucene/search/PhraseQuery.h" +#include "CLucene/search/RangeQuery.h" +#include "CLucene/search/FuzzyQuery.h" +#include "CLucene/search/WildcardQuery.h" +#include "CLucene/search/PrefixQuery.h" + + +CL_NS_USE(search) +CL_NS_USE(util) +CL_NS_USE(analysis) +CL_NS_USE(index) + +CL_NS_DEF(queryParser) + +QueryParserBase::QueryParserBase(Analyzer* analyzer){ +//Func - Constructor +//Pre - true +//Post - instance has been created with PhraseSlop = 0 + this->analyzer = analyzer; + this->defaultOperator = OR_OPERATOR; + this->phraseSlop = 0; + this->lowercaseExpandedTerms = true; +} + +QueryParserBase::~QueryParserBase(){ +//Func - Destructor +//Pre - true +//Post - The instance has been destroyed +} + + +void QueryParserBase::discardEscapeChar(TCHAR* source) const{ + int len = _tcslen(source); + int j = 0; + for (int i = 0; i < len; i++) { + if (source[i] == '\\' && source[i+1] != '\0' ) { + _tcscpy(source+i,source+i+1); + len--; + } + } +} + +void QueryParserBase::AddClause(CL_NS_STD(vector)<BooleanClause*>& clauses, int32_t conj, int32_t mods, Query* q){ +//Func - Adds the next parsed clause. +//Pre - +//Post - + + bool required, prohibited; + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited. + const uint32_t nPreviousClauses = clauses.size(); + if (nPreviousClauses > 0 && conj == CONJ_AND) { + BooleanClause* c = clauses[nPreviousClauses-1]; + if (!c->prohibited) + c->required = true; + } + + if (nPreviousClauses > 0 && defaultOperator == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parse as +a OR b + BooleanClause* c = clauses[nPreviousClauses-1]; + if (!c->prohibited){ + c->required = false; + c->prohibited = false; + } + } + + // We might have been passed a NULL query; the term might have been + // filtered away by the analyzer. + if (q == NULL) + return; + + if (defaultOperator == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. + prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + + if ( required && prohibited ) + throwParserException( _T("Clause cannot be both required and prohibited"), ' ',0,0); + clauses.push_back(_CLNEW BooleanClause(q,true, required, prohibited)); +} + +void QueryParserBase::throwParserException(const TCHAR* message, TCHAR ch, int32_t col, int32_t line ) +{ + TCHAR msg[1024]; + _sntprintf(msg,1024,message,ch,col,line); + _CLTHROWT (CL_ERR_Parse, msg ); +} + + +Query* QueryParserBase::GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop){ + Query* ret = GetFieldQuery(field,queryText); + if ( ret && ret->getQueryName() == PhraseQuery::getClassName() ) + ((PhraseQuery*)ret)->setSlop(slop); + + return ret; +} + +Query* QueryParserBase::GetFieldQuery(const TCHAR* field, TCHAR* queryText){ +//Func - Returns a query for the specified field. +// Use the analyzer to get all the tokens, and then build a TermQuery, +// PhraseQuery, or nothing based on the term count +//Pre - field != NULL +// analyzer contains a valid reference to an Analyzer +// queryText != NULL and contains the query +//Post - A query instance has been returned for the specified field + + CND_PRECONDITION(field != NULL, "field is NULL"); + CND_PRECONDITION(queryText != NULL, "queryText is NULL"); + + //Instantiate a stringReader for queryText + StringReader reader(queryText); + TokenStream* source = analyzer->tokenStream(field, &reader); + CND_CONDITION(source != NULL,"source is NULL"); + + StringArrayConstWithDeletor v; + + Token t; + int positionCount = 0; + bool severalTokensAtSamePosition = false; + + //Get the tokens from the source + try{ + while (source->next(&t)){ + v.push_back(STRDUP_TtoT(t.termText())); + + if (t.getPositionIncrement() != 0) + positionCount += t.getPositionIncrement(); + else + severalTokensAtSamePosition = true; + } + }catch(CLuceneError& err){ + if ( err.number() != CL_ERR_IO ) + throw err; + } + _CLDELETE(source); + + //Check if there are any tokens retrieved + if (v.size() == 0){ + return NULL; + }else{ + if (v.size() == 1){ + Term* t = _CLNEW Term(field, v[0]); + Query* ret = _CLNEW TermQuery( t ); + _CLDECDELETE(t); + return ret; + }else{ + if (severalTokensAtSamePosition) { + if (positionCount == 1) { + // no phrase query: + BooleanQuery* q = _CLNEW BooleanQuery; //todo: disableCoord=true here, but not implemented in BooleanQuery + StringArrayConst::iterator itr = v.begin(); + while ( itr != v.end() ){ + Term* t = _CLNEW Term(field, *itr); + q->add(_CLNEW TermQuery(t),true, false,false);//should occur... + _CLDECDELETE(t); + ++itr; + } + return q; + }else { + _CLTHROWA(CL_ERR_UnsupportedOperation, "MultiPhraseQuery NOT Implemented"); + } + }else{ + PhraseQuery* q = _CLNEW PhraseQuery; + q->setSlop(phraseSlop); + + StringArrayConst::iterator itr = v.begin(); + while ( itr != v.end() ){ + const TCHAR* data = *itr; + Term* t = _CLNEW Term(field, data); + q->add(t); + _CLDECDELETE(t); + ++itr; + } + return q; + } + } + } +} + +void QueryParserBase::setLowercaseExpandedTerms(bool lowercaseExpandedTerms){ + this->lowercaseExpandedTerms = lowercaseExpandedTerms; +} +bool QueryParserBase::getLowercaseExpandedTerms() const { + return lowercaseExpandedTerms; +} +void QueryParserBase::setDefaultOperator(int oper){ + this->defaultOperator=oper; +} +int QueryParserBase::getDefaultOperator() const{ + return defaultOperator; +} + + +Query* QueryParserBase::ParseRangeQuery(const TCHAR* field, TCHAR* queryText, bool inclusive) +{ + //todo: this must be fixed, [-1--5] (-1 to -5) should yield a result, but won't parse properly + //because it uses an analyser, should split it up differently... + + // Use the analyzer to get all the tokens. There should be 1 or 2. + StringReader reader(queryText); + TokenStream* source = analyzer->tokenStream(field, &reader); + + TCHAR* terms[2]; + terms[0]=NULL;terms[1]=NULL; + Token t; + bool tret=true; + bool from=true; + while(tret) + { + try{ + tret = source->next(&t); + }catch (CLuceneError& err){ + if ( err.number() == CL_ERR_IO ) + tret=false; + else + throw err; + } + if (tret) + { + if ( !from && _tcscmp(t.termText(),_T("TO"))==0 ) + continue; + + + TCHAR* tmp = STRDUP_TtoT(t.termText()); + discardEscapeChar(tmp); + terms[from? 0 : 1] = tmp; + + if (from) + from = false; + else + break; + } + } + Query* ret = GetRangeQuery(field, terms[0], terms[1],inclusive); + _CLDELETE_CARRAY(terms[0]); + _CLDELETE_CARRAY(terms[1]); + _CLDELETE(source); + + return ret; +} + +Query* QueryParserBase::GetPrefixQuery(const TCHAR* field, TCHAR* termStr){ +//Pre - field != NULL and field contains the name of the field that the query will use +// termStr != NULL and is the token to use for building term for the query +// (WITH or WITHOUT a trailing '*' character!) +//Post - A PrefixQuery instance has been returned + + CND_PRECONDITION(field != NULL,"field is NULL"); + CND_PRECONDITION(termStr != NULL,"termStr is NULL"); + + if ( lowercaseExpandedTerms ) + _tcslwr(termStr); + + Term* t = _CLNEW Term(field, termStr); + CND_CONDITION(t != NULL,"Could not allocate memory for term t"); + + Query *q = _CLNEW PrefixQuery(t); + CND_CONDITION(q != NULL,"Could not allocate memory for PrefixQuery q"); + + _CLDECDELETE(t); + return q; +} + +Query* QueryParserBase::GetFuzzyQuery(const TCHAR* field, TCHAR* termStr){ +//Func - Factory method for generating a query (similar to getPrefixQuery}). Called when parser parses +// an input term token that has the fuzzy suffix (~) appended. +//Pre - field != NULL and field contains the name of the field that the query will use +// termStr != NULL and is the token to use for building term for the query +// (WITH or WITHOUT a trailing '*' character!) +//Post - A FuzzyQuery instance has been returned + + CND_PRECONDITION(field != NULL,"field is NULL"); + CND_PRECONDITION(termStr != NULL,"termStr is NULL"); + + if ( lowercaseExpandedTerms ) + _tcslwr(termStr); + + Term* t = _CLNEW Term(field, termStr); + CND_CONDITION(t != NULL,"Could not allocate memory for term t"); + + Query *q = _CLNEW FuzzyQuery(t); + CND_CONDITION(q != NULL,"Could not allocate memory for FuzzyQuery q"); + + _CLDECDELETE(t); + return q; +} + + +Query* QueryParserBase::GetWildcardQuery(const TCHAR* field, TCHAR* termStr){ + CND_PRECONDITION(field != NULL,"field is NULL"); + CND_PRECONDITION(termStr != NULL,"termStr is NULL"); + + if ( lowercaseExpandedTerms ) + _tcslwr(termStr); + + Term* t = _CLNEW Term(field, termStr); + CND_CONDITION(t != NULL,"Could not allocate memory for term t"); + Query* q = _CLNEW WildcardQuery(t); + _CLDECDELETE(t); + + return q; +} + +Query* QueryParserBase::GetBooleanQuery(CL_NS_STD(vector)<CL_NS(search)::BooleanClause*>& clauses){ + if ( clauses.size() == 0 ) + return NULL; + + BooleanQuery* query = _CLNEW BooleanQuery(); + //Condition check to see if query has been allocated properly + CND_CONDITION(query != NULL, "No memory could be allocated for query"); + + //iterate through all the clauses + for( uint32_t i=0;i<clauses.size();i++ ){ + //Condition check to see if clauses[i] is valdid + CND_CONDITION(clauses[i] != NULL, "clauses[i] is NULL"); + //Add it to query + query->add(clauses[i]); + } + return query; +} + + +CL_NS(search)::Query* QueryParserBase::GetRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, bool inclusive){ + //todo: does jlucene handle rangequeries differntly? if we are using + //a certain type of analyser, the terms may be filtered out, which + //is not necessarily what we want. + if (lowercaseExpandedTerms) { + _tcslwr(part1); + _tcslwr(part2); + } + //todo: should see if we can parse the strings as dates... currently we leave that up to the end-developer... + Term* t1 = _CLNEW Term(field,part1); + Term* t2 = _CLNEW Term(field,part2); + Query* ret = _CLNEW RangeQuery(t1, t2, inclusive); + _CLDECDELETE(t1); + _CLDECDELETE(t2); + + return ret; +} + + + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryParserBase.h b/3rdparty/clucene/src/CLucene/queryParser/QueryParserBase.h new file mode 100644 index 000000000..261e587b0 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryParserBase.h @@ -0,0 +1,204 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_queryParser_QueryParserBase_ +#define _lucene_queryParser_QueryParserBase_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/util/VoidList.h" +#include "CLucene/search/BooleanClause.h" +#include "CLucene/analysis/Analyzers.h" +#include "QueryToken.h" + +CL_NS_DEF(queryParser) + + /** + * Contains default implementations used by QueryParser. + * You can override any of these to provide a customised QueryParser. + */ + class QueryParserBase:LUCENE_BASE + { + protected: + /* The actual operator the parser uses to combine query terms */ + int defaultOperator; + int32_t phraseSlop; + + bool lowercaseExpandedTerms; + + LUCENE_STATIC_CONSTANT(int, CONJ_NONE=0); + LUCENE_STATIC_CONSTANT(int, CONJ_AND=1); + LUCENE_STATIC_CONSTANT(int, CONJ_OR=2); + + LUCENE_STATIC_CONSTANT(int, MOD_NONE=0); + LUCENE_STATIC_CONSTANT(int, MOD_NOT=10); + LUCENE_STATIC_CONSTANT(int, MOD_REQ=11); + + CL_NS(analysis)::Analyzer* analyzer; + + public: + QueryParserBase(CL_NS(analysis)::Analyzer* analyzer); + ~QueryParserBase(); + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is <code>true</code>. + */ + void setLowercaseExpandedTerms(bool lowercaseExpandedTerms); + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + bool getLowercaseExpandedTerms() const; + + //values used for setOperator + LUCENE_STATIC_CONSTANT(int, OR_OPERATOR=0); + LUCENE_STATIC_CONSTANT(int, AND_OPERATOR=1); + + /** + * Sets the boolean operator of the QueryParser. + * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers + * are considered optional: for example <code>capital of Hungary</code> is equal to + * <code>capital OR of OR Hungary</code>.<br/> + * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the + * above mentioned query is parsed as <code>capital AND of AND Hungary</code> + */ + void setDefaultOperator(int oper); + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + int getDefaultOperator() const; + + //public so that the lexer can call this + virtual void throwParserException(const TCHAR* message, TCHAR ch, int32_t col, int32_t line ); + + /** + * Sets the default slop for phrases. If zero, then exact phrase matches + * are required. Default value is zero. + */ + void setPhraseSlop(int phraseSlop) { this->phraseSlop = phraseSlop; } + + /** + * Gets the default slop for phrases. + */ + int getPhraseSlop() { return phraseSlop; } + + protected: + + /** + * Removes the escaped characters + */ + void discardEscapeChar(TCHAR* token) const; + + //Analyzes the expanded term termStr with the StandardFilter and the LowerCaseFilter. + TCHAR* AnalyzeExpandedTerm(const TCHAR* field, TCHAR* termStr); + + // Adds the next parsed clause. + virtual void AddClause(std::vector<CL_NS(search)::BooleanClause*>& clauses, int32_t conj, int32_t mods, CL_NS(search)::Query* q); + + /** + * Returns a termquery, phrasequery for the specified field. + * Note: this is only a partial implementation, since MultiPhraseQuery is not implemented yet + * return NULL to disallow + */ + virtual CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText); + + /** + * Delegates to GetFieldQuery(string, string), and adds slop onto phrasequery. + * Can be used to remove slop functionality + */ + virtual CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop); + + /** + * Factory method for generating a query (similar to + * {@link #GetWildcardQuery}). Called when parser parses an input term + * token that uses prefix notation; that is, contains a single '*' wildcard + * character as its last character. Since this is a special case + * of generic wildcard term, and such a query can be optimized easily, + * this usually results in a different query object. + *<p> + * Depending on settings, a prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wild card queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * (<b>without</b> trailing '*' character!) + * + * @return Resulting {@link Query} built for the term + * return NULL to disallow + */ + virtual CL_NS(search)::Query* GetPrefixQuery(const TCHAR* field, TCHAR* termStr); + + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains one or more wildcard + * characters (? and *), but is not a prefix term token (one + * that has just a single * character at the end) + *<p> + * Depending on settings, prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wildcard queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains one or more wild card + * characters (? or *), but is not simple prefix term + * + * @return Resulting {@link Query} built for the term + * return NULL to disallow + */ + virtual CL_NS(search)::Query* GetWildcardQuery(const TCHAR* field, TCHAR* termStr); + + /** + * Factory method for generating a query (similar to + * {@link #GetWildcardQuery}). Called when parser parses + * an input term token that has the fuzzy suffix (~) appended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting {@link Query} built for the term + * return NULL to disallow + */ + virtual CL_NS(search)::Query* GetFuzzyQuery(const TCHAR* field, TCHAR* termStr); + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains {@link BooleanClause} instances + * to join. + * + * @return Resulting {@link Query} object. + * return NULL to disallow + * + * Memory: clauses must all be cleaned up by this function. + */ + virtual CL_NS(search)::Query* GetBooleanQuery(std::vector<CL_NS(search)::BooleanClause*>& clauses); + + /** + * return NULL to disallow + */ + virtual CL_NS(search)::Query* GetRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, bool inclusive); + virtual CL_NS(search)::Query* ParseRangeQuery(const TCHAR* field, TCHAR* str, bool inclusive); + + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryToken.cpp b/3rdparty/clucene/src/CLucene/queryParser/QueryToken.cpp new file mode 100644 index 000000000..ee88a3cb6 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryToken.cpp @@ -0,0 +1,73 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "QueryToken.h" + +CL_NS_DEF(queryParser) + + +QueryToken::QueryToken(): + Value(NULL) +{ + set(UNKNOWN_); +} +QueryToken::QueryToken(TCHAR* value, const int32_t start, const int32_t end, const QueryToken::Types type): + Value(NULL) +{ + set(value,start,end,type); +} + +QueryToken::~QueryToken(){ +//Func - Destructor +//Pre - true +//Post - Instance has been destroyed + + #ifndef LUCENE_TOKEN_WORD_LENGTH + _CLDELETE_CARRAY( Value ); + #endif +} + +// Initializes a new instance of the Token class LUCENE_EXPORT. +// +QueryToken::QueryToken(TCHAR* value, const QueryToken::Types type): + Value(NULL) +{ + set(value,type); +} + +// Initializes a new instance of the Token class LUCENE_EXPORT. +// +QueryToken::QueryToken(QueryToken::Types type): + Value(NULL) +{ + set(type); +} + + +void QueryToken::set(TCHAR* value, const Types type){ + set(value,0,-1,type); +} +void QueryToken::set(TCHAR* value, const int32_t start, const int32_t end, const Types type){ + #ifndef LUCENE_TOKEN_WORD_LENGTH + _CLDELETE_CARRAY(Value); + Value = STRDUP_TtoT(value); + #else + _tcsncpy(Value,value,LUCENE_TOKEN_WORD_LENGTH); + Value[LUCENE_TOKEN_WORD_LENGTH]; + #endif + this->Start = start; + this->End = end; + this->Type = type; + + if ( this->End < 0 ) + this->End = _tcslen(Value); +} +void QueryToken::set(Types type){ + set(LUCENE_BLANK_STRING,0,0,type); +} + +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryToken.h b/3rdparty/clucene/src/CLucene/queryParser/QueryToken.h new file mode 100644 index 000000000..739a667ba --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryToken.h @@ -0,0 +1,76 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_queryParser_QueryToken_ +#define _lucene_queryParser_QueryToken_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "QueryParserBase.h" + +CL_NS_DEF(queryParser) + + // Token class that used by QueryParser. + class QueryToken:LUCENE_BASE + { + public: + enum Types + { + AND_, + OR, + NOT, + PLUS, + MINUS, + LPAREN, + RPAREN, + COLON, + CARAT, + QUOTED, + TERM, + SLOP, + FUZZY, + PREFIXTERM, + WILDTERM, + RANGEIN, + RANGEEX, + NUMBER, + EOF_, + UNKNOWN_ + }; + + + #ifdef LUCENE_TOKEN_WORD_LENGTH + TCHAR Value[LUCENE_TOKEN_WORD_LENGTH+1]; + #else + TCHAR* Value; + #endif + + int32_t Start; + int32_t End; + QueryToken::Types Type; + + // Initializes a new instance of the Token class. + QueryToken(TCHAR* value, const int32_t start, const int32_t end, const Types type); + + // Initializes a new instance of the Token class. + QueryToken(TCHAR* value, const Types type); + + // Initializes a new instance of the Token class. + QueryToken(Types type); + + // Initializes an empty instance of the Token class. + QueryToken(); + + ~QueryToken(); + + void set(TCHAR* value, const int32_t start, const int32_t end, const Types type); + void set(TCHAR* value, const Types type); + void set(Types type); + }; +CL_NS_END +#endif diff --git a/3rdparty/clucene/src/CLucene/queryParser/TokenList.cpp b/3rdparty/clucene/src/CLucene/queryParser/TokenList.cpp new file mode 100644 index 000000000..7d30b931f --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/TokenList.cpp @@ -0,0 +1,79 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "TokenList.h" + +#include "CLucene/util/VoidMap.h" +#include "CLucene/util/VoidList.h" +#include "QueryToken.h" + +CL_NS_DEF(queryParser) + + TokenList::TokenList(){ + //Func - Constructor + //Pre - true + //Post - Instance has been created + } + + TokenList::~TokenList(){ + //Func - Destructor + //Pre - true + //Post - The tokenlist has been destroyed + + tokens.clear(); + } + + void TokenList::add(QueryToken* token){ + //Func - Adds a QueryToken token to the TokenList + //Pre - token != NULL + //Post - token has been added to the token list + + CND_PRECONDITION(token != NULL, "token != NULL"); + + tokens.insert(tokens.begin(),token); + } + + void TokenList::push(QueryToken* token){ + //Func - + //Pre - token != NULL + //Post - + + CND_PRECONDITION(token != NULL, "token is NULL"); + + tokens.push_back(token); + } + + QueryToken* TokenList::peek() { + /* DSR:2004.11.01: Reverted my previous (circa April 2004) fix (which + ** raised an exception if Peek was called when there were no tokens) in + ** favor of returning the EOF token. This solution is much better + ** integrated with the rest of the code in the queryParser subsystem. */ + size_t nTokens = tokens.size(); + if (nTokens == 0) { + push(_CLNEW QueryToken(QueryToken::EOF_)); + nTokens++; + } + return tokens[nTokens-1]; + } + + QueryToken* TokenList::extract(){ + //Func - Extract token from the TokenList + //Pre - true + //Post - Retracted token has been returned + + QueryToken* token = peek(); + //Retract the current peeked token + tokens.delete_back(); + + return token; + } + + int32_t TokenList::count() const + { + return tokens.size(); + } +CL_NS_END diff --git a/3rdparty/clucene/src/CLucene/queryParser/TokenList.h b/3rdparty/clucene/src/CLucene/queryParser/TokenList.h new file mode 100644 index 000000000..3166bba78 --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/TokenList.h @@ -0,0 +1,38 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_queryParser_TokenList_ +#define _lucene_queryParser_TokenList_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include "CLucene/util/VoidList.h" +#include "QueryToken.h" +CL_NS_DEF(queryParser) + + // Represents a list of the tokens. + class TokenList:LUCENE_BASE + { + private: + CL_NS(util)::CLVector<QueryToken*> tokens; //todo:,CL_NS(util)::Deletor::Object<QueryToken> + public: + TokenList(); + ~TokenList(); + + void add(QueryToken* token); + + void push(QueryToken* token); + + QueryToken* peek(); + + QueryToken* extract(); + + int32_t count() const; + }; +CL_NS_END +#endif |