diff options
Diffstat (limited to '3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp')
-rw-r--r-- | 3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp new file mode 100644 index 000000000..b11eec0bb --- /dev/null +++ b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp @@ -0,0 +1,509 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +* +* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "QueryParser.h" + +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/util/Reader.h" +#include "CLucene/search/SearchHeader.h" +#include "CLucene/index/Term.h" + +#include "TokenList.h" +#include "QueryToken.h" +#include "QueryParserBase.h" +#include "Lexer.h" + +CL_NS_USE(util) +CL_NS_USE(index) +CL_NS_USE(analysis) +CL_NS_USE(search) + +CL_NS_DEF(queryParser) + + QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){ + //Func - Constructor. + // Instantiates a QueryParser for the named field _field + //Pre - _field != NULL + //Post - An instance has been created + + if ( _field ) + field = STRDUP_TtoT(_field); + else + field = NULL; + tokens = NULL; + lowercaseExpandedTerms = true; + } + + QueryParser::~QueryParser() { + //Func - Destructor + //Pre - true + //Post - The instance has been destroyed + + _CLDELETE_CARRAY(field); + } + + //static + Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){ + //Func - Returns a new instance of the Query class with a specified query, field and + // analyzer values. + //Pre - query != NULL and holds the query to parse + // field != NULL and holds the default field for query terms + // analyzer holds a valid reference to an Analyzer and is used to + // find terms in the query text + //Post - query has been parsed and an instance of Query has been returned + + CND_PRECONDITION(query != NULL, "query is NULL"); + CND_PRECONDITION(field != NULL, "field is NULL"); + + QueryParser parser(field, analyzer); + return parser.parse(query); + } + + Query* QueryParser::parse(const TCHAR* query){ + //Func - Returns a parsed Query instance + //Pre - query != NULL and contains the query value to be parsed + //Post - Returns a parsed Query Instance + + CND_PRECONDITION(query != NULL, "query is NULL"); + + //Instantie a Stringer that can read the query string + Reader* r = _CLNEW StringReader(query); + + //Check to see if r has been created properly + CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r"); + + //Pointer for the return value + Query* ret = NULL; + + try{ + //Parse the query managed by the StringReader R and return a parsed Query instance + //into ret + ret = parse(r); + }_CLFINALLY ( + _CLDELETE(r); + ); + + return ret; + } + + Query* QueryParser::parse(Reader* reader){ + //Func - Returns a parsed Query instance + //Pre - reader contains a valid reference to a Reader and manages the query string + //Post - A parsed Query instance has been returned or + + //instantiate the TokenList tokens + TokenList _tokens; + this->tokens = &_tokens; + + //Instantiate a lexer + Lexer lexer(this, reader); + + //tokens = lexer.Lex(); + //Lex the tokens + lexer.Lex(tokens); + + //Peek to the first token and check if is an EOF + if (tokens->peek()->Type == QueryToken::EOF_){ + // The query string failed to yield any tokens. We discard the + // TokenList tokens and raise an exceptioin. + QueryToken* token = this->tokens->extract(); + _CLDELETE(token); + _CLTHROWA(CL_ERR_Parse, "No query given."); + } + + //Return the parsed Query instance + Query* ret = MatchQuery(field); + this->tokens = NULL; + return ret; + } + + int32_t QueryParser::MatchConjunction(){ + //Func - matches for CONJUNCTION + // CONJUNCTION ::= <AND> | <OR> + //Pre - tokens != NULL + //Post - if the first token is an AND or an OR then + // the token is extracted and deleted and CONJ_AND or CONJ_OR is returned + // otherwise CONJ_NONE is returned + + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + switch(tokens->peek()->Type){ + case QueryToken::AND_ : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return CONJ_AND; + case QueryToken::OR : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return CONJ_OR; + default : + return CONJ_NONE; + } + } + + int32_t QueryParser::MatchModifier(){ + //Func - matches for MODIFIER + // MODIFIER ::= <PLUS> | <MINUS> | <NOT> + //Pre - tokens != NULL + //Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned + // if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned + // otherwise MOD_NONE is returned + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + switch(tokens->peek()->Type){ + case QueryToken::PLUS : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return MOD_REQ; + case QueryToken::MINUS : + case QueryToken::NOT : + //Delete the first token of tokenlist + ExtractAndDeleteToken(); + return MOD_NOT; + default : + return MOD_NONE; + } + } + + Query* QueryParser::MatchQuery(const TCHAR* field){ + //Func - matches for QUERY + // QUERY ::= [MODIFIER] QueryParser::CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)* + //Pre - field != NULL + //Post - + + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + CL_NS_STD(vector)<BooleanClause*> clauses; + + Query* q = NULL; + + int32_t mods = MOD_NONE; + int32_t conj = CONJ_NONE; + + //match for MODIFIER + mods = MatchModifier(); + + //match for CLAUSE + q = MatchClause(field); + AddClause(clauses, CONJ_NONE, mods, q); + + // match for CLAUSE* + while(true){ + QueryToken* p = tokens->peek(); + if(p->Type == QueryToken::EOF_){ + QueryToken* qt = MatchQueryToken(QueryToken::EOF_); + _CLDELETE(qt); + break; + } + + if(p->Type == QueryToken::RPAREN){ + //MatchQueryToken(QueryToken::RPAREN); + break; + } + + //match for a conjuction (AND OR NOT) + conj = MatchConjunction(); + //match for a modifier + mods = MatchModifier(); + + q = MatchClause(field); + if ( q != NULL ) + AddClause(clauses, conj, mods, q); + } + + // finalize query + if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL + BooleanClause* c = clauses[0]; + Query* q = c->query; + + //Condition check to be sure clauses[0] is valid + CND_CONDITION(c != NULL, "c is NULL"); + + //Tell the boolean clause not to delete its query + c->deleteQuery=false; + //Clear the clauses list + clauses.clear(); + _CLDELETE(c); + + return q; + }else{ + return GetBooleanQuery(clauses); + } + } + + Query* QueryParser::MatchClause(const TCHAR* field){ + //Func - matches for CLAUSE + // CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>)) + //Pre - field != NULL + //Post - + + Query* q = NULL; + const TCHAR* sfield = field; + bool delField = false; + + QueryToken *DelToken = NULL; + + //match for [TERM <COLON>] + QueryToken* term = tokens->extract(); + if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){ + DelToken = MatchQueryToken(QueryToken::COLON); + + CND_CONDITION(DelToken != NULL,"DelToken is NULL"); + _CLDELETE(DelToken); + + TCHAR* tmp = STRDUP_TtoT(term->Value); + discardEscapeChar(tmp); + delField = true; + sfield = tmp; + _CLDELETE(term); + }else{ + tokens->push(term); + term = NULL; + } + + // match for + // TERM | (<LPAREN> QUERY <RPAREN>) + if(tokens->peek()->Type == QueryToken::LPAREN){ + DelToken = MatchQueryToken(QueryToken::LPAREN); + + CND_CONDITION(DelToken != NULL,"DelToken is NULL"); + _CLDELETE(DelToken); + + q = MatchQuery(sfield); + //DSR:2004.11.01: + //If exception is thrown while trying to match trailing parenthesis, + //need to prevent q from leaking. + + try{ + DelToken = MatchQueryToken(QueryToken::RPAREN); + + CND_CONDITION(DelToken != NULL,"DelToken is NULL"); + _CLDELETE(DelToken); + + }catch(...) { + _CLDELETE(q); + throw; + } + }else{ + q = MatchTerm(sfield); + } + + if ( delField ) + _CLDELETE_CARRAY(sfield); + return q; + } + + + Query* QueryParser::MatchTerm(const TCHAR* field){ + //Func - matches for TERM + // TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER + // [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]] + // | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>] + // | <QUOTED> [SLOP] [<CARAT> <NUMBER>] + //Pre - field != NULL + //Post - + + QueryToken* term = NULL; + QueryToken* slop = NULL; + QueryToken* boost = NULL; + + bool prefix = false; + bool wildcard = false; + bool fuzzy = false; + bool rangein = false; + Query* q = NULL; + + term = tokens->extract(); + QueryToken* DelToken = NULL; //Token that is about to be deleted + + switch(term->Type){ + case QueryToken::TERM: + case QueryToken::NUMBER: + case QueryToken::PREFIXTERM: + case QueryToken::WILDTERM: + { //start case + //Check if type of QueryToken term is a prefix term + if(term->Type == QueryToken::PREFIXTERM){ + prefix = true; + } + //Check if type of QueryToken term is a wildcard term + if(term->Type == QueryToken::WILDTERM){ + wildcard = true; + } + //Peek to see if the type of the next token is fuzzy term + if(tokens->peek()->Type == QueryToken::FUZZY){ + DelToken = MatchQueryToken(QueryToken::FUZZY); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + fuzzy = true; + } + if(tokens->peek()->Type == QueryToken::CARAT){ + DelToken = MatchQueryToken(QueryToken::CARAT); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + boost = MatchQueryToken(QueryToken::NUMBER); + + if(tokens->peek()->Type == QueryToken::FUZZY){ + DelToken = MatchQueryToken(QueryToken::FUZZY); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + fuzzy = true; + } + } //end if type==CARAT + + discardEscapeChar(term->Value); //clean up + if(wildcard){ + q = GetWildcardQuery(field,term->Value); + break; + }else if(prefix){ + //Create a PrefixQuery + term->Value[_tcslen(term->Value)-1] = 0; //discard the * + q = GetPrefixQuery(field,term->Value); + break; + }else if(fuzzy){ + //Create a FuzzyQuery + + //Check if the last char is a ~ + if(term->Value[_tcslen(term->Value)-1] == '~'){ + //remove the ~ + term->Value[_tcslen(term->Value)-1] = '\0'; + } + + q = GetFuzzyQuery(field,term->Value); + break; + }else{ + q = GetFieldQuery(field, term->Value); + break; + } + } + + + case QueryToken::RANGEIN: + case QueryToken::RANGEEX:{ + if(term->Type == QueryToken::RANGEIN){ + rangein = true; + } + + if(tokens->peek()->Type == QueryToken::CARAT){ + DelToken = MatchQueryToken(QueryToken::CARAT); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + boost = MatchQueryToken(QueryToken::NUMBER); + } + + TCHAR* noBrackets = term->Value + 1; + noBrackets[_tcslen(noBrackets)-1] = 0; + q = ParseRangeQuery(field, noBrackets, rangein); + break; + } + + + case QueryToken::QUOTED:{ + if(tokens->peek()->Type == QueryToken::SLOP){ + slop = MatchQueryToken(QueryToken::SLOP); + } + + if(tokens->peek()->Type == QueryToken::CARAT){ + DelToken = MatchQueryToken(QueryToken::CARAT); + + CND_CONDITION(DelToken !=NULL, "DelToken is NULL"); + _CLDELETE(DelToken); + + boost = MatchQueryToken(QueryToken::NUMBER); + } + + //remove the quotes + TCHAR* quotedValue = term->Value+1; + quotedValue[_tcslen(quotedValue)-1] = '\0'; + + int32_t islop = phraseSlop; + if(slop != NULL ){ + try { + TCHAR* end; //todo: should parse using float... + islop = (int32_t)_tcstoi64(slop->Value+1, &end, 10); + }catch(...){ + //ignored + } + } + + q = GetFieldQuery(field, quotedValue, islop); + _CLDELETE(slop); + } + } // end of switch + + _CLDELETE(term); + + + if( q!=NULL && boost != NULL ){ + qreal f = 1.0F; + try { + TCHAR* tmp; + f = _tcstod(boost->Value, &tmp); + }catch(...){ + //ignored + } + _CLDELETE(boost); + + q->setBoost( f); + } + + return q; + } + + QueryToken* QueryParser::MatchQueryToken(QueryToken::Types expectedType){ + //Func - matches for QueryToken of the specified type and returns it + // otherwise Exception throws + //Pre - tokens != NULL + //Post - + + CND_PRECONDITION(tokens != NULL,"tokens is NULL"); + + if(tokens->count() == 0){ + throwParserException(_T("Error: Unexpected end of program"),' ',0,0); + } + + //Extract a token form the TokenList tokens + QueryToken* t = tokens->extract(); + //Check if the type of the token t matches the expectedType + if (expectedType != t->Type){ + TCHAR buf[200]; + _sntprintf(buf,200,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType); + _CLDELETE(t); + throwParserException(buf,' ',0,0); + } + + //Return the matched token + return t; + } + + void QueryParser::ExtractAndDeleteToken(void){ + //Func - Extracts the first token from the Tokenlist tokenlist + // and destroys it + //Pre - true + //Post - The first token has been extracted and destroyed + + CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + + //Extract the token from the TokenList tokens + QueryToken* t = tokens->extract(); + //Condition Check Token may not be NULL + CND_CONDITION(t != NULL, "Token is NULL"); + //Delete Token + _CLDELETE(t); + } + +CL_NS_END |