summaryrefslogtreecommitdiffstats
path: root/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp')
-rw-r--r--3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp509
1 files changed, 509 insertions, 0 deletions
diff --git a/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp
new file mode 100644
index 000000000..b11eec0bb
--- /dev/null
+++ b/3rdparty/clucene/src/CLucene/queryParser/QueryParser.cpp
@@ -0,0 +1,509 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+*
+* Changes are Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "QueryParser.h"
+
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "CLucene/util/Reader.h"
+#include "CLucene/search/SearchHeader.h"
+#include "CLucene/index/Term.h"
+
+#include "TokenList.h"
+#include "QueryToken.h"
+#include "QueryParserBase.h"
+#include "Lexer.h"
+
+CL_NS_USE(util)
+CL_NS_USE(index)
+CL_NS_USE(analysis)
+CL_NS_USE(search)
+
+CL_NS_DEF(queryParser)
+
+ QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){
+ //Func - Constructor.
+ // Instantiates a QueryParser for the named field _field
+ //Pre - _field != NULL
+ //Post - An instance has been created
+
+ if ( _field )
+ field = STRDUP_TtoT(_field);
+ else
+ field = NULL;
+ tokens = NULL;
+ lowercaseExpandedTerms = true;
+ }
+
+ QueryParser::~QueryParser() {
+ //Func - Destructor
+ //Pre - true
+ //Post - The instance has been destroyed
+
+ _CLDELETE_CARRAY(field);
+ }
+
+ //static
+ Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){
+ //Func - Returns a new instance of the Query class with a specified query, field and
+ // analyzer values.
+ //Pre - query != NULL and holds the query to parse
+ // field != NULL and holds the default field for query terms
+ // analyzer holds a valid reference to an Analyzer and is used to
+ // find terms in the query text
+ //Post - query has been parsed and an instance of Query has been returned
+
+ CND_PRECONDITION(query != NULL, "query is NULL");
+ CND_PRECONDITION(field != NULL, "field is NULL");
+
+ QueryParser parser(field, analyzer);
+ return parser.parse(query);
+ }
+
+ Query* QueryParser::parse(const TCHAR* query){
+ //Func - Returns a parsed Query instance
+ //Pre - query != NULL and contains the query value to be parsed
+ //Post - Returns a parsed Query Instance
+
+ CND_PRECONDITION(query != NULL, "query is NULL");
+
+ //Instantie a Stringer that can read the query string
+ Reader* r = _CLNEW StringReader(query);
+
+ //Check to see if r has been created properly
+ CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r");
+
+ //Pointer for the return value
+ Query* ret = NULL;
+
+ try{
+ //Parse the query managed by the StringReader R and return a parsed Query instance
+ //into ret
+ ret = parse(r);
+ }_CLFINALLY (
+ _CLDELETE(r);
+ );
+
+ return ret;
+ }
+
+ Query* QueryParser::parse(Reader* reader){
+ //Func - Returns a parsed Query instance
+ //Pre - reader contains a valid reference to a Reader and manages the query string
+ //Post - A parsed Query instance has been returned or
+
+ //instantiate the TokenList tokens
+ TokenList _tokens;
+ this->tokens = &_tokens;
+
+ //Instantiate a lexer
+ Lexer lexer(this, reader);
+
+ //tokens = lexer.Lex();
+ //Lex the tokens
+ lexer.Lex(tokens);
+
+ //Peek to the first token and check if is an EOF
+ if (tokens->peek()->Type == QueryToken::EOF_){
+ // The query string failed to yield any tokens. We discard the
+ // TokenList tokens and raise an exceptioin.
+ QueryToken* token = this->tokens->extract();
+ _CLDELETE(token);
+ _CLTHROWA(CL_ERR_Parse, "No query given.");
+ }
+
+ //Return the parsed Query instance
+ Query* ret = MatchQuery(field);
+ this->tokens = NULL;
+ return ret;
+ }
+
+ int32_t QueryParser::MatchConjunction(){
+ //Func - matches for CONJUNCTION
+ // CONJUNCTION ::= <AND> | <OR>
+ //Pre - tokens != NULL
+ //Post - if the first token is an AND or an OR then
+ // the token is extracted and deleted and CONJ_AND or CONJ_OR is returned
+ // otherwise CONJ_NONE is returned
+
+ CND_PRECONDITION(tokens != NULL, "tokens is NULL");
+
+ switch(tokens->peek()->Type){
+ case QueryToken::AND_ :
+ //Delete the first token of tokenlist
+ ExtractAndDeleteToken();
+ return CONJ_AND;
+ case QueryToken::OR :
+ //Delete the first token of tokenlist
+ ExtractAndDeleteToken();
+ return CONJ_OR;
+ default :
+ return CONJ_NONE;
+ }
+ }
+
+ int32_t QueryParser::MatchModifier(){
+ //Func - matches for MODIFIER
+ // MODIFIER ::= <PLUS> | <MINUS> | <NOT>
+ //Pre - tokens != NULL
+ //Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned
+ // if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned
+ // otherwise MOD_NONE is returned
+ CND_PRECONDITION(tokens != NULL, "tokens is NULL");
+
+ switch(tokens->peek()->Type){
+ case QueryToken::PLUS :
+ //Delete the first token of tokenlist
+ ExtractAndDeleteToken();
+ return MOD_REQ;
+ case QueryToken::MINUS :
+ case QueryToken::NOT :
+ //Delete the first token of tokenlist
+ ExtractAndDeleteToken();
+ return MOD_NOT;
+ default :
+ return MOD_NONE;
+ }
+ }
+
+ Query* QueryParser::MatchQuery(const TCHAR* field){
+ //Func - matches for QUERY
+ // QUERY ::= [MODIFIER] QueryParser::CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
+ //Pre - field != NULL
+ //Post -
+
+ CND_PRECONDITION(tokens != NULL, "tokens is NULL");
+
+ CL_NS_STD(vector)<BooleanClause*> clauses;
+
+ Query* q = NULL;
+
+ int32_t mods = MOD_NONE;
+ int32_t conj = CONJ_NONE;
+
+ //match for MODIFIER
+ mods = MatchModifier();
+
+ //match for CLAUSE
+ q = MatchClause(field);
+ AddClause(clauses, CONJ_NONE, mods, q);
+
+ // match for CLAUSE*
+ while(true){
+ QueryToken* p = tokens->peek();
+ if(p->Type == QueryToken::EOF_){
+ QueryToken* qt = MatchQueryToken(QueryToken::EOF_);
+ _CLDELETE(qt);
+ break;
+ }
+
+ if(p->Type == QueryToken::RPAREN){
+ //MatchQueryToken(QueryToken::RPAREN);
+ break;
+ }
+
+ //match for a conjuction (AND OR NOT)
+ conj = MatchConjunction();
+ //match for a modifier
+ mods = MatchModifier();
+
+ q = MatchClause(field);
+ if ( q != NULL )
+ AddClause(clauses, conj, mods, q);
+ }
+
+ // finalize query
+ if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL
+ BooleanClause* c = clauses[0];
+ Query* q = c->query;
+
+ //Condition check to be sure clauses[0] is valid
+ CND_CONDITION(c != NULL, "c is NULL");
+
+ //Tell the boolean clause not to delete its query
+ c->deleteQuery=false;
+ //Clear the clauses list
+ clauses.clear();
+ _CLDELETE(c);
+
+ return q;
+ }else{
+ return GetBooleanQuery(clauses);
+ }
+ }
+
+ Query* QueryParser::MatchClause(const TCHAR* field){
+ //Func - matches for CLAUSE
+ // CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>))
+ //Pre - field != NULL
+ //Post -
+
+ Query* q = NULL;
+ const TCHAR* sfield = field;
+ bool delField = false;
+
+ QueryToken *DelToken = NULL;
+
+ //match for [TERM <COLON>]
+ QueryToken* term = tokens->extract();
+ if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){
+ DelToken = MatchQueryToken(QueryToken::COLON);
+
+ CND_CONDITION(DelToken != NULL,"DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ TCHAR* tmp = STRDUP_TtoT(term->Value);
+ discardEscapeChar(tmp);
+ delField = true;
+ sfield = tmp;
+ _CLDELETE(term);
+ }else{
+ tokens->push(term);
+ term = NULL;
+ }
+
+ // match for
+ // TERM | (<LPAREN> QUERY <RPAREN>)
+ if(tokens->peek()->Type == QueryToken::LPAREN){
+ DelToken = MatchQueryToken(QueryToken::LPAREN);
+
+ CND_CONDITION(DelToken != NULL,"DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ q = MatchQuery(sfield);
+ //DSR:2004.11.01:
+ //If exception is thrown while trying to match trailing parenthesis,
+ //need to prevent q from leaking.
+
+ try{
+ DelToken = MatchQueryToken(QueryToken::RPAREN);
+
+ CND_CONDITION(DelToken != NULL,"DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ }catch(...) {
+ _CLDELETE(q);
+ throw;
+ }
+ }else{
+ q = MatchTerm(sfield);
+ }
+
+ if ( delField )
+ _CLDELETE_CARRAY(sfield);
+ return q;
+ }
+
+
+ Query* QueryParser::MatchTerm(const TCHAR* field){
+ //Func - matches for TERM
+ // TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER
+ // [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]]
+ // | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>]
+ // | <QUOTED> [SLOP] [<CARAT> <NUMBER>]
+ //Pre - field != NULL
+ //Post -
+
+ QueryToken* term = NULL;
+ QueryToken* slop = NULL;
+ QueryToken* boost = NULL;
+
+ bool prefix = false;
+ bool wildcard = false;
+ bool fuzzy = false;
+ bool rangein = false;
+ Query* q = NULL;
+
+ term = tokens->extract();
+ QueryToken* DelToken = NULL; //Token that is about to be deleted
+
+ switch(term->Type){
+ case QueryToken::TERM:
+ case QueryToken::NUMBER:
+ case QueryToken::PREFIXTERM:
+ case QueryToken::WILDTERM:
+ { //start case
+ //Check if type of QueryToken term is a prefix term
+ if(term->Type == QueryToken::PREFIXTERM){
+ prefix = true;
+ }
+ //Check if type of QueryToken term is a wildcard term
+ if(term->Type == QueryToken::WILDTERM){
+ wildcard = true;
+ }
+ //Peek to see if the type of the next token is fuzzy term
+ if(tokens->peek()->Type == QueryToken::FUZZY){
+ DelToken = MatchQueryToken(QueryToken::FUZZY);
+
+ CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ fuzzy = true;
+ }
+ if(tokens->peek()->Type == QueryToken::CARAT){
+ DelToken = MatchQueryToken(QueryToken::CARAT);
+
+ CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ boost = MatchQueryToken(QueryToken::NUMBER);
+
+ if(tokens->peek()->Type == QueryToken::FUZZY){
+ DelToken = MatchQueryToken(QueryToken::FUZZY);
+
+ CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ fuzzy = true;
+ }
+ } //end if type==CARAT
+
+ discardEscapeChar(term->Value); //clean up
+ if(wildcard){
+ q = GetWildcardQuery(field,term->Value);
+ break;
+ }else if(prefix){
+ //Create a PrefixQuery
+ term->Value[_tcslen(term->Value)-1] = 0; //discard the *
+ q = GetPrefixQuery(field,term->Value);
+ break;
+ }else if(fuzzy){
+ //Create a FuzzyQuery
+
+ //Check if the last char is a ~
+ if(term->Value[_tcslen(term->Value)-1] == '~'){
+ //remove the ~
+ term->Value[_tcslen(term->Value)-1] = '\0';
+ }
+
+ q = GetFuzzyQuery(field,term->Value);
+ break;
+ }else{
+ q = GetFieldQuery(field, term->Value);
+ break;
+ }
+ }
+
+
+ case QueryToken::RANGEIN:
+ case QueryToken::RANGEEX:{
+ if(term->Type == QueryToken::RANGEIN){
+ rangein = true;
+ }
+
+ if(tokens->peek()->Type == QueryToken::CARAT){
+ DelToken = MatchQueryToken(QueryToken::CARAT);
+
+ CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ boost = MatchQueryToken(QueryToken::NUMBER);
+ }
+
+ TCHAR* noBrackets = term->Value + 1;
+ noBrackets[_tcslen(noBrackets)-1] = 0;
+ q = ParseRangeQuery(field, noBrackets, rangein);
+ break;
+ }
+
+
+ case QueryToken::QUOTED:{
+ if(tokens->peek()->Type == QueryToken::SLOP){
+ slop = MatchQueryToken(QueryToken::SLOP);
+ }
+
+ if(tokens->peek()->Type == QueryToken::CARAT){
+ DelToken = MatchQueryToken(QueryToken::CARAT);
+
+ CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
+ _CLDELETE(DelToken);
+
+ boost = MatchQueryToken(QueryToken::NUMBER);
+ }
+
+ //remove the quotes
+ TCHAR* quotedValue = term->Value+1;
+ quotedValue[_tcslen(quotedValue)-1] = '\0';
+
+ int32_t islop = phraseSlop;
+ if(slop != NULL ){
+ try {
+ TCHAR* end; //todo: should parse using float...
+ islop = (int32_t)_tcstoi64(slop->Value+1, &end, 10);
+ }catch(...){
+ //ignored
+ }
+ }
+
+ q = GetFieldQuery(field, quotedValue, islop);
+ _CLDELETE(slop);
+ }
+ } // end of switch
+
+ _CLDELETE(term);
+
+
+ if( q!=NULL && boost != NULL ){
+ qreal f = 1.0F;
+ try {
+ TCHAR* tmp;
+ f = _tcstod(boost->Value, &tmp);
+ }catch(...){
+ //ignored
+ }
+ _CLDELETE(boost);
+
+ q->setBoost( f);
+ }
+
+ return q;
+ }
+
+ QueryToken* QueryParser::MatchQueryToken(QueryToken::Types expectedType){
+ //Func - matches for QueryToken of the specified type and returns it
+ // otherwise Exception throws
+ //Pre - tokens != NULL
+ //Post -
+
+ CND_PRECONDITION(tokens != NULL,"tokens is NULL");
+
+ if(tokens->count() == 0){
+ throwParserException(_T("Error: Unexpected end of program"),' ',0,0);
+ }
+
+ //Extract a token form the TokenList tokens
+ QueryToken* t = tokens->extract();
+ //Check if the type of the token t matches the expectedType
+ if (expectedType != t->Type){
+ TCHAR buf[200];
+ _sntprintf(buf,200,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType);
+ _CLDELETE(t);
+ throwParserException(buf,' ',0,0);
+ }
+
+ //Return the matched token
+ return t;
+ }
+
+ void QueryParser::ExtractAndDeleteToken(void){
+ //Func - Extracts the first token from the Tokenlist tokenlist
+ // and destroys it
+ //Pre - true
+ //Post - The first token has been extracted and destroyed
+
+ CND_PRECONDITION(tokens != NULL, "tokens is NULL");
+
+ //Extract the token from the TokenList tokens
+ QueryToken* t = tokens->extract();
+ //Condition Check Token may not be NULL
+ CND_CONDITION(t != NULL, "Token is NULL");
+ //Delete Token
+ _CLDELETE(t);
+ }
+
+CL_NS_END