summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoberto Raggi <roberto.raggi@nokia.com>2012-01-30 08:45:00 +0100
committerRoberto Raggi <roberto.raggi@nokia.com>2012-02-28 13:02:54 +0100
commit02403f0530868b4e9aa43a22eb25ceafe4fb4eee (patch)
tree8c467640eb1f7f7361f32fc0229427d49f10a3f2
parent6e3f53b103ceb11d67a58f9fd3397378d5ea491f (diff)
Basic support for generalized LR parsing.qlalr2
Generate GLR friendly parser tables when the grammar use the `%glr-parser' declaration. Change-Id: I9dae3764565dcbbd744768679123523f61d49f9b Reviewed-by: Erik Verbruggen <erik.verbruggen@nokia.com>
-rw-r--r--src/cppgenerator.cpp139
-rw-r--r--src/cppgenerator.h3
-rw-r--r--src/qlalr.qlalr9
-rw-r--r--src/qlalr_parser.cpp158
4 files changed, 231 insertions, 78 deletions
diff --git a/src/cppgenerator.cpp b/src/cppgenerator.cpp
index cb47d4c..82de05b 100644
--- a/src/cppgenerator.cpp
+++ b/src/cppgenerator.cpp
@@ -192,6 +192,10 @@ void CppGenerator::operator () ()
++reduce_reduce_conflict_count;
+ const int la = grammar->indexOfToken(s);
+ conflicts[q].insert(la, u);
+ conflicts[q].insert(la, -r);
+
u = qMax (u, -r);
if (verbose)
@@ -225,6 +229,10 @@ void CppGenerator::operator () ()
else
{
+ const int la = grammar->indexOfToken(s);
+ conflicts[q].insert(la, u);
+ conflicts[q].insert(la, -r);
+
++shift_reduce_conflict_count;
if (verbose)
@@ -509,8 +517,16 @@ void CppGenerator::generateDecl (QTextStream &out)
<< " static const short action_default [];" << endl
<< " static const short action_index [];" << endl
<< " static const short action_info [];" << endl
- << " static const short action_check [];" << endl
- << endl
+ << " static const short action_check [];" << endl;
+
+ if (flags->glr_parser) {
+ out << " static const short conflict_index [];" << endl
+ << " static const short conflict_info [];" << endl
+ << " static const short conflict_check [];" << endl
+ << " static const short conflicts [];" << endl;
+ }
+
+ out << endl
<< " static inline int nt_action (int state, int nt)" << endl
<< " {" << endl
<< " const int yyn = action_index [GOTO_INDEX_OFFSET + state] + nt;" << endl
@@ -528,12 +544,34 @@ void CppGenerator::generateDecl (QTextStream &out)
<< " return - action_default [state];" << endl
<< endl
<< " return action_info [yyn];" << endl
- << " }" << endl
- << "};" << endl
+ << " }" << endl;
+
+ if (flags->glr_parser) {
+ out << endl
+ << " static inline const short *alternatives (int state, int token)" << endl
+ << " {" << endl
+ << " const int yyn = conflict_index [state] + token;" << endl
+ << endl
+ << " if (yyn < 0 || conflict_check [yyn] != token)" << endl
+ << " return 0;" << endl
+ << endl
+ << " return &conflicts [conflict_info [yyn]];" << endl
+ << " }" << endl;
+ }
+
+ out << "};" << endl
<< endl
<< endl;
}
+static QList<int> stripped(const QList<int> &v)
+{
+ QMap<int, bool> m;
+ foreach (int x, v)
+ m.insert(x, true);
+ return m.keys();
+}
+
void CppGenerator::generateImpl (QTextStream &out)
{
int idx = 0;
@@ -731,4 +769,97 @@ void CppGenerator::generateImpl (QTextStream &out)
out << ' ' << compressed_goto.check [i];
}
out << "};" << endl << endl;
+
+ if (flags->glr_parser) {
+ QList<QList<int> > processed;
+ QList<int> offsets;
+ QList<int> flat;
+ flat.push_back(0);
+
+ QMapIterator<int, QMultiMap<int, int> > it(conflicts);
+ while (it.hasNext()) {
+ it.next();
+ const QMultiMap<int, int> &m = it.value();
+ int last_la = -1;
+ foreach (int la, m.keys()) {
+ if (last_la == la)
+ continue;
+ last_la = la;
+ const QList<int> v = stripped(m.values(la));
+ int offset = processed.indexOf(v);
+ if (offset == -1) {
+ offset = flat.size();
+ flat += v;
+ flat.append(0);
+ processed.append(v);
+ offsets.append(offset);
+ }
+ }
+ }
+ QVector<int> data(state_count * terminal_count);
+ it.toFront();
+ while (it.hasNext()) {
+ it.next();
+ const int q = it.key();
+ const QMultiMap<int, int> m = it.value();
+ int last_la = -1;
+ foreach (int la, m.keys()) {
+ if (la == last_la)
+ continue;
+ last_la = la;
+ const QList<int> v = stripped(m.values(la));
+ const int idx = processed.indexOf(v);
+ Q_ASSERT(idx != -1);
+ const int offset = offsets.at(idx);
+ data[q * terminal_count + la] = offset;
+ }
+ }
+
+ Compress k;
+ k(data.data(), state_count, terminal_count);
+
+ out << "const short " << flags->table_name << "::conflict_index [] = {";
+ for (int i = 0; i < k.index.size(); ++i) {
+ if (i)
+ out << ',';
+ if (! (i % 10))
+ out << endl << ' ';
+ out << ' ' << k.index.at(i);
+ }
+ out << "};" << endl
+ << endl;
+
+ out << "const short " << flags->table_name << "::conflict_info [] = {";
+ for (int i = 0; i < k.info.size(); ++i) {
+ if (i)
+ out << ',';
+ if (! (i % 10))
+ out << endl << ' ';
+ out << ' ' << k.info.at(i);
+ }
+ out << "};" << endl
+ << endl;
+
+ out << "const short " << flags->table_name << "::conflict_check [] = {";
+ for (int i = 0; i < k.check.size(); ++i) {
+ if (i)
+ out << ',';
+ if (! (i % 10))
+ out << endl << ' ';
+ out << ' ' << k.check.at(i);
+ }
+ out << "};" << endl
+ << endl;
+
+ out << "const short " << flags->table_name << "::conflicts [] = {";
+ for (int i = 0; i < flat.size(); ++i) {
+ if (i)
+ out << ',';
+ if (! (i % 10))
+ out << endl << ' ';
+ out << ' ' << flat.at(i);
+ }
+ out << "};" << endl
+ << endl;
+ }
}
diff --git a/src/cppgenerator.h b/src/cppgenerator.h
index e70d5a8..0fbe93d 100644
--- a/src/cppgenerator.h
+++ b/src/cppgenerator.h
@@ -64,6 +64,7 @@ struct Options {
bool no_lines: 1;
bool no_debug: 1;
bool qt: 1;
+ bool glr_parser: 1;
Options()
: expected_shift_reduce(0)
@@ -72,6 +73,7 @@ struct Options {
, no_lines(false)
, no_debug(false)
, qt(false)
+ , glr_parser(false)
{
table_name = "parser_table";
}
@@ -124,6 +126,7 @@ private:
Compress compressed_goto;
QVector<int> count;
QVector<int> defgoto;
+ QMap<int, QMultiMap<int, int> > conflicts;
};
#endif // CPPGENERATOR_H
diff --git a/src/qlalr.qlalr b/src/qlalr.qlalr
index 2d564ab..5fea41d 100644
--- a/src/qlalr.qlalr
+++ b/src/qlalr.qlalr
@@ -56,6 +56,7 @@
%token IMPL_FILE
%token EXPECT
%token EXPECT_RR
+%token GLR_PARSER
%token IDENTIFIER
%token STRING_LITERAL
@@ -334,6 +335,13 @@ case $rule_number: {
} break;
./
+Directive ::= GLR_PARSER;
+/.
+case $rule_number: {
+ flags->glr_parser = true;
+} break;
+./
+
TokenKind ::= TOKEN ;
/.
case $rule_number: {
@@ -492,6 +500,7 @@ again:
else if (yytext == QLatin1String("right")) return T_RIGHT;
else if (yytext == QLatin1String("nonassoc")) return T_NONASSOC;
else if (yytext == QLatin1String("prec")) return T_PREC;
+ else if (yytext == QLatin1String("glr-parser")) return T_GLR_PARSER;
fprintf(stderr, "unrecognized directive %s\n", qPrintable(yytext));
return T_ERROR;
} else if ((ch == QLatin1Char('-') && yychar == QLatin1Char('-')) ||
diff --git a/src/qlalr_parser.cpp b/src/qlalr_parser.cpp
index 2820b9a..9fc9646 100644
--- a/src/qlalr_parser.cpp
+++ b/src/qlalr_parser.cpp
@@ -76,23 +76,24 @@ public:
T_IMPL_FILE = 11,
T_EXPECT = 12,
T_EXPECT_RR = 13,
- T_IDENTIFIER = 14,
- T_STRING_LITERAL = 15,
- T_SEMICOLON = 16,
- T_COLON = 17,
- T_COLON_COLON_EQUAL = 18,
- T_BAR = 19,
- T_ERROR = 20,
-
- ACCEPT_STATE = 26,
- RULE_COUNT = 41,
- STATE_COUNT = 58,
- TERMINAL_COUNT = 21,
+ T_GLR_PARSER = 14,
+ T_IDENTIFIER = 15,
+ T_STRING_LITERAL = 16,
+ T_SEMICOLON = 17,
+ T_COLON = 18,
+ T_COLON_COLON_EQUAL = 19,
+ T_BAR = 20,
+ T_ERROR = 21,
+
+ ACCEPT_STATE = 27,
+ RULE_COUNT = 42,
+ STATE_COUNT = 59,
+ TERMINAL_COUNT = 22,
NON_TERMINAL_COUNT = 21,
- GOTO_INDEX_OFFSET = 58,
- GOTO_INFO_OFFSET = 60,
- GOTO_CHECK_OFFSET = 60
+ GOTO_INDEX_OFFSET = 59,
+ GOTO_INFO_OFFSET = 83,
+ GOTO_CHECK_OFFSET = 83
};
static const char *const spell [];
@@ -128,73 +129,77 @@ public:
const char *const parser_table::spell [] = {
"end of file", 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0};
+ 0, 0};
const short parser_table::lhs [] = {
- 21, 22, 23, 24, 24, 25, 25, 26, 26, 27,
- 27, 27, 27, 27, 27, 27, 28, 28, 28, 28,
- 29, 30, 30, 31, 31, 32, 33, 33, 34, 35,
- 36, 36, 37, 38, 38, 38, 38, 39, 40, 40,
- 41};
+ 22, 23, 24, 25, 25, 26, 26, 27, 27, 28,
+ 28, 28, 28, 28, 28, 28, 28, 29, 29, 29,
+ 29, 30, 31, 31, 32, 32, 33, 34, 34, 35,
+ 36, 37, 37, 38, 39, 39, 39, 39, 40, 41,
+ 41, 42};
const short parser_table::rhs [] = {
2, 1, 3, 1, 2, 1, 2, 1, 1, 2,
2, 2, 2, 2, 2, 2, 1, 1, 1, 1,
- 2, 1, 2, 1, 2, 2, 1, 3, 1, 4,
- 1, 1, 1, 0, 1, 2, 1, 2, 1, 2,
- 1};
+ 1, 2, 1, 2, 1, 2, 2, 1, 3, 1,
+ 4, 1, 1, 1, 0, 1, 2, 1, 2, 1,
+ 2, 1};
const short parser_table::action_default [] = {
- 0, 20, 0, 4, 18, 0, 9, 2, 0, 0,
- 8, 0, 0, 0, 0, 17, 0, 0, 19, 12,
- 10, 22, 21, 24, 13, 14, 1, 11, 16, 15,
- 0, 5, 0, 23, 25, 0, 3, 6, 33, 26,
- 32, 34, 31, 7, 37, 0, 0, 39, 27, 35,
- 41, 40, 36, 38, 30, 34, 29, 28};
+ 0, 21, 0, 4, 19, 0, 9, 2, 0, 0,
+ 8, 0, 0, 0, 0, 18, 0, 0, 20, 17,
+ 12, 10, 23, 22, 25, 13, 14, 1, 11, 16,
+ 15, 5, 0, 0, 24, 26, 0, 3, 6, 34,
+ 27, 33, 35, 32, 7, 38, 0, 0, 40, 28,
+ 36, 42, 41, 37, 39, 31, 35, 30, 29};
const short parser_table::goto_default [] = {
- 0, 12, 7, 17, 36, 3, 10, 8, 6, 22,
- 21, 30, 46, 55, 37, 41, 35, 48, 49, 44,
- 47};
+ 0, 12, 7, 17, 37, 3, 10, 8, 6, 23,
+ 22, 32, 47, 56, 38, 42, 36, 49, 50, 45,
+ 48};
const short parser_table::action_index [] = {
- 39, -21, -12, -21, -21, -11, -21, -21, -13, -14,
- -21, -2, 21, -1, 0, -21, -3, 28, -21, -21,
- -21, -21, -5, -10, -21, -21, -21, -21, -21, -21,
- -4, -21, -6, -21, -21, 5, -8, -21, -21, -21,
- -21, 13, -21, -21, 16, -7, 1, -21, -21, -21,
- -21, -21, -21, -21, -21, 2, -21, -21,
+ 40, -22, -13, -22, -22, -12, -22, -22, -14, -15,
+ -22, -11, 21, -2, -9, -22, -4, 61, -22, -22,
+ -22, -22, -22, -3, -7, -22, -22, -22, -22, -22,
+ -22, -22, -5, -1, -22, -22, 4, -10, -22, -22,
+ -22, -22, 14, -22, -22, 16, -8, 0, -22, -22,
+ -22, -22, -22, -22, -22, -22, 13, -22, -22,
-21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
- -21, -21, -21, -21, -21, -21, -21, -4, -21, -21,
- -21, -21, 1, -21, -21, -21, -21, -21, -21, -21,
- -21, -21, -21, -21, -21, -21, -10, -21, -21, -21,
- -21, -21, -21, -21, -15, -21, -21, -21, -21, -21,
- -21, -21, -21, -21, -21, -17, -21, -21};
+ -21, -21, -21, -21, -21, -21, -21, -5, -21, -21,
+ -21, -21, -21, -7, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -12, -21, -21,
+ -21, -21, -21, -21, -21, -14, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -16, -21, -21};
const short parser_table::action_info [] = {
- 24, 23, 19, 20, 45, 34, 38, 53, 39, 23,
- 38, 29, 25, 27, 28, 45, 50, 54, 45, 0,
- 56, 26, 42, 40, 0, 0, 0, 50, 0, 32,
- 50, 15, 4, 18, 1, 5, 13, 2, 9, 11,
- 16, 14, 15, 4, 18, 1, 5, 13, 2, 9,
- 11, 16, 14, 0, 0, 0, 0, 0, 0, 0,
-
- 57, 31, 0, 52, 43, 51, 0, 0, 0, 0,
- 0, 33, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0};
+ 25, 24, 20, 21, 26, 39, 29, 54, 0, 35,
+ 39, 30, 24, 28, 40, 46, 46, 55, 46, 0,
+ 57, 27, 43, 41, 0, 0, 0, 0, 51, 51,
+ 0, 51, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 15, 4, 18, 1, 5, 13, 2,
+ 9, 11, 16, 14, 19, 0, 0, 0, 0, 0,
+ 0, 0, 33, 0, 15, 4, 18, 1, 5, 13,
+ 2, 9, 11, 16, 14, 19, 0, 0, 0, 0,
+ 0, 0, 0,
+
+ 31, 58, 44, 34, 53, 0, 52, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0};
const short parser_table::action_check [] = {
- 14, 14, 14, 14, 2, 15, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 2, 14, 16, 2, -1,
- 19, 0, 17, 18, -1, -1, -1, 14, -1, 1,
- 14, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, -1, -1, -1, -1, -1, -1, -1,
+ 15, 15, 15, 15, 15, 15, 15, 15, -1, 16,
+ 15, 15, 15, 15, 15, 2, 2, 17, 2, -1,
+ 20, 0, 18, 19, -1, -1, -1, -1, 15, 15,
+ -1, 15, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, -1, -1, -1, -1, -1,
+ -1, -1, 1, -1, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, -1, -1, -1, -1,
+ -1, -1, -1,
- 17, 5, -1, 18, 14, 20, -1, -1, -1, -1,
- -1, 10, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1};
+ 5, 17, 14, 10, 18, -1, 20, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1};
#include "qlalr.h"
@@ -425,55 +430,59 @@ case 15: {
} break;
case 16: {
+ flags->glr_parser = true;
+} break;
+
+case 17: {
terminalKind = T_TOKEN;
assoc = Token::Nonassoc;
} break;
-case 17: {
+case 18: {
terminalKind = T_LEFT;
assoc = Token::Left;
++prec;
} break;
-case 18: {
+case 19: {
terminalKind = T_RIGHT;
assoc = Token::Right;
++prec;
} break;
-case 19: {
+case 20: {
terminalKind = T_NONASSOC;
assoc = Token::Nonassoc;
++prec;
} break;
-case 23: {
+case 24: {
G.enterToken(string(1), QString(), assoc, terminalKind == T_TOKEN ? -1 : prec);
} break;
-case 24: {
+case 25: {
G.enterToken(string(1), string(2), assoc, terminalKind == T_TOKEN ? -1 : prec);
} break;
-case 25: {
+case 26: {
G.setStartSymbol(string(2));
} break;
-case 28: {
+case 29: {
G.finishRule();
QString lhs = G.currentRule()->lhs;
G.startRule(lhs);
} break;
-case 29: {
+case 30: {
G.finishRule();
} break;
-case 32: {
+case 33: {
G.startRule(string(1));
} break;
-case 37: {
+case 38: {
const int precTokenIndex = G.indexOfToken(string(2));
if (precTokenIndex == -1)
fprintf(stderr, "warning: undefined token %s\n", qPrintable(string(2)));
@@ -481,7 +490,7 @@ case 37: {
G.currentRule()->precToken = precTokenIndex;
} break;
-case 40: {
+case 41: {
G.enterSymbol(string(1));
} break;
@@ -523,6 +532,7 @@ again:
else if (yytext == QLatin1String("right")) return T_RIGHT;
else if (yytext == QLatin1String("nonassoc")) return T_NONASSOC;
else if (yytext == QLatin1String("prec")) return T_PREC;
+ else if (yytext == QLatin1String("glr-parser")) return T_GLR_PARSER;
fprintf(stderr, "unrecognized directive %s\n", qPrintable(yytext));
return T_ERROR;
} else if ((ch == QLatin1Char('-') && yychar == QLatin1Char('-')) ||