-- Copyright (C) 2016 The Qt Company Ltd. -- SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 %parser grammar %decl recognizer.h %impl recognizer.cpp %token ID "identifier" %token STRING_LITERAL "string literal" %token DECL_FILE "%decl" %token EXPECT "%expect" %token EXPECT_RR "%expect-lr" %token IMPL_FILE "%impl" %token LEFT "%left" %token MERGED_OUTPUT "%merged_output" %token NONASSOC "%nonassoc" %token PARSER "%parser" %token PREC "%prec" %token RIGHT "%right" %token START "%start" %token TOKEN "%token" %token TOKEN_PREFIX "%token_prefix" %token COLON ":" %token OR "|" %token SEMICOLON ";" %token DECL %token IMPL %token ERROR %start Specification /:// Copyright (C) 2016 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include "$header" #include "lalr.h" #include #include #include #include #include class Recognizer: protected $table { public: Recognizer (Grammar *grammar, bool no_lines); ~Recognizer(); bool parse (const QString &input_file = QString ()); inline QString decls () const { return _M_decls; } inline QString impls () const { return _M_impls; } protected: inline void reallocateStack (); inline QString &sym (int index) { return sym_stack [tos + index - 1]; } protected: // scanner int nextToken(); inline void inp () { if (_M_currentChar != _M_lastChar) { ch = *_M_currentChar++; if (ch == u'\n') ++_M_line; } else ch = QChar(); } QString expand (const QString &text) const; protected: // recognizer int tos; int stack_size; QList sym_stack; int *state_stack; QString _M_contents; QString::const_iterator _M_firstChar; QString::const_iterator _M_lastChar; QString::const_iterator _M_currentChar; // scanner QChar ch; int _M_line; int _M_action_line; Grammar *_M_grammar; RulePointer _M_current_rule; QString _M_input_file; QString _M_decls; QString _M_impls; QString _M_current_value; bool _M_no_lines; }; :/ /.// Copyright (C) 2016 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include "recognizer.h" #include #include #include #include using namespace Qt::StringLiterals; Recognizer::Recognizer (Grammar *grammar, bool no_lines): tos(0), stack_size(0), state_stack(0), _M_line(1), _M_action_line(0), _M_grammar(grammar), _M_no_lines(no_lines) { } Recognizer::~Recognizer() { if (stack_size) ::free(state_stack); } inline void Recognizer::reallocateStack() { if (! stack_size) stack_size = 128; else stack_size <<= 1; sym_stack.resize (stack_size); if (! state_stack) state_stack = reinterpret_cast (::malloc(stack_size * sizeof(int))); else state_stack = reinterpret_cast (::realloc(state_stack, stack_size * sizeof(int))); } int Recognizer::nextToken() { QString text; Lagain: while (ch.isSpace ()) inp (); if (ch.isNull ()) return EOF_SYMBOL; int token = ch.unicode (); if (token == '"') { inp(); // skip " text.clear (); while (! ch.isNull () && ch != u'"') { if (ch == u'\\') { text += ch; inp(); } text += ch; inp (); } if (ch == u'"') inp (); else qerr() << _M_input_file << ":" << _M_line << ": Warning. Expected `\"'" << Qt::endl; _M_current_value = text; return (token = STRING_LITERAL); } else if (ch.isLetterOrNumber () || ch == u'_') { text.clear (); do { text += ch; inp (); } while (ch.isLetterOrNumber () || ch == u'_' || ch == u'.'); _M_current_value = text; return (token = ID); } else if (token == '%') { text.clear (); do { inp (); } while (ch.isSpace ()); do { text += ch; inp (); } while (ch.isLetterOrNumber () || ch == u'_' || ch == u'-'); if (text == "token_prefix"_L1) return (token = TOKEN_PREFIX); else if (text == "merged_output"_L1) return (token = MERGED_OUTPUT); else if (text == "token"_L1) return (token = TOKEN); else if (text == "start"_L1) return (token = START); else if (text == "parser"_L1) return (token = PARSER); else if (text == "decl"_L1) return (token = DECL_FILE); else if (text == "impl"_L1) return (token = IMPL_FILE); else if (text == "expect"_L1) return (token = EXPECT); else if (text == "expect-rr"_L1) return (token = EXPECT_RR); else if (text == "left"_L1) return (token = LEFT); else if (text == "right"_L1) return (token = RIGHT); else if (text == "nonassoc"_L1) return (token = NONASSOC); else if (text == "prec"_L1) return (token = PREC); else { qerr() << _M_input_file << ":" << _M_line << ": Unknown keyword `" << text << "'" << Qt::endl; exit (EXIT_FAILURE); return (token = ERROR); } } inp (); if (token == '-' && ch == u'-') { do { inp (); } while (! ch.isNull () && ch != u'\n'); goto Lagain; } else if (token == ':' && ch == u':') { inp (); if (ch != u'=') return (token = ERROR); inp (); return (token = COLON); } else if (token == '/' && ch == u':') { _M_action_line = _M_line; text.clear (); if (! _M_no_lines) text += "\n#line "_L1 + QString::number(_M_action_line) + " \""_L1 + QDir::fromNativeSeparators(_M_input_file) + "\"\n"_L1; inp (); // skip ':' forever { while (! ch.isNull ()) { token = ch.unicode (); inp (); if (token == ':' && ch == u'/') break; text += QLatin1Char (token); } if (ch != u'/') return (token = ERROR); inp (); if (ch.isNull () || ch.isSpace ()) { _M_current_value = text; return (token = DECL); } else text += ":/"_L1; } } else if (token == '/' && ch == u'.') { _M_action_line = _M_line; text.clear (); if (! _M_no_lines) text += "\n#line "_L1 + QString::number(_M_action_line) + " \""_L1 + QDir::fromNativeSeparators(_M_input_file) + "\"\n"_L1; inp (); // skip ':' forever { while (! ch.isNull ()) { token = ch.unicode (); inp (); if (token == '.' && ch == u'/') break; text += QLatin1Char (token); } if (ch != u'/') return (token = ERROR); inp (); if (ch.isNull () || ch.isSpace ()) { _M_current_value = text; return (token = IMPL); } else text += "./"_L1; } } switch (token) { case ':': return (token = COLON); case ';': return (token = SEMICOLON); case '|': return (token = OR); default: break; } return token; } bool Recognizer::parse (const QString &input_file) { _M_input_file = input_file; QFile file(_M_input_file); if (! file.open(QFile::ReadOnly)) { qerr() << "qlalr: no input file\n"; return false; } QString _M_contents = QTextStream(&file).readAll(); _M_firstChar = _M_contents.constBegin(); _M_lastChar = _M_contents.constEnd(); _M_currentChar = _M_firstChar; _M_line = 1; int yytoken = -1; inp (); reallocateStack(); _M_current_rule = _M_grammar->rules.end (); _M_decls.clear (); _M_impls.clear (); tos = 0; state_stack[++tos] = 0; while (true) { if (yytoken == -1 && - TERMINAL_COUNT != action_index [state_stack [tos]]) yytoken = nextToken(); int act = t_action (state_stack [tos], yytoken); if (act == ACCEPT_STATE) return true; else if (act > 0) { if (++tos == stack_size) reallocateStack(); sym_stack [tos] = _M_current_value; state_stack [tos] = act; yytoken = -1; } else if (act < 0) { int r = - act - 1; tos -= rhs [r]; act = state_stack [tos++]; switch (r) { ./ ----------------------------------------------------------- SPECS Specification ::= Options Tokens Start Rules ; Options ::= Empty ; Options ::= Options Option ; StartHeader ::= START ID ; /. case $rule_number: { Name name = _M_grammar->intern (sym(2)); _M_grammar->start = name; _M_grammar->non_terminals.insert (name); } break; ./ Start ::= StartHeader UserActionOpt ; ----------------------------------------------------------- OPTIONS Option ::= PARSER ID ; /. case $rule_number: { _M_grammar->table_name = sym(2); } break; ./ Option ::= MERGED_OUTPUT ID ; /. case $rule_number: { _M_grammar->merged_output = sym(2); } break; ./ Option ::= DECL_FILE ID ; /. case $rule_number: { _M_grammar->decl_file_name = sym(2); } break; ./ Option ::= IMPL_FILE ID ; /. case $rule_number: { _M_grammar->impl_file_name = sym(2); } break; ./ Option ::= EXPECT ID ; /. case $rule_number: { _M_grammar->expected_shift_reduce = sym(2).toInt(); } break; ./ Option ::= EXPECT_RR ID ; /. case $rule_number: { _M_grammar->expected_reduce_reduce = sym(2).toInt(); } break; ./ Option ::= TOKEN_PREFIX ID ; /. case $rule_number: { _M_grammar->token_prefix = sym(2); } break; ./ ----------------------------------------------------------- TOKENS Tokens ::= Empty ; Tokens ::= Tokens Token ; Token ::= TOKEN TerminalList ; TerminalList ::= Terminal ; TerminalList ::= TerminalList Terminal ; Terminal ::= ID Empty ; /.case $rule_number:./ Terminal ::= ID STRING_LITERAL ; /.case $rule_number: { Name name = _M_grammar->intern (sym(1)); _M_grammar->terminals.insert (name); _M_grammar->spells.insert (name, sym(2)); } break; ./ PrecHeader: LEFT ; /. case $rule_number: { _M_grammar->current_assoc = Grammar::Left; ++_M_grammar->current_prec; } break; ./ PrecHeader: RIGHT ; /. case $rule_number: { _M_grammar->current_assoc = Grammar::Right; ++_M_grammar->current_prec; } break; ./ PrecHeader: NONASSOC ; /. case $rule_number: { _M_grammar->current_assoc = Grammar::NonAssoc; ++_M_grammar->current_prec; } break; ./ Token ::= PrecHeader TokenList ; TokenList ::= TokenId ; TokenList ::= TokenList TokenId ; TokenId ::= ID ; /. case $rule_number: { Name name = _M_grammar->intern (sym(1)); _M_grammar->terminals.insert (name); Grammar::TokenInfo info; info.prec = _M_grammar->current_prec; info.assoc = _M_grammar->current_assoc; _M_grammar->token_info.insert (name, info); } break; ./ ----------------------------------------------------------- Code Code ::= DECL ; /. case $rule_number: { _M_decls += expand (sym(1)); } break; ./ Code ::= IMPL ; /. case $rule_number: { _M_impls += expand (sym(1)); } break; ./ UserAction ::= Code ; UserAction ::= UserAction Code ; UserActionOpt ::= ; UserActionOpt ::= UserAction ; ----------------------------------------------------------- RULES Rules ::= Empty ; Rules ::= Rules Rule ; RuleHeader ::= ID COLON ; /. case $rule_number: { _M_current_rule = _M_grammar->rules.insert (_M_grammar->rules.end (), Rule ()); _M_current_rule->lhs = _M_grammar->intern (sym(1)); _M_grammar->declared_lhs.insert (_M_current_rule->lhs); if (_M_grammar->terminals.find (_M_current_rule->lhs) != _M_grammar->terminals.end ()) { qerr() << _M_input_file << ":" << _M_line << ": Invalid non terminal `" << *_M_current_rule->lhs << "'" << Qt::endl; return false; } _M_grammar->non_terminals.insert (_M_current_rule->lhs); } break; ./ Rule ::= RuleHeader RuleDefinition SEMICOLON UserActionOpt ; RuleDefinition ::= Symbols PrecOpt UserActionOpt ; RuleDefinition ::= RuleDefinition NewRule OR Symbols PrecOpt UserActionOpt ; NewRule ::= ; /. case $rule_number: { Name lhs = _M_current_rule->lhs; _M_current_rule = _M_grammar->rules.insert (_M_grammar->rules.end (), Rule ()); _M_current_rule->lhs = lhs; _M_grammar->declared_lhs.insert (_M_current_rule->lhs); if (_M_grammar->terminals.find (_M_current_rule->lhs) != _M_grammar->terminals.end ()) { qerr() << _M_input_file << ":" << _M_line << ": Invalid non terminal `" << *_M_current_rule->lhs << "'" << Qt::endl; return false; } _M_grammar->non_terminals.insert (_M_current_rule->lhs); } break; ./ PrecOpt ::= ; /. case $rule_number: { _M_current_rule->prec = _M_grammar->names.end (); for (NameList::iterator it = _M_current_rule->rhs.begin (); it != _M_current_rule->rhs.end (); ++it) { if (! _M_grammar->isTerminal (*it)) continue; _M_current_rule->prec = *it; } } break; ./ PrecOpt ::= PREC ID ; /. case $rule_number: { Name tok = _M_grammar->intern (sym(2)); if (! _M_grammar->isTerminal (tok)) { qerr() << _M_input_file << ":" << _M_line << ": `" << *tok << " is not a terminal symbol" << Qt::endl; _M_current_rule->prec = _M_grammar->names.end (); } else _M_current_rule->prec = tok; } break; ./ ----------------------------------------------------------- SYMBOLS Symbols ::= Empty ; Symbols ::= Symbols ID ; /. case $rule_number: { Name name = _M_grammar->intern (sym(2)); if (_M_grammar->terminals.find (name) == _M_grammar->terminals.end ()) _M_grammar->non_terminals.insert (name); _M_current_rule->rhs.push_back (name); } break; ./ ----------------------------------------------------------- HELPERS Empty ::= ; /. case $rule_number: { sym(1) = QString(); } break; ./ ----------------------------------------------------------- END /. } // switch state_stack [tos] = nt_action (act, lhs [r] - TERMINAL_COUNT); } else { break; } } qerr() << _M_input_file << ":" << _M_line << ": Syntax error" << Qt::endl; return false; } ./