summaryrefslogtreecommitdiffstats
path: root/util/lexgen
diff options
context:
space:
mode:
authorQt by Nokia <qt-info@nokia.com>2011-04-27 12:05:43 +0200
committeraxis <qt-info@nokia.com>2011-04-27 12:05:43 +0200
commit38be0d13830efd2d98281c645c3a60afe05ffece (patch)
tree6ea73f3ec77f7d153333779883e8120f82820abe /util/lexgen
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to 'util/lexgen')
-rw-r--r--util/lexgen/README16
-rw-r--r--util/lexgen/configfile.cpp99
-rw-r--r--util/lexgen/configfile.h81
-rw-r--r--util/lexgen/css2-simplified.lexgen93
-rw-r--r--util/lexgen/generator.cpp532
-rw-r--r--util/lexgen/generator.h221
-rw-r--r--util/lexgen/global.h113
-rw-r--r--util/lexgen/lexgen.lexgen24
-rw-r--r--util/lexgen/lexgen.pri3
-rw-r--r--util/lexgen/lexgen.pro6
-rw-r--r--util/lexgen/main.cpp323
-rw-r--r--util/lexgen/nfa.cpp508
-rw-r--r--util/lexgen/nfa.h127
-rw-r--r--util/lexgen/re2nfa.cpp547
-rw-r--r--util/lexgen/re2nfa.h116
-rw-r--r--util/lexgen/test.lexgen9
-rw-r--r--util/lexgen/tests/testdata/backtrack1/input1
-rw-r--r--util/lexgen/tests/testdata/backtrack1/output1
-rw-r--r--util/lexgen/tests/testdata/backtrack1/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/backtrack2/input1
-rw-r--r--util/lexgen/tests/testdata/backtrack2/output2
-rw-r--r--util/lexgen/tests/testdata/backtrack2/rules.lexgen4
-rw-r--r--util/lexgen/tests/testdata/casesensitivity/input1
-rw-r--r--util/lexgen/tests/testdata/casesensitivity/output14
-rw-r--r--util/lexgen/tests/testdata/casesensitivity/rules.lexgen7
-rw-r--r--util/lexgen/tests/testdata/comments/input1
-rw-r--r--util/lexgen/tests/testdata/comments/output2
-rw-r--r--util/lexgen/tests/testdata/comments/rules.lexgen2
-rw-r--r--util/lexgen/tests/testdata/dot/input1
-rw-r--r--util/lexgen/tests/testdata/dot/output2
-rw-r--r--util/lexgen/tests/testdata/dot/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/negation/input1
-rw-r--r--util/lexgen/tests/testdata/negation/output2
-rw-r--r--util/lexgen/tests/testdata/negation/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/quoteinset/input1
-rw-r--r--util/lexgen/tests/testdata/quoteinset/output1
-rw-r--r--util/lexgen/tests/testdata/quoteinset/rules.lexgen2
-rw-r--r--util/lexgen/tests/testdata/quotes/input1
-rw-r--r--util/lexgen/tests/testdata/quotes/output1
-rw-r--r--util/lexgen/tests/testdata/quotes/rules.lexgen2
-rw-r--r--util/lexgen/tests/testdata/simple/input1
-rw-r--r--util/lexgen/tests/testdata/simple/output2
-rw-r--r--util/lexgen/tests/testdata/simple/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/subsets1/input1
-rw-r--r--util/lexgen/tests/testdata/subsets1/output2
-rw-r--r--util/lexgen/tests/testdata/subsets1/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/subsets2/input1
-rw-r--r--util/lexgen/tests/testdata/subsets2/output3
-rw-r--r--util/lexgen/tests/testdata/subsets2/rules.lexgen4
-rw-r--r--util/lexgen/tests/tests.pro6
-rw-r--r--util/lexgen/tests/tst_lexgen.cpp285
-rw-r--r--util/lexgen/tokenizer.cpp237
52 files changed, 3425 insertions, 0 deletions
diff --git a/util/lexgen/README b/util/lexgen/README
new file mode 100644
index 0000000000..9feb18d1e3
--- /dev/null
+++ b/util/lexgen/README
@@ -0,0 +1,16 @@
+Lexgen
+------
+
+This is a little tool to generate lexical scanners from a rather simplistic
+configuration file. We use it internally in Qt to generate the scanner for the
+CSS parser that is built into the toolkit (used for the widget styling and the
+HTML import into QTextDocument).
+
+Beware, it's very slow (in generating the code) and it may not generate what
+you want. But I like that it generates code that operates on QChar and friends.
+
+Use at your own risk ;-)
+
+
+--
+Simon Hausmann <simon.hausmann@nokia.com>
diff --git a/util/lexgen/configfile.cpp b/util/lexgen/configfile.cpp
new file mode 100644
index 0000000000..dab2f61686
--- /dev/null
+++ b/util/lexgen/configfile.cpp
@@ -0,0 +1,99 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "configfile.h"
+
+#include <QFile>
+
+ConfigFile::SectionMap ConfigFile::parse(const QString &fileName)
+{
+ QFile f(fileName);
+ if (!f.open(QIODevice::ReadOnly))
+ return ConfigFile::SectionMap();
+ return parse(&f);
+}
+
+ConfigFile::SectionMap ConfigFile::parse(QIODevice *dev)
+{
+ SectionMap sections;
+ SectionMap::Iterator currentSection = sections.end();
+
+ ConfigFile::SectionMap result;
+ int currentLineNumber = 0;
+ while (!dev->atEnd()) {
+ QString line = QString::fromUtf8(dev->readLine()).trimmed();
+ ++currentLineNumber;
+
+ if (line.isEmpty() || line.startsWith(QLatin1Char('#')))
+ continue;
+
+ if (line.startsWith(QLatin1Char('['))) {
+ if (!line.endsWith(']')) {
+ qWarning("Syntax error at line %d: Missing ']' at start of new section.", currentLineNumber);
+ return SectionMap();
+ }
+ line.remove(0, 1);
+ line.chop(1);
+ const QString sectionName = line;
+ currentSection = sections.insert(sectionName, Section());
+ continue;
+ }
+
+ if (currentSection == sections.end()) {
+ qWarning("Syntax error at line %d: Entry found outside of any section.", currentLineNumber);
+ return SectionMap();
+ }
+
+ Entry e;
+ e.lineNumber = currentLineNumber;
+
+ int equalPos = line.indexOf(QLatin1Char('='));
+ if (equalPos == -1) {
+ e.key = line;
+ } else {
+ e.key = line;
+ e.key.truncate(equalPos);
+ e.key = e.key.trimmed();
+ e.value = line.mid(equalPos + 1).trimmed();
+ }
+ currentSection->append(e);
+ }
+ return sections;
+}
diff --git a/util/lexgen/configfile.h b/util/lexgen/configfile.h
new file mode 100644
index 0000000000..b8d483b916
--- /dev/null
+++ b/util/lexgen/configfile.h
@@ -0,0 +1,81 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef CONFIGFILE_H
+#define CONFIGFILE_H
+
+#include <QStringList>
+#include <QMap>
+#include <QVector>
+
+struct ConfigFile
+{
+ struct Entry
+ {
+ inline Entry() : lineNumber(-1) {}
+ int lineNumber;
+ QString key;
+ QString value;
+ };
+ struct Section : public QVector<Entry>
+ {
+ inline bool contains(const QString &key) const
+ {
+ for (int i = 0; i < count(); ++i)
+ if (at(i).key == key)
+ return true;
+ return false;
+ }
+ inline QString value(const QString &key, const QString &defaultValue = QString()) const
+ {
+ for (int i = 0; i < count(); ++i)
+ if (at(i).key == key)
+ return at(i).value;
+ return defaultValue;
+ }
+ };
+ typedef QMap<QString, Section> SectionMap;
+
+ static SectionMap parse(const QString &fileName);
+ static SectionMap parse(QIODevice *dev);
+};
+
+#endif // CONFIGFILE_H
+
diff --git a/util/lexgen/css2-simplified.lexgen b/util/lexgen/css2-simplified.lexgen
new file mode 100644
index 0000000000..53facb1d4a
--- /dev/null
+++ b/util/lexgen/css2-simplified.lexgen
@@ -0,0 +1,93 @@
+[Options]
+case-sensitive
+classname = QCssScanner_Generated
+
+[Code Generator Options]
+MapToCode[a-z] = (ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256
+TokenPrefix = QCss::
+FileHeader = ../moc/licenseheader.txt
+
+[Macros]
+escape = \\[^\r\n\f0-9a-f]
+nmstart = [_a-z]|{escape}
+nmchar = [_a-z0-9-]|{escape}
+nl = \n|\r\n|\r|\f
+string1 = \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
+string2 = \'([^\n\r\f\\']|\\{nl}|{escape})*\'
+invalid1 = \"([^\n\r\f\\"]|\\{nl}|{escape})*
+invalid2 = \'([^\n\r\f\\']|\\{nl}|{escape})*
+
+ident = -?{nmstart}{nmchar}*
+name = {nmchar}+
+num = [0-9]+|[0-9]*"."[0-9]+
+string = {string1}|{string2}
+invalid = {invalid1}|{invalid2}
+url = ([!#$%&*-~]|{escape})*
+s = [ \t\r\n\f]
+w = {s}*
+
+[Tokens]
+
+S = {s}+
+
+handleCommentStart() = \/\*
+
+CDO = "<!--"
+CDC = "-->"
+INCLUDES = "~="
+DASHMATCH = "|="
+
+LBRACE = {w}"{"
+PLUS = {w}"+"
+GREATER = {w}">"
+COMMA = {w}","
+
+STRING = {string}
+INVALID = {invalid}
+
+IDENT = {ident}
+
+HASH = "#"{name}
+
+ATKEYWORD_SYM = "@"{ident}
+
+EXCLAMATION_SYM = "!"
+
+#EMS = {num}em
+#EXS = {num}ex
+#LENGTH = {num}px
+#LENGTH = {num}cm
+#LENGTH = {num}mm
+#LENGTH = {num}in
+#LENGTH = {num}pt
+#LENGTH = {num}pc
+#ANGLE = {num}deg
+#ANGLE = {num}rad
+#ANGLE = {num}grad
+#TIME = {num}ms
+#TIME = {num}s
+#FREQ = {num}hz
+#FREQ = {num}khz
+#DIMENSION = {num}{ident}
+LENGTH = {num}{ident}
+
+PERCENTAGE = {num}%
+NUMBER = {num}
+
+#URI = "url("{w}{string}{w}")"
+#URI = "url("{w}{url}{w}")"
+FUNCTION = {ident}"("
+
+COLON = :
+SEMICOLON = ;
+RBRACE = \}
+SLASH = /
+MINUS = -
+DOT = \.
+STAR = \*
+LBRACKET = \[
+RBRACKET = \]
+EQUAL = \=
+LPAREN = \(
+RPAREN = \)
+OR = \|
diff --git a/util/lexgen/generator.cpp b/util/lexgen/generator.cpp
new file mode 100644
index 0000000000..7998861dfe
--- /dev/null
+++ b/util/lexgen/generator.cpp
@@ -0,0 +1,532 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "generator.h"
+
+#include <QFile>
+
+void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
+{
+ block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
+ if (!iline)
+ return;
+
+ block.indent();
+ QString tmp = body;
+ if (tmp.endsWith(QLatin1Char('\n')))
+ tmp.chop(1);
+ foreach (QString line, tmp.split(QLatin1Char('\n')))
+ block << line;
+ block.outdent();
+ block << "}";
+}
+
+QString Function::signature(const QString &funcNamePrefix) const
+{
+ QString sig;
+ if (!rtype.isEmpty()) {
+ sig += rtype;
+ sig += QLatin1Char(' ');
+ }
+ sig += funcNamePrefix;
+ sig += fname;
+ if (cnst)
+ sig += " const";
+ return sig;
+}
+
+QString Function::definition() const
+{
+ if (iline)
+ return QString();
+
+ QString result;
+ result += signature();
+ result += QLatin1String("\n{\n");
+
+ QString tmp = body;
+
+ if (tmp.endsWith(QLatin1Char('\n')))
+ tmp.chop(1);
+ if (!tmp.startsWith(QLatin1Char('\n')))
+ tmp.prepend(" ");
+
+ tmp.replace(QLatin1Char('\n'), QLatin1String("\n "));
+
+ result += tmp;
+
+ result += QLatin1String("\n}\n");
+
+ return result;
+}
+
+void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
+{
+ foreach (Function ctor, constructors)
+ ctor.printDeclaration(block, klass->name());
+
+ if (!constructors.isEmpty())
+ block.addNewLine();
+
+ foreach (Function func, functions)
+ func.printDeclaration(block);
+
+ if (!functions.isEmpty())
+ block.addNewLine();
+
+ foreach (QString var, variables)
+ block << var << ';';
+}
+
+void Class::addConstructor(Access access, const QString &body, const QString &_args)
+{
+ Function ctor;
+ QString args = _args;
+ if (!args.startsWith(QLatin1Char('('))
+ && !args.endsWith(QLatin1Char(')'))) {
+ args.prepend('(');
+ args.append(')');
+ }
+ ctor.setName(args);
+ ctor.addBody(body);
+ sections[access].constructors.append(ctor);
+}
+
+QString Class::Section::definition(const Class *klass) const
+{
+ QString result;
+
+ foreach (Function ctor, constructors) {
+ ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
+ result += ctor.definition();
+ result += QLatin1Char('\n');
+ }
+
+ foreach (Function func, functions) {
+ if (!func.hasBody()) continue;
+ func.setName(klass->name() + "::" + func.name());
+ result += func.definition();
+ result += QLatin1Char('\n');
+ }
+
+ return result;
+}
+
+QString Class::declaration() const
+{
+ CodeBlock block;
+
+ block << QLatin1String("class ") << cname;
+ block << "{";
+
+ if (!sections[PublicMember].isEmpty()) {
+ block << "public:";
+ block.indent();
+ sections[PublicMember].printDeclaration(this, block);
+ block.outdent();
+ }
+
+ if (!sections[ProtectedMember].isEmpty()) {
+ block << "protected:";
+ block.indent();
+ sections[ProtectedMember].printDeclaration(this, block);
+ block.outdent();
+ }
+
+ if (!sections[PrivateMember].isEmpty()) {
+ block << "private:";
+ block.indent();
+ sections[PrivateMember].printDeclaration(this, block);
+ block.outdent();
+ }
+
+ block << "};";
+ block.addNewLine();
+
+ return block.toString();
+}
+
+QString Class::definition() const
+{
+ return sections[PrivateMember].definition(this)
+ + sections[ProtectedMember].definition(this)
+ + sections[PublicMember].definition(this);
+}
+
+Generator::Generator(const DFA &_dfa, const Config &config)
+ : dfa(_dfa), cfg(config)
+{
+ QList<InputType> lst = cfg.maxInputSet.toList();
+ qSort(lst);
+ minInput = lst.first();
+ maxInput = lst.last();
+
+ ConfigFile::Section section = config.configSections.value("Code Generator Options");
+
+ foreach (ConfigFile::Entry entry, section) {
+ if (!entry.key.startsWith(QLatin1String("MapToCode["))
+ || !entry.key.endsWith(QLatin1Char(']')))
+ continue;
+ QString range = entry.key;
+ range.remove(0, qstrlen("MapToCode["));
+ range.chop(1);
+ if (range.length() != 3
+ || range.at(1) != QLatin1Char('-')) {
+ qWarning("Invalid range for char mapping function: %s", qPrintable(range));
+ continue;
+ }
+ TransitionSequence seq;
+ seq.first = range.at(0).unicode();
+ seq.last = range.at(2).unicode();
+ seq.testFunction = entry.value;
+ charFunctionRanges.append(seq);
+ }
+
+ QString tokenPrefix = section.value("TokenPrefix");
+ if (!tokenPrefix.isEmpty()) {
+ for (int i = 0; i < dfa.count(); ++i)
+ if (!dfa.at(i).symbol.isEmpty()
+ && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
+ dfa[i].symbol.prepend(tokenPrefix);
+ }
+
+ headerFileName = section.value("FileHeader");
+}
+
+static inline bool adjacentKeys(int left, int right) { return left + 1 == right; }
+//static inline bool adjacentKeys(const InputType &left, const InputType &right)
+//{ return left.val + 1 == right.val; }
+
+static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
+{
+ QVector<Generator::TransitionSequence> sequences;
+ if (transitions.isEmpty())
+ return sequences;
+
+ QList<InputType> keys = transitions.keys();
+ qSort(keys);
+ int i = 0;
+ Generator::TransitionSequence sequence;
+ sequence.first = keys.at(0);
+ ++i;
+ for (; i < keys.count(); ++i) {
+ if (adjacentKeys(keys.at(i - 1), keys.at(i))
+ && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
+ continue;
+ }
+ sequence.last = keys.at(i - 1);
+ sequence.transition = transitions.value(sequence.last);
+ sequences.append(sequence);
+
+ sequence.first = keys.at(i);
+ }
+ sequence.last = keys.at(i - 1);
+ sequence.transition = transitions.value(sequence.last);
+ sequences.append(sequence);
+
+ return sequences;
+}
+
+QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
+{
+ return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
+ << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
+ << "]";
+}
+
+bool Generator::isSingleReferencedFinalState(int i) const
+{
+ return backReferenceMap.value(i) == 1
+ && dfa.at(i).transitions.isEmpty()
+ && !dfa.at(i).symbol.isEmpty();
+}
+
+void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
+{
+ if (transitions.isEmpty())
+ return;
+
+ QVector<TransitionSequence> sequences = convertToSequences(transitions);
+
+ bool needsCharFunction = false;
+ if (!charFunctionRanges.isEmpty()) {
+ int i = 0;
+ while (i < sequences.count()) {
+ const TransitionSequence &seq = sequences.at(i);
+ if (!seq.testFunction.isEmpty()) {
+ ++i;
+ continue;
+ }
+
+ foreach (TransitionSequence range, charFunctionRanges)
+ if (range.first >= seq.first && range.last <= seq.last) {
+ needsCharFunction = true;
+
+ TransitionSequence left, middle, right;
+
+ left.first = seq.first;
+ left.last = range.first - 1;
+ left.transition = seq.transition;
+
+ middle = range;
+ middle.transition = seq.transition;
+
+ right.first = range.last + 1;
+ right.last = seq.last;
+ right.transition = seq.transition;
+
+ sequences.remove(i);
+ if (left.last >= left.first) {
+ sequences.insert(i, left);
+ ++i;
+ }
+ sequences.insert(i, middle);
+ ++i;
+ if (right.last >= right.first) {
+ sequences.insert(i, right);
+ ++i;
+ }
+
+ i = -1;
+ break;
+ }
+
+ ++i;
+ }
+ }
+
+ //qDebug() << "sequence count" << sequences.count();
+ //qDebug() << sequences;
+
+ if (sequences.count() < 10
+ || sequences.last().last == maxInput
+ || needsCharFunction) {
+ foreach (TransitionSequence seq, sequences) {
+ const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
+
+ QString brace;
+ if (embedFinalState)
+ brace = " {";
+
+ if (!seq.testFunction.isEmpty()) {
+ body << "if (" << seq.testFunction << ")" << brace;
+ } else if (seq.first == seq.last) {
+ body << "if (ch.unicode() == " << seq.first << ")" << brace;
+ } else {
+ if (seq.last < maxInput)
+ body << "if (ch.unicode() >= " << seq.first
+ << " && ch.unicode() <= " << seq.last << ")" << brace;
+ else
+ body << "if (ch.unicode() >= " << seq.first << ")" << brace;
+ }
+ body.indent();
+ if (embedFinalState) {
+ body << "token = " << dfa.at(seq.transition).symbol << ";";
+ body << "goto found;";
+
+ body.outdent();
+ body << "}";
+ } else {
+ body << "goto state_" << seq.transition << ";";
+ body.outdent();
+ }
+ }
+ } else {
+ QList<InputType> keys = transitions.keys();
+ qSort(keys);
+
+ body << "switch (ch.unicode()) {";
+ body.indent();
+ for (int k = 0; k < keys.count(); ++k) {
+ const InputType key = keys.at(k);
+ const int trans = transitions.value(key);
+
+ QString keyStr;
+ if (key == '\\')
+ keyStr = QString("\'\\\\\'");
+ else if (key >= 48 && key < 127)
+ keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
+ else
+ keyStr = QString::number(key);
+
+ if (k < keys.count() - 1
+ && transitions.value(keys.at(k + 1)) == trans) {
+ body << "case " << keyStr << ":";
+ } else {
+ if (isSingleReferencedFinalState(trans)) {
+ body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
+ } else {
+ body << "case " << keyStr << ": goto state_" << trans << ";";
+ }
+ }
+ }
+ body.outdent();
+ body << "}";
+ }
+}
+
+QString Generator::generate()
+{
+ Class klass(cfg.className);
+
+ klass.addMember(Class::PublicMember, "QString input");
+ klass.addMember(Class::PublicMember, "int pos");
+ klass.addMember(Class::PublicMember, "int lexemStart");
+ klass.addMember(Class::PublicMember, "int lexemLength");
+
+ {
+ CodeBlock body;
+ body << "input = inp;";
+ body << "pos = 0;";
+ body << "lexemStart = 0;";
+ body << "lexemLength = 0;";
+ klass.addConstructor(Class::PublicMember, body, "const QString &inp");
+ }
+
+ {
+ Function next("QChar", "next()");
+ next.setInline(true);
+ if (cfg.caseSensitivity == Qt::CaseSensitive)
+ next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
+ else
+ next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
+ klass.addMember(Class::PublicMember, next);
+ }
+
+ /*
+ {
+ Function lexem("QString", "lexem()");
+ lexem.setConst(true);
+ lexem.setInline(true);
+ lexem.addBody("return input.mid(lexemStart, lexemLength);");
+ klass.addMember(Class::PublicMember, lexem);
+ }
+ */
+
+ for (int i = 0; i < dfa.count(); ++i)
+ if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
+ Function handlerFunc("int", dfa.at(i).symbol);
+ klass.addMember(Class::PublicMember, handlerFunc);
+ }
+
+ Function lexFunc;
+ lexFunc.setReturnType("int");
+ lexFunc.setName("lex()");
+
+ CodeBlock body;
+ body << "lexemStart = pos;";
+ body << "lexemLength = 0;";
+ body << "int lastAcceptingPos = -1;";
+ body << "int token = -1;";
+ body << "QChar ch;";
+ body.addNewLine();
+
+ backReferenceMap.clear();
+ foreach (State s, dfa)
+ foreach (int state, s.transitions)
+ backReferenceMap[state]++;
+
+ bool haveSingleReferencedFinalState = false;
+
+ for (int i = 0; i < dfa.count(); ++i) {
+ if (isSingleReferencedFinalState(i)) {
+ haveSingleReferencedFinalState = true;
+ continue;
+ }
+
+ if (i > 0)
+ body << "state_" << i << ":";
+ else
+ body << "// initial state";
+
+ body.indent();
+
+ if (!dfa.at(i).symbol.isEmpty()) {
+ body << "lastAcceptingPos = pos;";
+ body << "token = " << dfa.at(i).symbol << ";";
+ }
+
+ body.outdent();
+
+ body.indent();
+
+ if (!dfa.at(i).transitions.isEmpty()) {
+ body << "ch = next();";
+ generateTransitions(body, dfa.at(i).transitions);
+ }
+
+ body << "goto out;";
+
+ body.outdent();
+ }
+
+ if (haveSingleReferencedFinalState) {
+ body << "found:";
+ body << "lastAcceptingPos = pos;";
+ body.addNewLine();
+ }
+
+ body << "out:";
+ body << "if (lastAcceptingPos != -1) {";
+ body.indent();
+ body << "lexemLength = lastAcceptingPos - lexemStart;";
+ body << "pos = lastAcceptingPos;";
+ body.outdent();
+ body << "}";
+ body << "return token;";
+
+ lexFunc.addBody(body);
+
+ klass.addMember(Class::PublicMember, lexFunc);
+
+ QString header;
+ QFile headerFile(headerFileName);
+ if (!headerFileName.isEmpty()
+ && headerFile.exists()
+ && headerFile.open(QIODevice::ReadOnly)) {
+ header = QString::fromUtf8(headerFile.readAll());
+ }
+
+ header += QLatin1String("// auto generated. DO NOT EDIT.\n");
+
+ return header + klass.declaration() + klass.definition();
+}
+
diff --git a/util/lexgen/generator.h b/util/lexgen/generator.h
new file mode 100644
index 0000000000..b9df8d74fa
--- /dev/null
+++ b/util/lexgen/generator.h
@@ -0,0 +1,221 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef GENERATOR_H
+#define GENERATOR_H
+
+#include <QTextStream>
+#include <QStringList>
+
+#include "nfa.h"
+
+class LineStream
+{
+private:
+ struct SharedStream
+ {
+ int ref;
+ QTextStream *stream;
+ };
+
+public:
+ LineStream(QTextStream *textStream)
+ {
+ shared = new SharedStream;
+ shared->ref = 1;
+ shared->stream = textStream;
+ }
+ LineStream(const LineStream &other)
+ {
+ shared = other.shared;
+ shared->ref++;
+ }
+ LineStream &operator=(const LineStream &other)
+ {
+ if (this == &other)
+ return *this;
+ LineStream copy(other); // keep refcount up
+ qSwap(*shared, *other.shared);
+ return *this;
+ }
+ ~LineStream()
+ {
+ if (!--shared->ref) {
+ (*shared->stream) << endl;
+ delete shared;
+ }
+ }
+
+ template <typename T>
+ LineStream &operator<<(const T &value)
+ { (*shared->stream) << value; return *this; }
+
+ SharedStream *shared;
+};
+
+class CodeBlock
+{
+public:
+ inline CodeBlock() { stream.setString(&output, QIODevice::WriteOnly); }
+
+ inline void indent() { indentStr += QLatin1String(" "); }
+ inline void outdent() { indentStr.remove(0, 4); }
+
+ template <typename T>
+ LineStream operator<<(const T &value)
+ { stream << indentStr; stream << value; return LineStream(&stream); }
+
+ inline void addNewLine() { stream << endl; }
+
+ inline QString toString() const { stream.flush(); return output; }
+
+private:
+ QString output;
+ mutable QTextStream stream;
+ QString indentStr;
+};
+
+class Function
+{
+public:
+ inline Function(const QString &returnType, const QString &name)
+ : rtype(returnType), fname(name), iline(false), cnst(false) {}
+ inline Function() : iline(false), cnst(false) {}
+
+ inline void setName(const QString &name) { fname = name; }
+ inline QString name() const { return fname; }
+
+ inline void setInline(bool i) { iline = i; }
+ inline bool isInline() const { return iline; }
+
+ inline void setReturnType(const QString &type) { rtype = type; }
+ inline QString returnType() const { return rtype; }
+
+ inline void addBody(const QString &_body) { body += _body; }
+ inline void addBody(const CodeBlock &block) { body += block.toString(); }
+ inline bool hasBody() const { return !body.isEmpty(); }
+
+ inline void setConst(bool konst) { cnst = konst; }
+ inline bool isConst() const { return cnst; }
+
+ void printDeclaration(CodeBlock &block, const QString &funcNamePrefix = QString()) const;
+ QString definition() const;
+
+private:
+ QString signature(const QString &funcNamePrefix = QString()) const;
+
+ QString rtype;
+ QString fname;
+ QString body;
+ bool iline;
+ bool cnst;
+};
+
+class Class
+{
+public:
+ enum Access { PublicMember, ProtectedMember, PrivateMember };
+
+ inline Class(const QString &name) : cname(name) {}
+
+ inline void setName(const QString &name) { cname = name; }
+ inline QString name() const { return cname; }
+
+ inline void addMember(Access access, const QString &name)
+ { sections[access].variables.append(name); }
+ inline void addMember(Access access, const Function &func)
+ { sections[access].functions.append(func); }
+
+ void addConstructor(Access access, const QString &body, const QString &args = QString());
+ inline void addConstructor(Access access, const CodeBlock &body, const QString &args = QString())
+ { addConstructor(access, body.toString(), args); }
+
+ QString declaration() const;
+ QString definition() const;
+
+private:
+ QString cname;
+ struct Section
+ {
+ QVector<Function> functions;
+ QStringList variables;
+ QVector<Function> constructors;
+
+ inline bool isEmpty() const
+ { return functions.isEmpty() && variables.isEmpty() && constructors.isEmpty(); }
+
+ void printDeclaration(const Class *klass, CodeBlock &block) const;
+ QString definition(const Class *klass) const;
+ };
+
+ Section sections[3];
+};
+
+class Generator
+{
+public:
+ Generator(const DFA &dfa, const Config &config);
+
+ QString generate();
+
+private:
+ void generateTransitions(CodeBlock &body, const TransitionMap &transitions);
+ bool isSingleReferencedFinalState(int i) const;
+
+ DFA dfa;
+ Config cfg;
+ InputType minInput;
+ InputType maxInput;
+ QHash<int, int> backReferenceMap;
+ QString headerFileName;
+public:
+ struct TransitionSequence
+ {
+ inline TransitionSequence() : first(-1), last(-1), transition(-1) {}
+ InputType first;
+ InputType last;
+ int transition;
+ QString testFunction;
+ };
+private:
+ QVector<TransitionSequence> charFunctionRanges;
+};
+
+#endif
diff --git a/util/lexgen/global.h b/util/lexgen/global.h
new file mode 100644
index 0000000000..49f993e8c6
--- /dev/null
+++ b/util/lexgen/global.h
@@ -0,0 +1,113 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef GLOBAL_H
+#define GLOBAL_H
+
+#include <QHash>
+#include <QDataStream>
+#include <QSet>
+
+#include "configfile.h"
+
+#if 1
+typedef int InputType;
+
+enum SpecialInputType {
+ DigitInput,
+ SpaceInput,
+ Letter
+};
+
+#else
+
+enum SpecialInputType {
+ NoSpecialInput = 0,
+ DigitInput,
+ SpaceInput,
+ LetterOrNumberInput
+};
+
+struct InputType
+{
+ inline InputType() : val(0), specialInput(NoSpecialInput) {}
+ inline InputType(const int &val) : val(val), specialInput(NoSpecialInput) {}
+
+ inline operator int() const { return val; }
+
+ inline bool operator==(const InputType &other) const
+ { return val == other.val; }
+ inline bool operator!=(const InputType &other) const
+ { return val != other.val; }
+
+ int val;
+ SpecialInputType specialInput;
+};
+
+inline int qHash(const InputType &t) { return qHash(t.val); }
+
+inline QDataStream &operator<<(QDataStream &stream, const InputType &i)
+{
+ return stream << i;
+}
+
+inline QDataStream &operator>>(QDataStream &stream, InputType &i)
+{
+ return stream >> i;
+}
+
+#endif
+
+const InputType Epsilon = -1;
+
+struct Config
+{
+ inline Config() : caseSensitivity(Qt::CaseSensitive), debug(false), cache(false) {}
+ QSet<InputType> maxInputSet;
+ Qt::CaseSensitivity caseSensitivity;
+ QString className;
+ bool debug;
+ bool cache;
+ QString ruleFile;
+ ConfigFile::SectionMap configSections;
+};
+
+#endif // GLOBAL_H
diff --git a/util/lexgen/lexgen.lexgen b/util/lexgen/lexgen.lexgen
new file mode 100644
index 0000000000..33efb8b39b
--- /dev/null
+++ b/util/lexgen/lexgen.lexgen
@@ -0,0 +1,24 @@
+[Options]
+case-sensitive
+classname = RegExpTokenizer
+
+[Code Generator Options]
+TokenPrefix = RE2NFA::
+
+[Macros]
+Escape = \\.{1}
+
+[Tokens]
+TOK_QUOTED_STRING = \"[^\"]*\"
+TOK_STRING = [^\{\}\(\)\,\*\|\?\.\+\[]|{Escape}
+TOK_SEQUENCE = \[([^\]]|(\\\]))*\]
+TOK_LBRACE = \{
+TOK_RBRACE = \}
+TOK_LPAREN = \(
+TOK_RPAREN = \)
+TOK_COMMA = \,
+TOK_STAR = \*
+TOK_OR = \|
+TOK_QUESTION = \?
+TOK_DOT = \.
+TOK_PLUS = \+
diff --git a/util/lexgen/lexgen.pri b/util/lexgen/lexgen.pri
new file mode 100644
index 0000000000..b36e00ea62
--- /dev/null
+++ b/util/lexgen/lexgen.pri
@@ -0,0 +1,3 @@
+VPATH += $$PWD
+SOURCES += nfa.cpp configfile.cpp re2nfa.cpp
+INCLUDEPATH += $$PWD
diff --git a/util/lexgen/lexgen.pro b/util/lexgen/lexgen.pro
new file mode 100644
index 0000000000..89ac84de06
--- /dev/null
+++ b/util/lexgen/lexgen.pro
@@ -0,0 +1,6 @@
+TEMPLATE = app
+TARGET = lexgen
+include(lexgen.pri)
+SOURCES += main.cpp \
+ generator.cpp
+QT = core
diff --git a/util/lexgen/main.cpp b/util/lexgen/main.cpp
new file mode 100644
index 0000000000..1cb5d902a6
--- /dev/null
+++ b/util/lexgen/main.cpp
@@ -0,0 +1,323 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "nfa.h"
+#include "re2nfa.h"
+#include "configfile.h"
+#include "generator.h"
+
+#include <QFile>
+#include <QCoreApplication>
+#include <QFileInfo>
+#include <QDateTime>
+
+struct Symbol
+{
+ QString token;
+ QString lexem;
+};
+
+static QList<Symbol> tokenize(const DFA &dfa, const QString &input, Config *cfg, bool *ok = 0)
+{
+ QList<Symbol> symbols;
+ Symbol lastSymbol;
+ int state = 0;
+ int lastAcceptingState = -1;
+ QString lastAcceptingLexem;
+ int lastAcceptingPos = -1;
+ for (int i = 0; i < input.length(); ++i) {
+ QChar ch = input.at(i);
+ QChar chForInput = ch;
+ if (cfg->caseSensitivity == Qt::CaseInsensitive)
+ chForInput = chForInput.toLower();
+ int next = dfa.at(state).transitions.value(chForInput.unicode());
+ if (cfg->debug)
+ qDebug() << "input" << input.at(i) << "leads to state" << next;
+ if (next) {
+ lastSymbol.lexem.append(input.at(i));
+ lastSymbol.token = dfa.at(next).symbol;
+ if (!lastSymbol.token.isEmpty()) {
+ lastAcceptingState = next;
+ lastAcceptingLexem = lastSymbol.lexem;
+ lastAcceptingPos = i;
+ }
+ state = next;
+ } else {
+ if (lastAcceptingState != -1) {
+ if (cfg->debug)
+ qDebug() << "adding" << dfa.at(lastAcceptingState).symbol << "and backtracking to" << lastAcceptingPos;
+ Symbol s;
+ s.token = dfa.at(lastAcceptingState).symbol;
+ s.lexem = lastAcceptingLexem;
+ symbols << s;
+ lastSymbol = Symbol();
+ state = 0;
+ i = lastAcceptingPos;
+ lastAcceptingPos = -1;
+ lastAcceptingState = -1;
+ continue;
+ }
+ if (state == 0 || lastSymbol.token.isEmpty()) {
+ if (cfg->debug)
+ qDebug() << "invalid input";
+ if (ok)
+ *ok = false;
+ return symbols;
+ }
+ if (cfg->debug)
+ qDebug() << "appending symbol with token" << lastSymbol.token;
+ symbols << lastSymbol;
+ lastSymbol = Symbol();
+ state = 0;
+ lastAcceptingState = -1;
+ --i;
+ }
+ }
+ if (!lastSymbol.token.isEmpty()) {
+ if (cfg->debug)
+ qDebug() << "appending (last) symbol with token" << lastSymbol.token;
+ symbols << lastSymbol;
+ } else if (lastAcceptingState != -1) {
+ if (cfg->debug)
+ qDebug() << "appending last accepting state with token" << dfa.at(lastAcceptingState).symbol;
+ Symbol s;
+ s.lexem = lastAcceptingLexem;
+ s.token = dfa.at(lastAcceptingState).symbol;
+ symbols << s;
+ }
+ if (ok)
+ *ok = true;
+ return symbols;
+}
+
+static QSet<InputType> determineMaxInputSet(const ConfigFile::Section &section)
+{
+ QSet<InputType> set;
+
+ QString inputTypeName;
+
+ foreach (const ConfigFile::Entry &entry, section)
+ if (entry.key == QLatin1String("InputType")) {
+ if (!inputTypeName.isEmpty()) {
+ qWarning("Error: InputType field specified multiple times in config file");
+ return QSet<InputType>();
+ }
+ inputTypeName = entry.value;
+ }
+
+ if (inputTypeName.isEmpty())
+ inputTypeName = "quint8";
+
+ if (inputTypeName == "quint8") {
+ for (int i = 1; i < 256; ++i)
+ set.insert(i);
+ } /* else if ### */
+ else {
+ qWarning("Error: Unknown input type '%s'", qPrintable(inputTypeName));
+ return QSet<InputType>();
+ }
+
+ return set;
+}
+
+static bool loadConfig(const QString &ruleFile, Config *cfg)
+{
+ ConfigFile::SectionMap sections = ConfigFile::parse(ruleFile);
+ if (sections.isEmpty()) {
+ qWarning("Error parsing %s", qPrintable(ruleFile));
+ return false;
+ }
+
+ QSet<InputType> maxInputSet = determineMaxInputSet(sections.value("Options"));
+ if (maxInputSet.isEmpty())
+ return false;
+
+ Qt::CaseSensitivity cs = Qt::CaseInsensitive;
+ if (sections.value("Options").contains("case-sensitive"))
+ cs = Qt::CaseSensitive;
+
+ cfg->configSections = sections;
+ cfg->caseSensitivity = cs;
+ cfg->className = sections.value("Options").value("classname", "Scanner");
+ cfg->maxInputSet = maxInputSet;
+ cfg->ruleFile = ruleFile;
+ return true;
+}
+
+static DFA generateMachine(const Config &cfg)
+{
+ if (cfg.cache) {
+ QFileInfo ruleInfo(cfg.ruleFile);
+ QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
+ if (cacheInfo.exists()
+ && cacheInfo.lastModified() > ruleInfo.lastModified()) {
+ QFile f(cacheInfo.absoluteFilePath());
+ f.open(QIODevice::ReadOnly);
+ QDataStream stream(&f);
+ DFA machine;
+ stream >> machine;
+ return machine;
+ }
+ }
+
+ QMap<QString, NFA> macros;
+ foreach (ConfigFile::Entry e, cfg.configSections.value("Macros")) {
+ int errCol = 0;
+ if (cfg.debug)
+ qDebug() << "parsing" << e.value;
+ NFA nfa = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
+ if (nfa.isEmpty()) {
+ qWarning("Parse error in line %d column %d", e.lineNumber, errCol);
+ return DFA();
+ }
+ macros.insert(e.key, nfa);
+ }
+
+ if (!cfg.configSections.contains("Tokens")) {
+ qWarning("Rule file does not contain a [Tokens] section!");
+ return DFA();
+ }
+
+ QVector<NFA> tokens;
+
+ foreach (ConfigFile::Entry e, cfg.configSections.value("Tokens")) {
+ int errCol = 0;
+ if (cfg.debug)
+ qDebug() << "parsing" << e.value;
+ NFA tok = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
+ if (tok.isEmpty()) {
+ qWarning("Parse error in line %d column %d while parsing token %s", e.lineNumber, errCol, e.key.toLocal8Bit().constData());
+ return DFA();
+ }
+ tok.setTerminationSymbol(e.key);
+ tokens.append(tok);
+ }
+
+ NFA giganticStateMachine;
+ foreach (NFA nfa, tokens)
+ if (giganticStateMachine.isEmpty())
+ giganticStateMachine = nfa;
+ else
+ giganticStateMachine = NFA::createAlternatingNFA(giganticStateMachine, nfa);
+
+ DFA result = giganticStateMachine.toDFA().minimize();
+ if (cfg.cache) {
+ QFileInfo ruleInfo(cfg.ruleFile);
+ QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
+ QFile f(cacheInfo.absoluteFilePath());
+ f.open(QIODevice::WriteOnly | QIODevice::Truncate);
+ QDataStream stream(&f);
+ stream << result;
+ }
+ return result;
+}
+
+#if !defined(AUTOTEST)
+int main(int argc, char **argv)
+{
+ QCoreApplication app(argc, argv);
+ QString ruleFile;
+ Config cfg;
+
+ const QStringList arguments = app.arguments().mid(1);
+ cfg.debug = arguments.contains("-debug");
+ const bool testRules = arguments.contains("-test");
+ cfg.cache = arguments.contains("-cache");
+
+ foreach (const QString &arg, arguments)
+ if (!arg.startsWith(QLatin1Char('-'))) {
+ ruleFile = arg;
+ break;
+ }
+
+ if (ruleFile.isEmpty()) {
+ qWarning("usage: lexgen [-test rulefile");
+ qWarning(" ");
+ qWarning(" the -test option will cause lexgen to interpret standard input");
+ qWarning(" according to the specified rules and print out pairs of token and");
+ qWarning(" lexical element");
+ return 1;
+ }
+
+ if (!loadConfig(ruleFile, &cfg))
+ return 1;
+
+ DFA machine = generateMachine(cfg);
+ if (machine.isEmpty())
+ return 1;
+
+ if (testRules) {
+ qWarning("Testing:");
+ QString input = QTextStream(stdin).readAll();
+ /*
+ qDebug() << "NFA has" << machine.stateCount() << "states";
+ qDebug() << "Converting to DFA... (this may take a while)";
+ DFA dfa = machine.toDFA();
+ qDebug() << "DFA has" << dfa.count() << "states";
+ qDebug() << "Minimizing...";
+ dfa = dfa.minimize();
+ qDebug() << "Minimized DFA has" << dfa.count() << "states";
+ */
+ DFA dfa = machine;
+ if (cfg.debug)
+ qDebug() << "tokenizing" << input;
+ bool ok = false;
+ QList<Symbol> symbols = tokenize(dfa, input, &cfg, &ok);
+ if (symbols.isEmpty()) {
+ qWarning("No tokens produced!");
+ } else {
+ foreach (Symbol s, symbols)
+ qDebug() << s.token << ":" << s.lexem;
+ }
+ if (ok)
+ qDebug() << symbols.count() << "tokens produced.";
+ else
+ qDebug() << "Error while tokenizing!";
+ } else {
+ Generator gen(machine, cfg);
+ QTextStream(stdout)
+ << gen.generate();
+ }
+
+ return 0;
+}
+#endif
+
diff --git a/util/lexgen/nfa.cpp b/util/lexgen/nfa.cpp
new file mode 100644
index 0000000000..4adde3a5cf
--- /dev/null
+++ b/util/lexgen/nfa.cpp
@@ -0,0 +1,508 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "nfa.h"
+#include <QSet>
+#include <limits.h>
+
+NFA NFA::createSingleInputNFA(InputType input)
+{
+ NFA result;
+ result.initialize(2);
+ result.addTransition(result.initialState, input, result.finalState);
+ return result;
+}
+
+NFA NFA::createSymbolNFA(const QString &symbol)
+{
+ NFA result = NFA::createSingleInputNFA(Epsilon);
+ result.states[result.finalState].symbol = symbol;
+ return result;
+}
+
+void NFA::initialize(int size)
+{
+ states.resize(size);
+ states.fill(State());
+ initialState = 0;
+ finalState = size - 1;
+}
+
+void NFA::addTransition(int from, InputType input, int to)
+{
+ assertValidState(from);
+ assertValidState(to);
+
+ states[from].transitions.insertMulti(input, to);
+}
+
+void NFA::copyFrom(const NFA &other, int baseState)
+{
+ assertValidState(baseState);
+ assertValidState(baseState + other.states.count() - 1);
+
+ for (int i = 0; i < other.states.count(); ++i) {
+ State s = other.states.at(i);
+
+ for (TransitionMap::Iterator it = s.transitions.begin(),
+ end = s.transitions.end(); it != end; ++it)
+ *it += baseState;
+
+ states[baseState + i] = s;
+ }
+}
+
+void NFA::initializeFromPair(const NFA &a, const NFA &b,
+ int *initialA, int *finalA,
+ int *initialB, int *finalB)
+{
+ initialize(a.states.count() + b.states.count() + 2);
+
+ int baseIdxA = 1;
+ int baseIdxB = 1 + a.states.count();
+
+ *initialA = a.initialState + baseIdxA;
+ *finalA = a.finalState + baseIdxA;
+
+ *initialB = b.initialState + baseIdxB;
+ *finalB = b.finalState + baseIdxB;
+
+ copyFrom(a, baseIdxA);
+ copyFrom(b, baseIdxB);
+}
+
+NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
+{
+ NFA result;
+
+ int newInitialA, newFinalA,
+ newInitialB, newFinalB;
+
+ result.initializeFromPair(a, b, &newInitialA, &newFinalA,
+ &newInitialB, &newFinalB);
+
+ result.addTransition(result.initialState, Epsilon, newInitialA);
+ result.addTransition(result.initialState, Epsilon, newInitialB);
+
+ result.addTransition(newFinalA, Epsilon, result.finalState);
+ result.addTransition(newFinalB, Epsilon, result.finalState);
+
+ return result;
+}
+
+NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
+{
+ NFA result;
+
+ int initialA, finalA,
+ initialB, finalB;
+
+ result.initializeFromPair(a, b, &initialA, &finalA, &initialB, &finalB);
+
+ result.addTransition(result.initialState, Epsilon, initialA);
+ result.addTransition(finalA, Epsilon, initialB);
+ result.addTransition(finalB, Epsilon, result.finalState);
+ return result;
+}
+
+NFA NFA::createOptionalNFA(const NFA &a)
+{
+ NFA result;
+
+ result.initialize(a.states.count() + 2);
+
+ int baseIdxA = 1;
+ int initialA = a.initialState + baseIdxA;
+ int finalA = a.finalState + baseIdxA;
+
+ result.copyFrom(a, baseIdxA);
+
+ result.addTransition(result.initialState, Epsilon, initialA);
+ result.addTransition(result.initialState, Epsilon, result.finalState);
+
+ result.addTransition(finalA, Epsilon, initialA);
+ result.addTransition(finalA, Epsilon, result.finalState);
+
+ return result;
+}
+
+NFA NFA::createStringNFA(const QByteArray &str)
+{
+ NFA result;
+ foreach (char c, str) {
+ NFA ch = NFA::createSingleInputNFA(c);
+ if (result.isEmpty())
+ result = ch;
+ else
+ result = NFA::createConcatenatingNFA(result, ch);
+ }
+ return result;
+}
+
+NFA NFA::createSetNFA(const QSet<InputType> &set)
+{
+ NFA result;
+ result.initialize(set.count() + 2);
+
+ int state = 1;
+ for (QSet<InputType>::ConstIterator it = set.constBegin(), end = set.constEnd();
+ it != end; ++it, ++state) {
+ result.addTransition(result.initialState, Epsilon, state);
+ result.addTransition(state, *it, result.finalState);
+ }
+
+ /*
+ foreach (InputType input, set) {
+ NFA ch = NFA::createSingleInputNFA(input);
+ if (result.isEmpty())
+ result = ch;
+ else
+ result = NFA::createAlternatingNFA(result, ch);
+ }
+ */
+ return result;
+}
+
+NFA NFA::createZeroOrOneNFA(const NFA &a)
+{
+ NFA epsilonNFA = createSingleInputNFA(Epsilon);
+ return NFA::createAlternatingNFA(a, epsilonNFA);
+}
+
+NFA NFA::applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences)
+{
+ NFA result = a;
+ NFA epsilonNFA = createSingleInputNFA(Epsilon);
+
+ if (minOccurrences == 0) {
+ result = NFA::createAlternatingNFA(result, epsilonNFA);
+ } else {
+ minOccurrences--;
+ }
+ maxOccurrences--;
+
+ for (int i = 0; i < minOccurrences; ++i)
+ result = NFA::createConcatenatingNFA(result, a);
+
+ for (int i = minOccurrences; i < maxOccurrences; ++i)
+ result = NFA::createConcatenatingNFA(result, NFA::createAlternatingNFA(a, epsilonNFA));
+
+ return result;
+}
+
+void NFA::debug()
+{
+ qDebug() << "NFA has" << states.count() << "states";
+ qDebug() << "initial state is" << initialState;
+ qDebug() << "final state is" << finalState;
+
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+ end = s.transitions.constEnd(); it != end; ++it)
+ qDebug() << "transition from state" << i << "to" << it.value() << "through"
+ << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
+ if (!s.symbol.isEmpty())
+ qDebug() << "State" << i << "leads to symbol" << s.symbol;
+ }
+}
+
+// helper
+typedef QSet<int> DFAState;
+
+// that's a bad hash, but it's good enough for us
+// and it allows us to use the nice QHash API :)
+inline uint qHash(const DFAState &state)
+{
+ uint val = 0;
+ foreach (int s, state)
+ val |= qHash(s);
+ return val;
+}
+
+DFA NFA::toDFA() const
+{
+ DFA result;
+ result.reserve(states.count());
+
+ QHash<QString, int> symbolReferenceCounts;
+ {
+ QSet<int> symbolStates;
+ for (int i = 0; i < states.count(); ++i)
+ if (!states.at(i).symbol.isEmpty())
+ symbolStates.insert(i);
+
+ QHash<int, QString> epsilonStates;
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+ transition != end; ++transition)
+ if (transition.key() == Epsilon && symbolStates.contains(transition.value()))
+ epsilonStates.insert(i, states.at(transition.value()).symbol);
+ }
+
+ int lastCount;
+ do {
+ lastCount = epsilonStates.count();
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+ transition != end; ++transition)
+ if (transition.key() == Epsilon && epsilonStates.contains(transition.value()))
+ epsilonStates.insert(i, epsilonStates.value(transition.value()));
+ }
+
+ } while (lastCount != epsilonStates.count());
+
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+ transition != end; ++transition) {
+ if (transition.key() == Epsilon)
+ continue;
+ if (symbolStates.contains(transition.value())) {
+ const QString symbol = states.at(transition.value()).symbol;
+ symbolReferenceCounts[symbol]++;
+ } else if (epsilonStates.contains(transition.value())) {
+ const QString symbol = epsilonStates.value(transition.value());
+ symbolReferenceCounts[symbol]++;
+ }
+ }
+ }
+ /*
+ for (QHash<QString, int>::ConstIterator symIt = symbolReferenceCounts.constBegin(), symEnd = symbolReferenceCounts.constEnd();
+ symIt != symEnd; ++symIt)
+ qDebug() << "symbol" << symIt.key() << "is reached" << symIt.value() << "times";
+ */
+ }
+
+
+ QSet<InputType> validInput;
+ foreach (const State &s, states)
+ for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+ end = s.transitions.constEnd(); it != end; ++it)
+ if (it.key() != Epsilon)
+ validInput.insert(it.key());
+
+ // A DFA state can consist of multiple NFA states.
+ // the dfaStateMap maps from these to the actual
+ // state index within the resulting DFA vector
+ QHash<DFAState, int> dfaStateMap;
+ QStack<DFAState> pendingDFAStates;
+
+ DFAState startState = epsilonClosure(QSet<int>() << initialState);
+
+ result.resize(1);
+ dfaStateMap.insert(startState, 0);
+
+ pendingDFAStates.push(startState);
+
+ while (!pendingDFAStates.isEmpty()) {
+ DFAState state = pendingDFAStates.pop();
+// qDebug() << "processing" << state << "from the stack of pending states";
+
+ foreach (InputType input, validInput) {
+
+ QSet<int> reachableStates;
+
+ foreach (int nfaState, state) {
+ const TransitionMap &transitions = states.at(nfaState).transitions;
+ TransitionMap::ConstIterator it = transitions.find(input);
+ while (it != transitions.constEnd() && it.key() == input) {
+ reachableStates.insert(it.value());
+ ++it;
+ }
+ }
+
+ if (reachableStates.isEmpty())
+ continue;
+
+// qDebug() << "can reach" << reachableStates << "from input" << char(input);
+
+ QSet<int> closure = epsilonClosure(reachableStates);
+
+// qDebug() << "closure is" << closure;
+
+ if (!dfaStateMap.contains(closure)) {
+ int dfaState = result.count();
+ result.append(State());
+
+ QString symbol;
+ int refCount = INT_MAX;
+ foreach (int nfaState, closure)
+ if (!states.at(nfaState).symbol.isEmpty()) {
+// qDebug() << "closure also contains symbol" << states.at(nfaState).symbol;
+ QString candidate = states.at(nfaState).symbol;
+ int candidateRefCount =symbolReferenceCounts.value(candidate, INT_MAX);
+ if (candidateRefCount < refCount) {
+ refCount = candidateRefCount;
+ symbol = candidate;
+ }
+ }
+ if (!symbol.isEmpty())
+ result.last().symbol = symbol;
+
+ dfaStateMap.insert(closure, dfaState);
+
+ Q_ASSERT(!pendingDFAStates.contains(closure));
+ pendingDFAStates.prepend(closure);
+ }
+
+ result[dfaStateMap.value(state)].transitions.insert(input, dfaStateMap.value(closure));
+ }
+ }
+
+ return result;
+}
+
+QSet<int> NFA::epsilonClosure(const QSet<int> &initialClosure) const
+{
+ QSet<int> closure = initialClosure;
+ closure.reserve(closure.count() * 4);
+
+ QStack<int> stateStack;
+ stateStack.resize(closure.count());
+ qCopy(closure.constBegin(), closure.constEnd(), stateStack.begin());
+
+ while (!stateStack.isEmpty()) {
+ int t = stateStack.pop();
+ const TransitionMap &transitions = states.at(t).transitions;
+ TransitionMap::ConstIterator it = transitions.find(Epsilon);
+ while (it != transitions.constEnd() && it.key() == Epsilon) {
+ const int u = it.value();
+ if (!closure.contains(u)) {
+ closure.insert(u);
+ stateStack.push(u);
+ }
+ ++it;
+ }
+ }
+
+ return closure;
+}
+
+void NFA::setTerminationSymbol(const QString &symbol)
+{
+ states[finalState].symbol = symbol;
+}
+
+void DFA::debug() const
+{
+ qDebug() << "DFA has" << count() << "states";
+
+ for (int i = 0; i < count(); ++i) {
+ const State &s = at(i);
+ if (s.transitions.isEmpty()) {
+ qDebug() << "State" << i << "has no transitions";
+ } else {
+ for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+ end = s.transitions.constEnd(); it != end; ++it)
+ qDebug() << "transition from state" << i << "to" << it.value() << "through"
+ << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
+ }
+ if (!s.symbol.isEmpty())
+ qDebug() << "State" << i << "leads to symbol" << s.symbol;
+ }
+
+}
+
+DFA DFA::minimize() const
+{
+ QVector<bool> inequivalentStates(count() * count());
+ inequivalentStates.fill(false);
+
+ for (int i = 0; i < count(); ++i)
+ for (int j = 0; j < i; ++j) {
+ if (i != j && at(i).symbol != at(j).symbol)
+ inequivalentStates[i * count() + j] = true;
+ }
+
+ bool done;
+ do {
+ done = true;
+ for (int i = 0; i < count(); ++i)
+ for (int j = 0; j < count(); ++j) {
+ if (i == j)
+ continue;
+
+ if (inequivalentStates[i * count() + j])
+ continue;
+
+ if (at(i).transitions.keys() != at(j).transitions.keys()) {
+ inequivalentStates[i * count() + j] = true;
+ done = false;
+ continue;
+ }
+
+ foreach (InputType a, at(i).transitions.keys()) {
+ int r = at(i).transitions.value(a, -1);
+ if (r == -1)
+ continue;
+ int s = at(j).transitions.value(a, -1);
+ if (s == -1)
+ continue;
+
+ if (inequivalentStates[r * count() + s]
+ || r == s) {
+ inequivalentStates[i * count() + j] = true;
+ done = false;
+ break;
+ }
+ }
+ }
+ } while (!done);
+
+ QHash<int, int> statesToEliminate;
+ for (int i = 0; i < count(); ++i)
+ for (int j = 0; j < i; ++j)
+ if (!inequivalentStates[i * count() + j]) {
+ statesToEliminate.insertMulti(i, j);
+ }
+
+ /*
+ qDebug() << "states to eliminiate:" << statesToEliminate.count();;
+ qDebug() << "merging" << statesToEliminate;
+ debug();
+ */
+
+ return *this;
+}
+
+
diff --git a/util/lexgen/nfa.h b/util/lexgen/nfa.h
new file mode 100644
index 0000000000..02657b33eb
--- /dev/null
+++ b/util/lexgen/nfa.h
@@ -0,0 +1,127 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef NFA_H
+#define NFA_H
+
+#include <QMap>
+#include <QHash>
+#include <QString>
+#include <QVector>
+#include <QDebug>
+#include <QStack>
+#include <QByteArray>
+
+#include "global.h"
+
+typedef QHash<InputType, int> TransitionMap;
+
+struct State
+{
+ QString symbol;
+ TransitionMap transitions;
+};
+
+inline QDataStream &operator<<(QDataStream &stream, const State &state)
+{
+ return stream << state.symbol << state.transitions;
+}
+
+inline QDataStream &operator>>(QDataStream &stream, State &state)
+{
+ return stream >> state.symbol >> state.transitions;
+}
+
+struct DFA : public QVector<State>
+{
+ void debug() const;
+ DFA minimize() const;
+};
+
+class NFA
+{
+public:
+ static NFA createSingleInputNFA(InputType input);
+ static NFA createSymbolNFA(const QString &symbol);
+ static NFA createAlternatingNFA(const NFA &a, const NFA &b);
+ static NFA createConcatenatingNFA(const NFA &a, const NFA &b);
+ static NFA createOptionalNFA(const NFA &a);
+
+ // convenience
+ static NFA createStringNFA(const QByteArray &str);
+ static NFA createSetNFA(const QSet<InputType> &set);
+ static NFA createZeroOrOneNFA(const NFA &a);
+ static NFA applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences);
+
+ void setTerminationSymbol(const QString &symbol);
+
+ DFA toDFA() const;
+
+ inline bool isEmpty() const { return states.isEmpty(); }
+ inline int stateCount() const { return states.count(); }
+
+ void debug();
+
+private:
+ void initialize(int size);
+ void addTransition(int from, InputType input, int to);
+ void copyFrom(const NFA &other, int baseState);
+
+ void initializeFromPair(const NFA &a, const NFA &b,
+ int *initialA, int *finalA,
+ int *initialB, int *finalB);
+
+ QSet<int> epsilonClosure(const QSet<int> &initialClosure) const;
+
+ inline void assertValidState(int state)
+ { Q_UNUSED(state); Q_ASSERT(state >= 0); Q_ASSERT(state < states.count()); }
+
+#if defined(AUTOTEST)
+public:
+#endif
+ int initialState;
+ int finalState;
+
+ QVector<State> states;
+};
+
+#endif // NFA_H
+
diff --git a/util/lexgen/re2nfa.cpp b/util/lexgen/re2nfa.cpp
new file mode 100644
index 0000000000..77cf5019fe
--- /dev/null
+++ b/util/lexgen/re2nfa.cpp
@@ -0,0 +1,547 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "re2nfa.h"
+#include "tokenizer.cpp"
+
+RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)
+ : macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)
+{
+}
+
+NFA RE2NFA::parse(const QString &expression, int *errCol)
+{
+ tokenize(expression);
+
+ if (symbols.isEmpty())
+ return NFA();
+
+ index = 0;
+
+ NFA result = parseExpr();
+ if (result.isEmpty()) {
+ if (errCol)
+ *errCol = errorColumn;
+ }
+ return result;
+}
+
+NFA RE2NFA::parseExpr()
+{
+ NFA value = parseBranch();
+ while (test(TOK_OR)) {
+ NFA rhs = parseBranch();
+ value = NFA::createAlternatingNFA(value, rhs);
+ }
+ return value;
+}
+
+NFA RE2NFA::parseBranch()
+{
+ NFA value = parsePiece();
+ if (!hasNext())
+ return value;
+ NFA next;
+ do {
+ next = parsePiece();
+ if (!next.isEmpty())
+ value = NFA::createConcatenatingNFA(value, next);
+ } while (!next.isEmpty() && hasNext());
+ return value;
+}
+
+NFA RE2NFA::parsePiece()
+{
+ NFA atom = parseAtom();
+ if (atom.isEmpty() || !hasNext())
+ return atom;
+ return parseMaybeQuantifier(atom);
+}
+
+NFA RE2NFA::parseAtom()
+{
+ // ####
+ switch (next()) {
+ case TOK_STRING:
+ return createCharNFA();
+ case TOK_LPAREN: {
+ NFA subExpr = parseExpr();
+ next(TOK_RPAREN);
+ return subExpr;
+ }
+ case TOK_LBRACE: {
+ QString macroName = lexemUntil(TOK_RBRACE);
+ QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);
+ if (macro == macros.end()) {
+ qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));
+ return NFA();
+ }
+ return *macro;
+ }
+ case TOK_LBRACKET: {
+ NFA set = parseSet();
+ next(TOK_RBRACKET);
+ return set;
+ }
+ case TOK_SEQUENCE:
+ return parseSet2();
+ case TOK_DOT:
+ return NFA::createSetNFA(maxInputSet);
+ default:
+ prev();
+ return NFA();
+ }
+}
+
+NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)
+{
+ // ####
+ switch (next()) {
+ case TOK_STAR:
+ return NFA::createOptionalNFA(nfa);
+ case TOK_QUESTION:
+ return NFA::createZeroOrOneNFA(nfa);
+ case TOK_PLUS:
+ return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));
+ case TOK_LBRACE: {
+ const int rewind = index - 1;
+
+ QString lexemBeforeComma;
+ QString lexemAfterComma;
+ bool seenComma = false;
+ forever {
+ if (test(TOK_COMMA)) {
+ if (seenComma) {
+ errorColumn = symbol().column;
+ return NFA();
+ }
+ seenComma = true;
+ } else if (test(TOK_RBRACE)) {
+ break;
+ } else {
+ next(TOK_STRING);
+ if (seenComma)
+ lexemAfterComma += symbol().lexem;
+ else
+ lexemBeforeComma += symbol().lexem;
+ }
+ }
+ bool isNumber = false;
+ int min = lexemBeforeComma.toInt(&isNumber);
+ if (!isNumber) {
+ index = rewind;
+ return nfa;
+ }
+ int max = min;
+ if (seenComma) {
+ max = lexemAfterComma.toInt(&isNumber);
+ if (!isNumber) {
+ errorColumn = symbol().column;
+ return NFA();
+ }
+ }
+ return NFA::applyQuantity(nfa, min, max);
+ }
+ default:
+ prev();
+ return nfa;
+ }
+}
+
+NFA RE2NFA::parseSet()
+{
+ QSet<InputType> set;
+ bool negate = false;
+
+ next(TOK_STRING);
+
+ do {
+ Q_ASSERT(symbol().lexem.length() == 1);
+ // ###
+ QChar ch = symbol().lexem.at(0);
+ if (set.isEmpty() && ch == QLatin1Char('^')) {
+ negate = true;
+ continue;
+ }
+
+ // look ahead for ranges like a-z
+ bool rangeFound = false;
+ if (test(TOK_STRING)) {
+ if (symbol().lexem.length() == 1
+ && symbol().lexem.at(0) == QLatin1Char('-')) {
+ next(TOK_STRING);
+ Q_ASSERT(symbol().lexem.length() == 1);
+ QChar last = symbol().lexem.at(0);
+
+ if (ch.unicode() > last.unicode())
+ qSwap(ch, last);
+
+ for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(QChar(i).toLower().unicode());
+ } else {
+ set.insert(i);
+ }
+ }
+
+ rangeFound = true;
+ } else {
+ prev();
+ }
+ }
+
+ if (!rangeFound) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(ch.toLower().unicode());
+ } else {
+ set.insert(ch.unicode());
+ }
+ }
+ } while (test(TOK_STRING));
+
+ if (negate) {
+ QSet<InputType> negatedSet = maxInputSet;
+ negatedSet.subtract(set);
+ set = negatedSet;
+ }
+
+ return NFA::createSetNFA(set);
+}
+
+NFA RE2NFA::parseSet2()
+{
+ QSet<InputType> set;
+ bool negate = false;
+
+ QString str = symbol().lexem;
+ // strip off brackets
+ str.chop(1);
+ str.remove(0, 1);
+
+ int i = 0;
+ while (i < str.length()) {
+ // ###
+ QChar ch = str.at(i++);
+ if (set.isEmpty() && ch == QLatin1Char('^')) {
+ negate = true;
+ continue;
+ }
+
+ // look ahead for ranges like a-z
+ bool rangeFound = false;
+ if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {
+ ++i;
+ QChar last = str.at(i++);
+
+ if (ch.unicode() > last.unicode())
+ qSwap(ch, last);
+
+ for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(QChar(i).toLower().unicode());
+ } else {
+ set.insert(i);
+ }
+ }
+
+ rangeFound = true;
+ }
+
+ if (!rangeFound) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(ch.toLower().unicode());
+ } else {
+ set.insert(ch.unicode());
+ }
+ }
+ }
+
+ if (negate) {
+ QSet<InputType> negatedSet = maxInputSet;
+ negatedSet.subtract(set);
+ set = negatedSet;
+ }
+
+ return NFA::createSetNFA(set);
+}
+NFA RE2NFA::createCharNFA()
+{
+ NFA nfa;
+ // ####
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());
+ } else {
+ nfa = NFA::createStringNFA(symbol().lexem.toLatin1());
+ }
+ return nfa;
+}
+
+static inline int skipQuote(const QString &str, int pos)
+{
+ while (pos < str.length()
+ && str.at(pos) != QLatin1Char('"')) {
+ if (str.at(pos) == QLatin1Char('\\')) {
+ ++pos;
+ if (pos >= str.length())
+ break;
+ }
+ ++pos;
+ }
+ if (pos < str.length())
+ ++pos;
+ return pos;
+}
+
+#if 0
+static const char*tokStr(Token t)
+{
+ switch (t) {
+ case TOK_INVALID: return "TOK_INVALID";
+ case TOK_STRING: return "TOK_STRING";
+ case TOK_LBRACE: return "TOK_LBRACE";
+ case TOK_RBRACE: return "TOK_RBRACE";
+ case TOK_LBRACKET: return "TOK_LBRACKET";
+ case TOK_RBRACKET: return "TOK_RBRACKET";
+ case TOK_LPAREN: return "TOK_LPAREN";
+ case TOK_RPAREN: return "TOK_RPAREN";
+ case TOK_COMMA: return "TOK_COMMA";
+ case TOK_STAR: return "TOK_STAR";
+ case TOK_OR: return "TOK_OR";
+ case TOK_QUESTION: return "TOK_QUESTION";
+ case TOK_DOT: return "TOK_DOT";
+ case TOK_PLUS: return "TOK_PLUS";
+ case TOK_SEQUENCE: return "TOK_SEQUENCE";
+ case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";
+ }
+ return "";
+}
+#endif
+
+void RE2NFA::tokenize(const QString &input)
+{
+ symbols.clear();
+#if 1
+ RegExpTokenizer tokenizer(input);
+ Symbol sym;
+ int tok = tokenizer.lex();
+ while (tok != -1) {
+ Symbol sym;
+ sym.token = static_cast<Token>(tok);
+ sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);
+
+ if (sym.token == TOK_QUOTED_STRING) {
+ sym.lexem.chop(1);
+ sym.lexem.remove(0, 1);
+ sym.token = TOK_STRING;
+ }
+
+ if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
+ for (int i = 0; i < sym.lexem.length(); ++i) {
+ if (sym.lexem.at(i) == '\\') {
+ if (i >= sym.lexem.length() - 1)
+ break;
+ QChar ch = sym.lexem.at(i + 1);
+ if (ch == QLatin1Char('n')) {
+ ch = '\n';
+ } else if (ch == QLatin1Char('r')) {
+ ch = '\r';
+ } else if (ch == QLatin1Char('t')) {
+ ch = '\t';
+ } else if (ch == QLatin1Char('f')) {
+ ch = '\f';
+ }
+ sym.lexem.replace(i, 2, ch);
+ }
+ }
+ }
+
+ /*
+ if (sym.token == TOK_SEQUENCE) {
+ Symbol s;
+ s.token = TOK_LBRACKET;
+ s.lexem = "[";
+ symbols.append(s);
+
+ for (int i = 1; i < sym.lexem.length() - 1; ++i) {
+ s.token = TOK_STRING;
+ s.lexem = sym.lexem.at(i);
+ symbols.append(s);
+ }
+
+ s.token = TOK_RBRACKET;
+ s.lexem = "]";
+ symbols.append(s);
+
+ tok = tokenizer.lex();
+ continue;
+ }
+ */
+
+ symbols.append(sym);
+ tok = tokenizer.lex();
+ }
+#else
+ int pos = 0;
+ bool insideSet = false;
+ while (pos < input.length()) {
+ QChar ch = input.at(pos);
+
+ Symbol sym;
+ sym.column = pos;
+ sym.token = TOK_INVALID;
+ sym.lexem = QString(ch);
+ switch (ch.toLatin1()) {
+ case '"': {
+ if (insideSet) {
+ sym.token = TOK_STRING;
+ sym.lexem = QString(ch);
+ symbols += sym;
+ ++pos;
+ continue;
+ }
+ if (pos + 1 >= input.length())
+ return;
+ int quoteEnd = skipQuote(input, pos + 1);
+ sym.token = TOK_STRING;
+ sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
+ symbols += sym;
+ pos = quoteEnd;
+ continue;
+ }
+ case '{':
+ sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
+ break;
+ case '}':
+ sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
+ break;
+ case '[':
+ insideSet = true;
+ sym.token = TOK_LBRACKET;
+ break;
+ case ']':
+ insideSet = false;
+ sym.token = TOK_RBRACKET;
+ break;
+ case '(':
+ sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
+ break;
+ case ')':
+ sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
+ break;
+ case ',':
+ sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
+ break;
+ case '*':
+ sym.token = (insideSet ? TOK_STRING : TOK_STAR);
+ break;
+ case '|':
+ sym.token = (insideSet ? TOK_STRING : TOK_OR);
+ break;
+ case '?':
+ sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
+ break;
+ case '.':
+ sym.token = (insideSet ? TOK_STRING : TOK_DOT);
+ break;
+ case '+':
+ sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
+ break;
+ case '\\':
+ ++pos;
+ if (pos >= input.length())
+ return;
+ ch = input.at(pos);
+ if (ch == QLatin1Char('n')) {
+ ch = '\n';
+ } else if (ch == QLatin1Char('r')) {
+ ch = '\r';
+ } else if (ch == QLatin1Char('t')) {
+ ch = '\t';
+ } else if (ch == QLatin1Char('f')) {
+ ch = '\f';
+ }
+ // fall through
+ default:
+ sym.token = TOK_STRING;
+ sym.lexem = QString(ch);
+ symbols += sym;
+ ++pos;
+ continue;
+ }
+ symbols += sym;
+ ++pos;
+ }
+#endif
+#if 0
+ foreach (Symbol s, symbols) {
+ qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
+ }
+#endif
+}
+
+bool RE2NFA::next(Token t)
+{
+ if (hasNext() && next() == t)
+ return true;
+ errorColumn = symbol().column;
+ Q_ASSERT(false);
+ return false;
+}
+
+bool RE2NFA::test(Token t)
+{
+ if (index >= symbols.count())
+ return false;
+ if (symbols.at(index).token == t) {
+ ++index;
+ return true;
+ }
+ return false;
+}
+
+QString RE2NFA::lexemUntil(Token t)
+{
+ QString lexem;
+ while (hasNext() && next() != t)
+ lexem += symbol().lexem;
+ return lexem;
+}
+
diff --git a/util/lexgen/re2nfa.h b/util/lexgen/re2nfa.h
new file mode 100644
index 0000000000..57b8bde09e
--- /dev/null
+++ b/util/lexgen/re2nfa.h
@@ -0,0 +1,116 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef RE2NFA_H
+#define RE2NFA_H
+
+#include "nfa.h"
+#include <QSet>
+
+class RE2NFA
+{
+public:
+ RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs);
+
+ NFA parse(const QString &expression, int *errorColumn = 0);
+
+private:
+ NFA parseExpr();
+ NFA parseBranch();
+ NFA parsePiece();
+ NFA parseAtom();
+ NFA parseMaybeQuantifier(const NFA &nfa);
+ NFA parseSet();
+ NFA parseSet2();
+
+ NFA createCharNFA();
+
+private:
+ friend class RegExpTokenizer;
+
+ enum Token {
+ TOK_INVALID,
+ TOK_STRING,
+ TOK_LBRACE, // {
+ TOK_RBRACE, // }
+ TOK_LBRACKET, // [
+ TOK_RBRACKET, // ]
+ TOK_LPAREN, // (
+ TOK_RPAREN, // )
+ TOK_COMMA,
+ TOK_STAR,
+ TOK_OR,
+ TOK_QUESTION,
+ TOK_DOT,
+ TOK_PLUS,
+ TOK_SEQUENCE,
+ TOK_QUOTED_STRING
+ };
+
+ struct Symbol
+ {
+ inline Symbol() : token(TOK_INVALID), column(-1) {}
+ inline Symbol(Token t, const QString &l = QString()) : token(t), lexem(l), column(-1) {}
+ Token token;
+ QString lexem;
+ int column;
+ };
+
+ inline bool hasNext() const { return index < symbols.count(); }
+ inline Token next() { return symbols.at(index++).token; }
+ bool next(Token t);
+ bool test(Token t);
+ inline void prev() { index--; }
+ inline const Symbol &symbol() const { return symbols.at(index - 1); }
+ QString lexemUntil(Token t);
+
+ void tokenize(const QString &input);
+
+ QMap<QString, NFA> macros;
+ QVector<Symbol> symbols;
+ int index;
+ int errorColumn;
+ const QSet<InputType> maxInputSet;
+ Qt::CaseSensitivity caseSensitivity;
+};
+
+#endif // RE2NFA_H
+
diff --git a/util/lexgen/test.lexgen b/util/lexgen/test.lexgen
new file mode 100644
index 0000000000..fd532fd549
--- /dev/null
+++ b/util/lexgen/test.lexgen
@@ -0,0 +1,9 @@
+[Options]
+case-insensitive
+classname = TestScanner
+
+[Tokens]
+TOK_C = [abcd]
+TOK_B = [bc]
+TOK_A = a
+
diff --git a/util/lexgen/tests/testdata/backtrack1/input b/util/lexgen/tests/testdata/backtrack1/input
new file mode 100644
index 0000000000..f5099b509d
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/input
@@ -0,0 +1 @@
+LETX
diff --git a/util/lexgen/tests/testdata/backtrack1/output b/util/lexgen/tests/testdata/backtrack1/output
new file mode 100644
index 0000000000..6893deb014
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/output
@@ -0,0 +1 @@
+TOK_LET|LET
diff --git a/util/lexgen/tests/testdata/backtrack1/rules.lexgen b/util/lexgen/tests/testdata/backtrack1/rules.lexgen
new file mode 100644
index 0000000000..ade8a15546
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_LET = LET
+TOK_LETXX = LETXX
diff --git a/util/lexgen/tests/testdata/backtrack2/input b/util/lexgen/tests/testdata/backtrack2/input
new file mode 100644
index 0000000000..59ff5b7301
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/input
@@ -0,0 +1 @@
+LETXTRA
diff --git a/util/lexgen/tests/testdata/backtrack2/output b/util/lexgen/tests/testdata/backtrack2/output
new file mode 100644
index 0000000000..348b382818
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/output
@@ -0,0 +1,2 @@
+TOK_LET|LET
+TOK_XTRA|XTRA
diff --git a/util/lexgen/tests/testdata/backtrack2/rules.lexgen b/util/lexgen/tests/testdata/backtrack2/rules.lexgen
new file mode 100644
index 0000000000..6f16986e83
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/rules.lexgen
@@ -0,0 +1,4 @@
+[Tokens]
+TOK_LET = LET
+TOK_LETXX = LETXX
+TOK_XTRA = XTRA
diff --git a/util/lexgen/tests/testdata/casesensitivity/input b/util/lexgen/tests/testdata/casesensitivity/input
new file mode 100644
index 0000000000..72b7f4869c
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/input
@@ -0,0 +1 @@
+abcdAbcDABCDeFgEFGefgEfghiHIHihI
diff --git a/util/lexgen/tests/testdata/casesensitivity/output b/util/lexgen/tests/testdata/casesensitivity/output
new file mode 100644
index 0000000000..3a4e819060
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/output
@@ -0,0 +1,14 @@
+TOK_AB|ab
+TOK_CD|cd
+TOK_AB|Ab
+TOK_CD|cD
+TOK_AB|AB
+TOK_CD|CD
+TOK_EFG|eFg
+TOK_EFG|EFG
+TOK_EFG|efg
+TOK_EFG|Efg
+TOK_HI|hi
+TOK_HI|HI
+TOK_HI|Hi
+TOK_HI|hI
diff --git a/util/lexgen/tests/testdata/casesensitivity/rules.lexgen b/util/lexgen/tests/testdata/casesensitivity/rules.lexgen
new file mode 100644
index 0000000000..3347587ffe
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/rules.lexgen
@@ -0,0 +1,7 @@
+[Options]
+case-insensitive
+[Tokens]
+TOK_AB = ab
+TOK_CD = cd
+TOK_EFG = [e-g]{3}
+TOK_HI = [hi]{2}
diff --git a/util/lexgen/tests/testdata/comments/input b/util/lexgen/tests/testdata/comments/input
new file mode 100644
index 0000000000..03873e044a
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/input
@@ -0,0 +1 @@
+/* comment with stuff *//*another comment with * stars * inside*/
diff --git a/util/lexgen/tests/testdata/comments/output b/util/lexgen/tests/testdata/comments/output
new file mode 100644
index 0000000000..2395ad1873
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/output
@@ -0,0 +1,2 @@
+TOK_COMMENT|/* comment with stuff */
+TOK_COMMENT|/*another comment with * stars * inside*/
diff --git a/util/lexgen/tests/testdata/comments/rules.lexgen b/util/lexgen/tests/testdata/comments/rules.lexgen
new file mode 100644
index 0000000000..490c759cc5
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_COMMENT = \/\*[^*]*\*+([^/*][^*]*\*+)*\/
diff --git a/util/lexgen/tests/testdata/dot/input b/util/lexgen/tests/testdata/dot/input
new file mode 100644
index 0000000000..e5b0ad6e91
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/input
@@ -0,0 +1 @@
+afbcxd
diff --git a/util/lexgen/tests/testdata/dot/output b/util/lexgen/tests/testdata/dot/output
new file mode 100644
index 0000000000..6a9afd4ced
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/output
@@ -0,0 +1,2 @@
+TOK_AB|afb
+TOK_CD|cxd
diff --git a/util/lexgen/tests/testdata/dot/rules.lexgen b/util/lexgen/tests/testdata/dot/rules.lexgen
new file mode 100644
index 0000000000..03873a71bf
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = a.b
+TOK_CD = c.d
diff --git a/util/lexgen/tests/testdata/negation/input b/util/lexgen/tests/testdata/negation/input
new file mode 100644
index 0000000000..9447b8005d
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/input
@@ -0,0 +1 @@
+aycabd
diff --git a/util/lexgen/tests/testdata/negation/output b/util/lexgen/tests/testdata/negation/output
new file mode 100644
index 0000000000..0b73263fb9
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/output
@@ -0,0 +1,2 @@
+TOK_A|ayc
+TOK_B|abd
diff --git a/util/lexgen/tests/testdata/negation/rules.lexgen b/util/lexgen/tests/testdata/negation/rules.lexgen
new file mode 100644
index 0000000000..179810b3a0
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_A = a[^b]c
+TOK_B = abd
diff --git a/util/lexgen/tests/testdata/quoteinset/input b/util/lexgen/tests/testdata/quoteinset/input
new file mode 100644
index 0000000000..5a9b6804a9
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/input
@@ -0,0 +1 @@
+"a
diff --git a/util/lexgen/tests/testdata/quoteinset/output b/util/lexgen/tests/testdata/quoteinset/output
new file mode 100644
index 0000000000..7ba8890d5d
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/output
@@ -0,0 +1 @@
+TOK_QUOTEA|"a
diff --git a/util/lexgen/tests/testdata/quoteinset/rules.lexgen b/util/lexgen/tests/testdata/quoteinset/rules.lexgen
new file mode 100644
index 0000000000..9838276a7e
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_QUOTEA = ["]a
diff --git a/util/lexgen/tests/testdata/quotes/input b/util/lexgen/tests/testdata/quotes/input
new file mode 100644
index 0000000000..ac5445055d
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/input
@@ -0,0 +1 @@
+quotedstring
diff --git a/util/lexgen/tests/testdata/quotes/output b/util/lexgen/tests/testdata/quotes/output
new file mode 100644
index 0000000000..c538e32d75
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/output
@@ -0,0 +1 @@
+TOK_STR|quotedstring
diff --git a/util/lexgen/tests/testdata/quotes/rules.lexgen b/util/lexgen/tests/testdata/quotes/rules.lexgen
new file mode 100644
index 0000000000..d528cdd589
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_STR = "quotedstring"
diff --git a/util/lexgen/tests/testdata/simple/input b/util/lexgen/tests/testdata/simple/input
new file mode 100644
index 0000000000..acbe86c7c8
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/input
@@ -0,0 +1 @@
+abcd
diff --git a/util/lexgen/tests/testdata/simple/output b/util/lexgen/tests/testdata/simple/output
new file mode 100644
index 0000000000..a37a58ea31
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/output
@@ -0,0 +1,2 @@
+TOK_AB|ab
+TOK_CD|cd
diff --git a/util/lexgen/tests/testdata/simple/rules.lexgen b/util/lexgen/tests/testdata/simple/rules.lexgen
new file mode 100644
index 0000000000..5d958c429a
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = ab
+TOK_CD = cd
diff --git a/util/lexgen/tests/testdata/subsets1/input b/util/lexgen/tests/testdata/subsets1/input
new file mode 100644
index 0000000000..47f66d1c94
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/input
@@ -0,0 +1 @@
+abaf
diff --git a/util/lexgen/tests/testdata/subsets1/output b/util/lexgen/tests/testdata/subsets1/output
new file mode 100644
index 0000000000..75dd9361bc
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/output
@@ -0,0 +1,2 @@
+TOK_AB|ab
+TOK_AZ|af
diff --git a/util/lexgen/tests/testdata/subsets1/rules.lexgen b/util/lexgen/tests/testdata/subsets1/rules.lexgen
new file mode 100644
index 0000000000..94f51a91e2
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = ab
+TOK_AZ = a[a-z]
diff --git a/util/lexgen/tests/testdata/subsets2/input b/util/lexgen/tests/testdata/subsets2/input
new file mode 100644
index 0000000000..4d0dc3ad82
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/input
@@ -0,0 +1 @@
+abd
diff --git a/util/lexgen/tests/testdata/subsets2/output b/util/lexgen/tests/testdata/subsets2/output
new file mode 100644
index 0000000000..d5a7bc5bb7
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/output
@@ -0,0 +1,3 @@
+TOK_A|a
+TOK_B|b
+TOK_D|d
diff --git a/util/lexgen/tests/testdata/subsets2/rules.lexgen b/util/lexgen/tests/testdata/subsets2/rules.lexgen
new file mode 100644
index 0000000000..e0a76294bc
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/rules.lexgen
@@ -0,0 +1,4 @@
+[Tokens]
+TOK_D = [abcd]
+TOK_B = [bc]
+TOK_A = a
diff --git a/util/lexgen/tests/tests.pro b/util/lexgen/tests/tests.pro
new file mode 100644
index 0000000000..eb04439a13
--- /dev/null
+++ b/util/lexgen/tests/tests.pro
@@ -0,0 +1,6 @@
+CONFIG += qtestlib
+SOURCES += tst_lexgen.cpp
+TARGET = tst_lexgen
+include(../lexgen.pri)
+QT = core
+DEFINES += SRCDIR=\\\"$$PWD\\\"
diff --git a/util/lexgen/tests/tst_lexgen.cpp b/util/lexgen/tests/tst_lexgen.cpp
new file mode 100644
index 0000000000..c80de646d6
--- /dev/null
+++ b/util/lexgen/tests/tst_lexgen.cpp
@@ -0,0 +1,285 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QtTest/QtTest>
+#define AUTOTEST
+#include "../main.cpp"
+
+class tst_LexGen : public QObject
+{
+ Q_OBJECT
+private slots:
+ void nfa_singleInput();
+ void nfa_alternating();
+ void nfa_concatenating();
+ void nfa_optional();
+ void nfa_toDFA_data();
+ void nfa_toDFA();
+ void lexgen_data();
+ void lexgen();
+};
+
+void tst_LexGen::nfa_singleInput()
+{
+ NFA nfa = NFA::createSingleInputNFA('a');
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 1);
+
+ QCOMPARE(nfa.states.count(), 2);
+
+ QCOMPARE(nfa.states.at(0).transitions.count(), 1);
+ QVERIFY(nfa.states.at(0).transitions.contains('a'));
+ QCOMPARE(nfa.states.at(0).transitions.values('a').count(), 1);
+ QCOMPARE(nfa.states.at(0).transitions.value('a'), nfa.finalState);
+
+ QVERIFY(nfa.states.at(1).transitions.isEmpty());
+}
+
+void tst_LexGen::nfa_alternating()
+{
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA b = NFA::createSingleInputNFA('b');
+ NFA nfa = NFA::createAlternatingNFA(a, b);
+
+ const int initialA = 1;
+ const int finalA = 2;
+
+ const int initialB = 3;
+ const int finalB = 4;
+
+ QCOMPARE(nfa.states.count(), 6);
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 5);
+
+ QList<int> initialTransitions = nfa.states.at(0).transitions.values(Epsilon);
+ QCOMPARE(initialTransitions.count(), 2);
+ QVERIFY(initialTransitions.contains(initialA));
+ QVERIFY(initialTransitions.contains(initialB));
+
+ // no need to test the individual a and b NFAs, the other
+ // autotest already takes care of that. Just check whether
+ // the epsilon transitions to the final state exist.
+
+ QCOMPARE(nfa.states.at(finalA).transitions.count(), 1);
+ QCOMPARE(nfa.states.at(finalA).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalA).transitions.value(Epsilon), nfa.finalState);
+
+ QCOMPARE(nfa.states.at(finalB).transitions.count(), 1);
+ QCOMPARE(nfa.states.at(finalB).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalB).transitions.value(Epsilon), nfa.finalState);
+}
+
+void tst_LexGen::nfa_concatenating()
+{
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA b = NFA::createSingleInputNFA('b');
+ NFA nfa = NFA::createConcatenatingNFA(a, b);
+
+ const int initialA = 1;
+ const int finalA = 2;
+
+ const int initialB = 3;
+ const int finalB = 4;
+
+ QCOMPARE(nfa.states.count(), 6);
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 5);
+
+ QCOMPARE(nfa.states.at(0).transitions.count(), 1);
+ QCOMPARE(nfa.states.at(0).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(0).transitions.value(Epsilon), initialA);
+
+ QCOMPARE(nfa.states.at(finalA).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalA).transitions.value(Epsilon), initialB);
+
+ QCOMPARE(nfa.states.at(finalB).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalB).transitions.value(Epsilon), nfa.finalState);
+}
+
+void tst_LexGen::nfa_optional()
+{
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA nfa = NFA::createOptionalNFA(a);
+
+ const int initialA = 1;
+ const int finalA = 2;
+
+ QCOMPARE(nfa.states.count(), 4);
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 3);
+
+ QCOMPARE(nfa.states.at(0).transitions.count(), 2);
+ QList<int> initialTransitions = nfa.states.at(0).transitions.values(Epsilon);
+ QVERIFY(initialTransitions.contains(nfa.finalState));
+ QVERIFY(initialTransitions.contains(initialA));
+
+ QList<int> finalEpsilonATransitions = nfa.states.at(finalA).transitions.values(Epsilon);
+ QVERIFY(finalEpsilonATransitions.contains(initialA));
+ QVERIFY(finalEpsilonATransitions.contains(nfa.finalState));
+}
+
+Q_DECLARE_METATYPE(NFA);
+Q_DECLARE_METATYPE(DFA);
+
+void tst_LexGen::nfa_toDFA_data()
+{
+ QTest::addColumn<NFA>("nfa");
+ QTest::addColumn<DFA>("expectedDFA");
+
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA b = NFA::createSingleInputNFA('b');
+ NFA c = NFA::createSingleInputNFA('c');
+
+ NFA nfa;
+ DFA dfa;
+
+ dfa.clear();
+ dfa.resize(3);
+ dfa[0].transitions.insert('a', 1);
+ dfa[1].transitions.insert('b', 2);
+
+ nfa = NFA::createConcatenatingNFA(a, b);
+
+ QTest::newRow("simple concat") << nfa << dfa;
+
+ dfa.clear();
+ dfa.resize(3);
+ dfa[0].transitions.insert('a', 1);
+ dfa[0].transitions.insert('b', 2);
+
+ nfa = NFA::createAlternatingNFA(a, b);
+
+ QTest::newRow("simple alternate") << nfa << dfa;
+
+}
+
+void tst_LexGen::nfa_toDFA()
+{
+ QFETCH(NFA, nfa);
+ QFETCH(DFA, expectedDFA);
+
+ DFA dfa = nfa.toDFA();
+
+ QCOMPARE(dfa.count(), expectedDFA.count());
+ for (int i = 0; i < dfa.count(); ++i) {
+ if (dfa.at(i).transitions != expectedDFA.at(i).transitions) {
+ qDebug() << "DFAs differ in state" << i;
+ qDebug() << "NFA:";
+ nfa.debug();
+ qDebug() << "Actual DFA:";
+ dfa.debug();
+ qDebug() << "Expected DFA:";
+ expectedDFA.debug();
+ QVERIFY(false);
+ }
+ }
+}
+
+void tst_LexGen::lexgen_data()
+{
+ QTest::addColumn<QString>("ruleFile");
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("expectedOutput");
+
+ QDir d(QString(SRCDIR));
+ d.cd("testdata");
+ foreach (QString test, d.entryList(QDir::Dirs | QDir::NoDotAndDotDot)) {
+ QString dir = d.absoluteFilePath(test) + '/';
+ QTest::newRow(qPrintable(test))
+ << dir + "rules.lexgen"
+ << dir + "input"
+ << dir + "output"
+ ;
+ }
+}
+
+void tst_LexGen::lexgen()
+{
+ QFETCH(QString, ruleFile);
+ QFETCH(QString, input);
+ QFETCH(QString, expectedOutput);
+
+ Config conf;
+ QVERIFY(loadConfig(ruleFile, &conf));
+ DFA dfa = generateMachine(conf);
+ QVERIFY(!dfa.isEmpty());
+ conf.debug = true;
+
+ QFile f(input);
+ QVERIFY(f.open(QIODevice::ReadOnly));
+ input = QString::fromUtf8(f.readAll());
+ f.close();
+ if (input.endsWith(QLatin1Char('\n')))
+ input.chop(1);
+// machine.debug();
+ bool ok = false;
+ QList<Symbol> symbols = tokenize(dfa, input, &conf, &ok);
+ QVERIFY(ok);
+ f.setFileName(expectedOutput);
+ QVERIFY(f.open(QIODevice::ReadOnly));
+ QStringList lines;
+ while (!f.atEnd()) {
+ QString line = QString::fromUtf8(f.readLine());
+ if (line.endsWith(QLatin1Char('\n')))
+ line.chop(1);
+ lines << line;
+ }
+ f.close();
+
+// dfa.debug();
+ QCOMPARE(lines.count(), symbols.count());
+
+ for (int i = 0; i < lines.count(); ++i) {
+ QStringList l = lines.at(i).split(QChar::fromLatin1('|'));
+ QCOMPARE(l.count(), 2);
+ QString expectedToken = l.at(0);
+ QString expectedLexem = l.at(1);
+ QCOMPARE(symbols.at(i).token, expectedToken);
+ QCOMPARE(symbols.at(i).lexem, expectedLexem);
+ }
+}
+
+QTEST_MAIN(tst_LexGen)
+#include "tst_lexgen.moc"
diff --git a/util/lexgen/tokenizer.cpp b/util/lexgen/tokenizer.cpp
new file mode 100644
index 0000000000..39fc7ca1ef
--- /dev/null
+++ b/util/lexgen/tokenizer.cpp
@@ -0,0 +1,237 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+// auto generated. DO NOT EDIT.
+class RegExpTokenizer
+{
+public:
+ RegExpTokenizer(const QString &inp);
+
+ inline QChar next() {
+ return (pos < input.length()) ? input.at(pos++) : QChar();
+ }
+ int lex();
+
+ QString input;
+ int pos;
+ int lexemStart;
+ int lexemLength;
+};
+
+RegExpTokenizer::RegExpTokenizer(const QString &inp)
+{
+ input = inp;
+ pos = 0;
+ lexemStart = 0;
+ lexemLength = 0;
+}
+
+
+int RegExpTokenizer::lex()
+{
+ lexemStart = pos;
+ lexemLength = 0;
+ int lastAcceptingPos = -1;
+ int token = -1;
+ QChar ch;
+
+ // initial state
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 33)
+ goto state_1;
+ if (ch.unicode() == 34)
+ goto state_2;
+ if (ch.unicode() >= 35 && ch.unicode() <= 39)
+ goto state_1;
+ if (ch.unicode() == 40) {
+ token = RE2NFA::TOK_LPAREN;
+ goto found;
+ }
+ if (ch.unicode() == 41) {
+ token = RE2NFA::TOK_RPAREN;
+ goto found;
+ }
+ if (ch.unicode() == 42) {
+ token = RE2NFA::TOK_STAR;
+ goto found;
+ }
+ if (ch.unicode() == 43) {
+ token = RE2NFA::TOK_PLUS;
+ goto found;
+ }
+ if (ch.unicode() == 44) {
+ token = RE2NFA::TOK_COMMA;
+ goto found;
+ }
+ if (ch.unicode() == 45)
+ goto state_1;
+ if (ch.unicode() == 46) {
+ token = RE2NFA::TOK_DOT;
+ goto found;
+ }
+ if (ch.unicode() >= 47 && ch.unicode() <= 62)
+ goto state_1;
+ if (ch.unicode() == 63) {
+ token = RE2NFA::TOK_QUESTION;
+ goto found;
+ }
+ if (ch.unicode() >= 64 && ch.unicode() <= 90)
+ goto state_1;
+ if (ch.unicode() == 91)
+ goto state_10;
+ if (ch.unicode() == 92)
+ goto state_11;
+ if (ch.unicode() >= 93 && ch.unicode() <= 122)
+ goto state_1;
+ if (ch.unicode() == 123) {
+ token = RE2NFA::TOK_LBRACE;
+ goto found;
+ }
+ if (ch.unicode() == 124) {
+ token = RE2NFA::TOK_OR;
+ goto found;
+ }
+ if (ch.unicode() == 125) {
+ token = RE2NFA::TOK_RBRACE;
+ goto found;
+ }
+ if (ch.unicode() >= 126)
+ goto state_1;
+ goto out;
+ state_1:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ goto out;
+ state_2:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 33)
+ goto state_15;
+ if (ch.unicode() == 34)
+ goto state_16;
+ if (ch.unicode() >= 35)
+ goto state_15;
+ goto out;
+ state_10:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_19;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ state_11:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ ch = next();
+ if (ch.unicode() >= 1)
+ goto state_20;
+ goto out;
+ state_15:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 33)
+ goto state_15;
+ if (ch.unicode() == 34)
+ goto state_16;
+ if (ch.unicode() >= 35)
+ goto state_15;
+ goto out;
+ state_16:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_QUOTED_STRING;
+ goto out;
+ state_17:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_19;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ state_18:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_21;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ state_19:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_SEQUENCE;
+ goto out;
+ state_20:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ goto out;
+ state_21:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_SEQUENCE;
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_19;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ found:
+ lastAcceptingPos = pos;
+
+ out:
+ if (lastAcceptingPos != -1) {
+ lexemLength = lastAcceptingPos - lexemStart;
+ pos = lastAcceptingPos;
+ }
+ return token;
+}
+