52 files changed, 3425 insertions, 0 deletions
diff --git a/util/lexgen/README b/util/lexgen/README
new file mode 100644
index 0000000000..9feb18d1e3
--- /dev/null
+++ b/util/lexgen/README
@@ -0,0 +1,16 @@
+Lexgen
+------
+
+This is a little tool to generate lexical scanners from a rather simplistic
+configuration file. We use it internally in Qt to generate the scanner for the
+CSS parser that is built into the toolkit (used for the widget styling and the
+HTML import into QTextDocument).
+
+Beware, it's very slow (in generating the code) and it may not generate what
+you want. But I like that it generates code that operates on QChar and friends.
+
+Use at your own risk ;-)
+
+
+--
+Simon Hausmann <simon.hausmann@nokia.com>
diff --git a/util/lexgen/configfile.cpp b/util/lexgen/configfile.cpp
new file mode 100644
index 0000000000..dab2f61686
--- /dev/null
+++ b/util/lexgen/configfile.cpp
@@ -0,0 +1,99 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "configfile.h"
+
+#include <QFile>
+
+ConfigFile::SectionMap ConfigFile::parse(const QString &fileName)
+{
+    QFile f(fileName);
+    if (!f.open(QIODevice::ReadOnly))
+        return ConfigFile::SectionMap();
+    return parse(&f);
+}
+
+ConfigFile::SectionMap ConfigFile::parse(QIODevice *dev)
+{
+    SectionMap sections;
+    SectionMap::Iterator currentSection = sections.end();
+
+    ConfigFile::SectionMap result;
+    int currentLineNumber = 0;
+    while (!dev->atEnd()) {
+        QString line = QString::fromUtf8(dev->readLine()).trimmed();
+        ++currentLineNumber;
+
+        if (line.isEmpty() || line.startsWith(QLatin1Char('#')))
+            continue;
+
+        if (line.startsWith(QLatin1Char('['))) {
+            if (!line.endsWith(']')) {
+                qWarning("Syntax error at line %d: Missing ']' at start of new section.", currentLineNumber);
+                return SectionMap();
+            }
+            line.remove(0, 1);
+            line.chop(1);
+            const QString sectionName = line;
+            currentSection = sections.insert(sectionName, Section());
+            continue;
+        }
+
+        if (currentSection == sections.end()) {
+            qWarning("Syntax error at line %d: Entry found outside of any section.", currentLineNumber);
+            return SectionMap();
+        }
+
+        Entry e;
+        e.lineNumber = currentLineNumber;
+
+        int equalPos = line.indexOf(QLatin1Char('='));
+        if (equalPos == -1) {
+            e.key = line;
+        } else {
+            e.key = line;
+            e.key.truncate(equalPos);
+            e.key = e.key.trimmed();
+            e.value = line.mid(equalPos + 1).trimmed();
+        }
+        currentSection->append(e);
+    }
+    return sections;
+}
diff --git a/util/lexgen/configfile.h b/util/lexgen/configfile.h
new file mode 100644
index 0000000000..b8d483b916
--- /dev/null
+++ b/util/lexgen/configfile.h
@@ -0,0 +1,81 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef CONFIGFILE_H
+#define CONFIGFILE_H
+
+#include <QStringList>
+#include <QMap>
+#include <QVector>
+
+struct ConfigFile
+{
+    struct Entry
+    {
+        inline Entry() : lineNumber(-1) {}
+        int lineNumber;
+        QString key;
+        QString value;
+    };
+    struct Section : public QVector<Entry>
+    {
+        inline bool contains(const QString &key) const
+        {
+            for (int i = 0; i < count(); ++i)
+                if (at(i).key == key)
+                    return true;
+            return false;
+        }
+        inline QString value(const QString &key, const QString &defaultValue = QString()) const
+        {
+            for (int i = 0; i < count(); ++i)
+                if (at(i).key == key)
+                    return at(i).value;
+            return defaultValue;
+        }
+    };
+    typedef QMap<QString, Section> SectionMap;
+
+    static SectionMap parse(const QString &fileName);
+    static SectionMap parse(QIODevice *dev);
+};
+
+#endif // CONFIGFILE_H
+
diff --git a/util/lexgen/css2-simplified.lexgen b/util/lexgen/css2-simplified.lexgen
new file mode 100644
index 0000000000..53facb1d4a
--- /dev/null
+++ b/util/lexgen/css2-simplified.lexgen
@@ -0,0 +1,93 @@
+[Options]
+case-sensitive
+classname = QCssScanner_Generated
+
+[Code Generator Options]
+MapToCode[a-z] = (ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256
+TokenPrefix = QCss::
+FileHeader = ../moc/licenseheader.txt
+
+[Macros]
+escape          = \\[^\r\n\f0-9a-f]
+nmstart         = [_a-z]|{escape}
+nmchar          = [_a-z0-9-]|{escape}
+nl              = \n|\r\n|\r|\f
+string1         = \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
+string2         = \'([^\n\r\f\\']|\\{nl}|{escape})*\'
+invalid1        = \"([^\n\r\f\\"]|\\{nl}|{escape})*
+invalid2        = \'([^\n\r\f\\']|\\{nl}|{escape})*
+
+ident           = -?{nmstart}{nmchar}*
+name            = {nmchar}+
+num             = [0-9]+|[0-9]*"."[0-9]+
+string          = {string1}|{string2}
+invalid         = {invalid1}|{invalid2}
+url             = ([!#$%&*-~]|{escape})*
+s               = [ \t\r\n\f]
+w               = {s}*
+
+[Tokens]
+
+S = {s}+
+
+handleCommentStart() = \/\*
+
+CDO = "<!--"
+CDC = "-->"
+INCLUDES = "~="
+DASHMATCH = "|="
+
+LBRACE = {w}"{"
+PLUS = {w}"+"
+GREATER = {w}">"
+COMMA = {w}","
+
+STRING = {string}
+INVALID = {invalid}
+
+IDENT = {ident}
+
+HASH = "#"{name}
+
+ATKEYWORD_SYM = "@"{ident}
+
+EXCLAMATION_SYM = "!"
+
+#EMS = {num}em
+#EXS = {num}ex
+#LENGTH = {num}px
+#LENGTH = {num}cm
+#LENGTH = {num}mm
+#LENGTH = {num}in
+#LENGTH = {num}pt
+#LENGTH = {num}pc
+#ANGLE = {num}deg
+#ANGLE = {num}rad
+#ANGLE = {num}grad
+#TIME = {num}ms
+#TIME = {num}s
+#FREQ = {num}hz
+#FREQ = {num}khz
+#DIMENSION = {num}{ident}
+LENGTH = {num}{ident}
+
+PERCENTAGE = {num}%
+NUMBER = {num}
+
+#URI = "url("{w}{string}{w}")"
+#URI = "url("{w}{url}{w}")"
+FUNCTION = {ident}"("
+
+COLON = :
+SEMICOLON = ;
+RBRACE = \}
+SLASH = /
+MINUS = -
+DOT = \.
+STAR = \*
+LBRACKET = \[
+RBRACKET = \]
+EQUAL = \=
+LPAREN = \(
+RPAREN = \)
+OR = \|
diff --git a/util/lexgen/generator.cpp b/util/lexgen/generator.cpp
new file mode 100644
index 0000000000..7998861dfe
--- /dev/null
+++ b/util/lexgen/generator.cpp
@@ -0,0 +1,532 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "generator.h"
+
+#include <QFile>
+
+void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
+{
+    block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
+    if (!iline)
+        return;
+    
+    block.indent();
+    QString tmp = body;
+    if (tmp.endsWith(QLatin1Char('\n')))
+        tmp.chop(1);
+    foreach (QString line, tmp.split(QLatin1Char('\n')))
+        block << line;
+    block.outdent();
+    block << "}";
+}
+
+QString Function::signature(const QString &funcNamePrefix) const
+{
+   QString sig;
+   if (!rtype.isEmpty()) {
+       sig += rtype;
+       sig += QLatin1Char(' ');
+   }
+   sig += funcNamePrefix;
+   sig += fname;
+   if (cnst)
+       sig += " const";
+   return sig;
+}
+
+QString Function::definition() const
+{
+    if (iline)
+        return QString();
+
+    QString result;
+    result += signature();
+    result += QLatin1String("\n{\n");
+    
+    QString tmp = body;
+
+    if (tmp.endsWith(QLatin1Char('\n')))
+        tmp.chop(1);
+    if (!tmp.startsWith(QLatin1Char('\n')))
+        tmp.prepend("    ");
+
+    tmp.replace(QLatin1Char('\n'), QLatin1String("\n    "));
+
+    result += tmp;
+    
+    result += QLatin1String("\n}\n");
+
+    return result;
+}
+
+void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
+{
+    foreach (Function ctor, constructors)
+        ctor.printDeclaration(block, klass->name());
+    
+    if (!constructors.isEmpty())
+        block.addNewLine();
+    
+    foreach (Function func, functions)
+        func.printDeclaration(block);
+    
+    if (!functions.isEmpty())
+        block.addNewLine();
+    
+    foreach (QString var, variables)
+        block << var << ';';
+}
+
+void Class::addConstructor(Access access, const QString &body, const QString &_args)
+{
+    Function ctor;
+    QString args = _args;
+    if (!args.startsWith(QLatin1Char('('))
+        && !args.endsWith(QLatin1Char(')'))) {
+        args.prepend('(');
+        args.append(')');
+    }
+    ctor.setName(args);
+    ctor.addBody(body);
+    sections[access].constructors.append(ctor);
+}
+
+QString Class::Section::definition(const Class *klass) const
+{
+    QString result;
+    
+    foreach (Function ctor, constructors) {
+        ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
+        result += ctor.definition();
+        result += QLatin1Char('\n');
+    }
+    
+    foreach (Function func, functions) {
+        if (!func.hasBody()) continue;
+        func.setName(klass->name() + "::" + func.name());
+        result += func.definition();
+        result += QLatin1Char('\n');
+    }
+
+    return result;
+}
+
+QString Class::declaration() const
+{
+    CodeBlock block;
+    
+    block << QLatin1String("class ") << cname;
+    block << "{";
+    
+    if (!sections[PublicMember].isEmpty()) {
+        block << "public:";
+        block.indent();
+        sections[PublicMember].printDeclaration(this, block);
+        block.outdent();
+    }
+
+    if (!sections[ProtectedMember].isEmpty()) {
+        block << "protected:";
+        block.indent();
+        sections[ProtectedMember].printDeclaration(this, block);
+        block.outdent();
+    }
+    
+    if (!sections[PrivateMember].isEmpty()) {
+        block << "private:";
+        block.indent();
+        sections[PrivateMember].printDeclaration(this, block);
+        block.outdent();
+    }
+
+    block << "};";
+    block.addNewLine();
+    
+    return block.toString();
+}
+
+QString Class::definition() const
+{
+    return sections[PrivateMember].definition(this)
+           + sections[ProtectedMember].definition(this)
+           + sections[PublicMember].definition(this);
+}
+
+Generator::Generator(const DFA &_dfa, const Config &config)
+     : dfa(_dfa), cfg(config)
+{
+    QList<InputType> lst = cfg.maxInputSet.toList();
+    qSort(lst);
+    minInput = lst.first();
+    maxInput = lst.last();
+
+    ConfigFile::Section section = config.configSections.value("Code Generator Options");
+
+    foreach (ConfigFile::Entry entry, section) {
+        if (!entry.key.startsWith(QLatin1String("MapToCode["))
+            || !entry.key.endsWith(QLatin1Char(']')))
+            continue;
+        QString range = entry.key;
+        range.remove(0, qstrlen("MapToCode["));
+        range.chop(1);
+        if (range.length() != 3
+            || range.at(1) != QLatin1Char('-')) {
+            qWarning("Invalid range for char mapping function: %s", qPrintable(range));
+            continue;
+        }
+        TransitionSequence seq;
+        seq.first = range.at(0).unicode();
+        seq.last = range.at(2).unicode();
+        seq.testFunction = entry.value;
+        charFunctionRanges.append(seq);
+    }
+
+    QString tokenPrefix = section.value("TokenPrefix");
+    if (!tokenPrefix.isEmpty()) {
+        for (int i = 0; i < dfa.count(); ++i)
+            if (!dfa.at(i).symbol.isEmpty()
+                && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
+                dfa[i].symbol.prepend(tokenPrefix);
+    }
+
+    headerFileName = section.value("FileHeader");
+}
+
+static inline bool adjacentKeys(int left, int right) { return left + 1 == right; }
+//static inline bool adjacentKeys(const InputType &left, const InputType &right)
+//{ return left.val + 1 == right.val; }
+
+static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
+{
+    QVector<Generator::TransitionSequence> sequences;
+    if (transitions.isEmpty())
+        return sequences;
+
+    QList<InputType> keys = transitions.keys();
+    qSort(keys);
+    int i = 0;
+    Generator::TransitionSequence sequence;
+    sequence.first = keys.at(0);
+    ++i;
+    for (; i < keys.count(); ++i) {
+        if (adjacentKeys(keys.at(i - 1), keys.at(i))
+            && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
+            continue;
+        }
+        sequence.last = keys.at(i - 1);
+        sequence.transition = transitions.value(sequence.last);
+        sequences.append(sequence);
+
+        sequence.first = keys.at(i);
+    }
+    sequence.last = keys.at(i - 1);
+    sequence.transition = transitions.value(sequence.last);
+    sequences.append(sequence);
+
+    return sequences;
+}
+
+QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
+{
+    return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
+                 << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
+                 << "]";
+}
+
+bool Generator::isSingleReferencedFinalState(int i) const
+{
+    return backReferenceMap.value(i) == 1
+           && dfa.at(i).transitions.isEmpty()
+           && !dfa.at(i).symbol.isEmpty();
+}
+
+void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
+{
+    if (transitions.isEmpty())
+        return;
+
+    QVector<TransitionSequence> sequences = convertToSequences(transitions);
+
+    bool needsCharFunction = false;
+    if (!charFunctionRanges.isEmpty()) {
+        int i = 0;
+        while (i < sequences.count()) {
+            const TransitionSequence &seq = sequences.at(i);
+            if (!seq.testFunction.isEmpty()) {
+                ++i;
+                continue;
+            }
+
+            foreach (TransitionSequence range, charFunctionRanges)
+                if (range.first >= seq.first && range.last <= seq.last) {
+                    needsCharFunction = true;
+
+                    TransitionSequence left, middle, right;
+
+                    left.first = seq.first;
+                    left.last = range.first - 1;
+                    left.transition = seq.transition;
+
+                    middle = range;
+                    middle.transition = seq.transition;
+
+                    right.first = range.last + 1;
+                    right.last = seq.last;
+                    right.transition = seq.transition;
+
+                    sequences.remove(i);
+                    if (left.last >= left.first) {
+                        sequences.insert(i, left);
+                        ++i;
+                    }
+                    sequences.insert(i, middle);
+                    ++i;
+                    if (right.last >= right.first) {
+                        sequences.insert(i, right);
+                        ++i;
+                    }
+
+                    i = -1;
+                    break;
+                }
+
+            ++i;
+        }
+    }
+
+    //qDebug() << "sequence count" << sequences.count();
+    //qDebug() << sequences;
+
+    if (sequences.count() < 10
+        || sequences.last().last == maxInput
+        || needsCharFunction) {
+        foreach (TransitionSequence seq, sequences) {
+            const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
+
+            QString brace;
+            if (embedFinalState)
+                brace = " {";
+
+            if (!seq.testFunction.isEmpty()) {
+                body << "if (" << seq.testFunction << ")" << brace;
+            } else if (seq.first == seq.last) {
+                body << "if (ch.unicode() == " << seq.first << ")" << brace;
+            } else {
+                if (seq.last < maxInput)
+                    body << "if (ch.unicode() >= " << seq.first
+                         << " && ch.unicode() <= " << seq.last << ")" << brace;
+                else
+                    body << "if (ch.unicode() >= " << seq.first << ")" << brace;
+            }
+            body.indent();
+            if (embedFinalState) {
+                body << "token = " << dfa.at(seq.transition).symbol << ";";
+                body << "goto found;";
+
+                body.outdent();
+                body << "}";
+            } else {
+                body << "goto state_" << seq.transition << ";";
+                body.outdent();
+            }
+        }
+    } else {
+        QList<InputType> keys = transitions.keys();
+        qSort(keys);
+
+        body << "switch (ch.unicode()) {";
+        body.indent();
+        for (int k = 0; k < keys.count(); ++k) {
+            const InputType key = keys.at(k);
+            const int trans = transitions.value(key);
+
+            QString keyStr;
+            if (key == '\\')
+                keyStr = QString("\'\\\\\'");
+            else if (key >= 48 && key < 127)
+                keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
+            else
+                keyStr = QString::number(key);
+
+            if (k < keys.count() - 1
+                && transitions.value(keys.at(k + 1)) == trans) {
+                body << "case " << keyStr << ":";
+            } else {
+                if (isSingleReferencedFinalState(trans)) {
+                    body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
+                } else {
+                    body << "case " << keyStr << ": goto state_" << trans << ";";
+                }
+            }
+        }
+        body.outdent();
+        body << "}";
+    }
+}
+
+QString Generator::generate()
+{
+    Class klass(cfg.className);
+    
+    klass.addMember(Class::PublicMember, "QString input");
+    klass.addMember(Class::PublicMember, "int pos");
+    klass.addMember(Class::PublicMember, "int lexemStart");
+    klass.addMember(Class::PublicMember, "int lexemLength");
+    
+    {
+        CodeBlock body;
+        body << "input = inp;";
+        body << "pos = 0;";
+        body << "lexemStart = 0;";
+        body << "lexemLength = 0;";
+        klass.addConstructor(Class::PublicMember, body, "const QString &inp");
+    }
+    
+    {
+        Function next("QChar", "next()");
+        next.setInline(true);
+        if (cfg.caseSensitivity == Qt::CaseSensitive)
+            next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
+        else
+            next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
+        klass.addMember(Class::PublicMember, next);
+    }
+    
+    /*
+    {
+        Function lexem("QString", "lexem()");
+        lexem.setConst(true);
+        lexem.setInline(true);
+        lexem.addBody("return input.mid(lexemStart, lexemLength);");
+        klass.addMember(Class::PublicMember, lexem);
+    }
+    */
+
+    for (int i = 0; i < dfa.count(); ++i)
+        if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
+            Function handlerFunc("int", dfa.at(i).symbol);
+            klass.addMember(Class::PublicMember, handlerFunc);
+        }
+
+    Function lexFunc;
+    lexFunc.setReturnType("int");
+    lexFunc.setName("lex()");
+    
+    CodeBlock body;
+    body << "lexemStart = pos;";
+    body << "lexemLength = 0;";
+    body << "int lastAcceptingPos = -1;";
+    body << "int token = -1;";
+    body << "QChar ch;";
+    body.addNewLine();
+
+    backReferenceMap.clear();
+    foreach (State s, dfa)
+        foreach (int state, s.transitions)
+            backReferenceMap[state]++;
+
+    bool haveSingleReferencedFinalState = false;
+
+    for (int i = 0; i < dfa.count(); ++i) {
+        if (isSingleReferencedFinalState(i)) {
+            haveSingleReferencedFinalState = true;
+            continue;
+        }
+
+        if (i > 0)
+            body << "state_" << i << ":";
+        else
+            body << "// initial state";
+
+        body.indent();
+
+        if (!dfa.at(i).symbol.isEmpty()) {
+            body << "lastAcceptingPos = pos;";
+            body << "token = " << dfa.at(i).symbol << ";";
+        }
+
+        body.outdent();
+
+        body.indent();
+        
+        if (!dfa.at(i).transitions.isEmpty()) {
+            body << "ch = next();";
+            generateTransitions(body, dfa.at(i).transitions);
+        }
+        
+        body << "goto out;";
+        
+        body.outdent();
+    }
+
+    if (haveSingleReferencedFinalState) {
+        body << "found:";
+        body << "lastAcceptingPos = pos;";
+        body.addNewLine();
+    }
+
+    body << "out:";
+    body << "if (lastAcceptingPos != -1) {";
+    body.indent();
+    body << "lexemLength = lastAcceptingPos - lexemStart;";
+    body << "pos = lastAcceptingPos;";
+    body.outdent();
+    body << "}";
+    body << "return token;";
+    
+    lexFunc.addBody(body);
+    
+    klass.addMember(Class::PublicMember, lexFunc);
+
+    QString header;
+    QFile headerFile(headerFileName);
+    if (!headerFileName.isEmpty()
+        && headerFile.exists()
+        && headerFile.open(QIODevice::ReadOnly)) {
+        header = QString::fromUtf8(headerFile.readAll());
+    }
+
+    header += QLatin1String("// auto generated. DO NOT EDIT.\n");
+    
+    return header + klass.declaration() + klass.definition();
+}
+
diff --git a/util/lexgen/generator.h b/util/lexgen/generator.h
new file mode 100644
index 0000000000..b9df8d74fa
--- /dev/null
+++ b/util/lexgen/generator.h
@@ -0,0 +1,221 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef GENERATOR_H
+#define GENERATOR_H
+
+#include <QTextStream>
+#include <QStringList>
+
+#include "nfa.h"
+
+class LineStream
+{
+private:
+    struct SharedStream
+    {
+        int ref;
+        QTextStream *stream;
+    };
+
+public:
+    LineStream(QTextStream *textStream)
+    {
+        shared = new SharedStream;
+        shared->ref = 1;
+        shared->stream = textStream;
+    }
+    LineStream(const LineStream &other)
+    {
+        shared = other.shared;
+        shared->ref++;
+    }
+    LineStream &operator=(const LineStream &other)
+    {
+        if (this == &other)
+            return *this;
+        LineStream copy(other); // keep refcount up
+        qSwap(*shared, *other.shared);
+        return *this;
+    }
+    ~LineStream()
+    {
+        if (!--shared->ref) {
+            (*shared->stream) << endl;
+            delete shared;
+        }
+    }
+    
+    template <typename T>
+    LineStream &operator<<(const T &value)
+    { (*shared->stream) << value; return *this; }
+    
+    SharedStream *shared;
+};
+
+class CodeBlock
+{
+public:
+    inline CodeBlock() { stream.setString(&output, QIODevice::WriteOnly); }
+
+    inline void indent() { indentStr += QLatin1String("    "); }
+    inline void outdent() { indentStr.remove(0, 4); }
+
+    template <typename T>
+    LineStream operator<<(const T &value)
+    { stream << indentStr; stream << value; return LineStream(&stream); }
+    
+    inline void addNewLine() { stream << endl; }
+    
+    inline QString toString() const { stream.flush(); return output; }
+    
+private:
+    QString output;
+    mutable QTextStream stream;
+    QString indentStr;
+};
+
+class Function
+{
+public:
+    inline Function(const QString &returnType, const QString &name)
+        : rtype(returnType), fname(name), iline(false), cnst(false) {}
+    inline Function() : iline(false), cnst(false) {}
+
+    inline void setName(const QString &name) { fname = name; }
+    inline QString name() const { return fname; }
+    
+    inline void setInline(bool i) { iline = i; }
+    inline bool isInline() const { return iline; }
+    
+    inline void setReturnType(const QString &type) { rtype = type; }
+    inline QString returnType() const { return rtype; }
+    
+    inline void addBody(const QString &_body) { body += _body; }
+    inline void addBody(const CodeBlock &block) { body += block.toString(); }
+    inline bool hasBody() const { return !body.isEmpty(); }
+    
+    inline void setConst(bool konst) { cnst = konst; }
+    inline bool isConst() const { return cnst; }
+    
+    void printDeclaration(CodeBlock &block, const QString &funcNamePrefix = QString()) const;
+    QString definition() const;
+
+private:
+    QString signature(const QString &funcNamePrefix = QString()) const;
+
+    QString rtype;
+    QString fname;
+    QString body;
+    bool iline;
+    bool cnst;
+};
+
+class Class
+{
+public:
+    enum Access { PublicMember, ProtectedMember, PrivateMember };
+
+    inline Class(const QString &name) : cname(name) {}
+
+    inline void setName(const QString &name) { cname = name; }
+    inline QString name() const { return cname; }
+    
+    inline void addMember(Access access, const QString &name)
+    { sections[access].variables.append(name); }
+    inline void addMember(Access access, const Function &func)
+    { sections[access].functions.append(func); }
+    
+    void addConstructor(Access access, const QString &body, const QString &args = QString());
+    inline void addConstructor(Access access, const CodeBlock &body, const QString &args = QString())
+    { addConstructor(access, body.toString(), args); }
+    
+    QString declaration() const;
+    QString definition() const;
+    
+private:
+    QString cname;
+    struct Section
+    {
+        QVector<Function> functions;
+        QStringList variables;
+        QVector<Function> constructors;
+        
+        inline bool isEmpty() const
+        { return functions.isEmpty() && variables.isEmpty() && constructors.isEmpty(); }
+        
+        void printDeclaration(const Class *klass, CodeBlock &block) const;
+        QString definition(const Class *klass) const;
+    };
+
+    Section sections[3];
+};
+
+class Generator
+{
+public:
+    Generator(const DFA &dfa, const Config &config);
+    
+    QString generate();
+    
+private:
+    void generateTransitions(CodeBlock &body, const TransitionMap &transitions);
+    bool isSingleReferencedFinalState(int i) const;
+
+    DFA dfa;
+    Config cfg;
+    InputType minInput;
+    InputType maxInput;
+    QHash<int, int> backReferenceMap;
+    QString headerFileName;
+public:
+    struct TransitionSequence
+    {
+        inline TransitionSequence() : first(-1), last(-1), transition(-1) {}
+        InputType first;
+        InputType last;
+        int transition;
+        QString testFunction;
+    };
+private:
+    QVector<TransitionSequence> charFunctionRanges;
+};
+
+#endif
diff --git a/util/lexgen/global.h b/util/lexgen/global.h
new file mode 100644
index 0000000000..49f993e8c6
--- /dev/null
+++ b/util/lexgen/global.h
@@ -0,0 +1,113 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef GLOBAL_H
+#define GLOBAL_H
+
+#include <QHash>
+#include <QDataStream>
+#include <QSet>
+
+#include "configfile.h"
+
+#if 1
+typedef int InputType;
+
+enum SpecialInputType {
+    DigitInput,
+    SpaceInput,
+    Letter
+};
+
+#else
+
+enum SpecialInputType {
+    NoSpecialInput = 0,
+    DigitInput,
+    SpaceInput,
+    LetterOrNumberInput
+};
+
+struct InputType
+{
+    inline InputType() : val(0), specialInput(NoSpecialInput) {}
+    inline InputType(const int &val) : val(val), specialInput(NoSpecialInput) {}
+
+    inline operator int() const { return val; }
+
+    inline bool operator==(const InputType &other) const
+    { return val == other.val; }
+    inline bool operator!=(const InputType &other) const
+    { return val != other.val; }
+
+    int val;
+    SpecialInputType specialInput;
+};
+
+inline int qHash(const InputType &t) { return qHash(t.val); }
+
+inline QDataStream &operator<<(QDataStream &stream, const InputType &i)
+{
+    return stream << i;
+}
+
+inline QDataStream &operator>>(QDataStream &stream, InputType &i)
+{
+    return stream >> i;
+}
+
+#endif
+
+const InputType Epsilon = -1;
+
+struct Config
+{
+    inline Config() : caseSensitivity(Qt::CaseSensitive), debug(false), cache(false) {}
+    QSet<InputType> maxInputSet;
+    Qt::CaseSensitivity caseSensitivity;
+    QString className;
+    bool debug;
+    bool cache;
+    QString ruleFile;
+    ConfigFile::SectionMap configSections;
+};
+
+#endif // GLOBAL_H
diff --git a/util/lexgen/lexgen.lexgen b/util/lexgen/lexgen.lexgen
new file mode 100644
index 0000000000..33efb8b39b
--- /dev/null
+++ b/util/lexgen/lexgen.lexgen
@@ -0,0 +1,24 @@
+[Options]
+case-sensitive
+classname = RegExpTokenizer
+
+[Code Generator Options]
+TokenPrefix = RE2NFA::
+
+[Macros]
+Escape = \\.{1}
+
+[Tokens]
+TOK_QUOTED_STRING = \"[^\"]*\"
+TOK_STRING = [^\{\}\(\)\,\*\|\?\.\+\[]|{Escape}
+TOK_SEQUENCE = \[([^\]]|(\\\]))*\]
+TOK_LBRACE = \{
+TOK_RBRACE = \}
+TOK_LPAREN = \(
+TOK_RPAREN = \)
+TOK_COMMA = \,
+TOK_STAR = \*
+TOK_OR = \|
+TOK_QUESTION = \?
+TOK_DOT = \.
+TOK_PLUS = \+
diff --git a/util/lexgen/lexgen.pri b/util/lexgen/lexgen.pri
new file mode 100644
index 0000000000..b36e00ea62
--- /dev/null
+++ b/util/lexgen/lexgen.pri
@@ -0,0 +1,3 @@
+VPATH += $$PWD
+SOURCES += nfa.cpp configfile.cpp re2nfa.cpp
+INCLUDEPATH += $$PWD
diff --git a/util/lexgen/lexgen.pro b/util/lexgen/lexgen.pro
new file mode 100644
index 0000000000..89ac84de06
--- /dev/null
+++ b/util/lexgen/lexgen.pro
@@ -0,0 +1,6 @@
+TEMPLATE = app
+TARGET = lexgen
+include(lexgen.pri)
+SOURCES += main.cpp \
+    generator.cpp
+QT = core
diff --git a/util/lexgen/main.cpp b/util/lexgen/main.cpp
new file mode 100644
index 0000000000..1cb5d902a6
--- /dev/null
+++ b/util/lexgen/main.cpp
@@ -0,0 +1,323 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "nfa.h"
+#include "re2nfa.h"
+#include "configfile.h"
+#include "generator.h"
+
+#include <QFile>
+#include <QCoreApplication>
+#include <QFileInfo>
+#include <QDateTime>
+
+struct Symbol
+{
+    QString token;
+    QString lexem;
+};
+
+static QList<Symbol> tokenize(const DFA &dfa, const QString &input, Config *cfg, bool *ok = 0)
+{
+    QList<Symbol> symbols;
+    Symbol lastSymbol;
+    int state = 0;
+    int lastAcceptingState = -1;
+    QString lastAcceptingLexem;
+    int lastAcceptingPos = -1;
+    for (int i = 0; i < input.length(); ++i) {
+        QChar ch = input.at(i);
+        QChar chForInput = ch;
+        if (cfg->caseSensitivity == Qt::CaseInsensitive)
+            chForInput = chForInput.toLower();
+        int next = dfa.at(state).transitions.value(chForInput.unicode());
+        if (cfg->debug)
+            qDebug() << "input" << input.at(i) << "leads to state" << next;
+        if (next) {
+            lastSymbol.lexem.append(input.at(i));
+            lastSymbol.token = dfa.at(next).symbol;
+            if (!lastSymbol.token.isEmpty()) {
+                lastAcceptingState = next;
+                lastAcceptingLexem = lastSymbol.lexem;
+                lastAcceptingPos = i;
+            }
+            state = next;
+        } else {
+            if (lastAcceptingState != -1) {
+                if (cfg->debug)
+                    qDebug() << "adding" << dfa.at(lastAcceptingState).symbol << "and backtracking to" << lastAcceptingPos;
+                Symbol s;
+                s.token = dfa.at(lastAcceptingState).symbol;
+                s.lexem = lastAcceptingLexem;
+                symbols << s;
+                lastSymbol = Symbol();
+                state = 0;
+                i = lastAcceptingPos;
+                lastAcceptingPos = -1;
+                lastAcceptingState = -1;
+                continue;
+            }
+            if (state == 0 || lastSymbol.token.isEmpty()) {
+                if (cfg->debug)
+                    qDebug() << "invalid input";
+                if (ok)
+                    *ok = false;
+                return symbols;
+            }
+            if (cfg->debug)
+                qDebug() << "appending symbol with token" << lastSymbol.token;
+            symbols << lastSymbol;
+            lastSymbol = Symbol();
+            state = 0;
+            lastAcceptingState = -1;
+            --i;
+        }
+    }
+    if (!lastSymbol.token.isEmpty()) {
+        if (cfg->debug)
+            qDebug() << "appending (last) symbol with token" << lastSymbol.token;
+        symbols << lastSymbol;
+    } else if (lastAcceptingState != -1) {
+        if (cfg->debug)
+            qDebug() << "appending last accepting state with token" << dfa.at(lastAcceptingState).symbol;
+        Symbol s;
+        s.lexem = lastAcceptingLexem;
+        s.token = dfa.at(lastAcceptingState).symbol;
+        symbols << s;
+    }
+    if (ok)
+        *ok = true;
+    return symbols;
+}
+
+static QSet<InputType> determineMaxInputSet(const ConfigFile::Section &section)
+{
+    QSet<InputType> set;
+
+    QString inputTypeName;
+
+    foreach (const ConfigFile::Entry &entry, section)
+        if (entry.key == QLatin1String("InputType")) {
+            if (!inputTypeName.isEmpty()) {
+                qWarning("Error: InputType field specified multiple times in config file");
+                return QSet<InputType>();
+            }
+            inputTypeName = entry.value;
+        }
+
+    if (inputTypeName.isEmpty())
+        inputTypeName = "quint8";
+
+    if (inputTypeName == "quint8") {
+        for (int i = 1; i < 256; ++i)
+            set.insert(i);
+    } /* else if ### */ 
+    else {
+        qWarning("Error: Unknown input type '%s'", qPrintable(inputTypeName));
+        return QSet<InputType>();
+    }
+
+    return set;
+}
+
+static bool loadConfig(const QString &ruleFile, Config *cfg)
+{
+    ConfigFile::SectionMap sections = ConfigFile::parse(ruleFile);
+    if (sections.isEmpty()) {
+        qWarning("Error parsing %s", qPrintable(ruleFile));
+        return false;
+    }
+
+    QSet<InputType> maxInputSet = determineMaxInputSet(sections.value("Options"));
+    if (maxInputSet.isEmpty())
+        return false;
+
+    Qt::CaseSensitivity cs = Qt::CaseInsensitive;
+    if (sections.value("Options").contains("case-sensitive"))
+        cs = Qt::CaseSensitive;
+
+    cfg->configSections = sections;
+    cfg->caseSensitivity = cs;
+    cfg->className = sections.value("Options").value("classname", "Scanner");
+    cfg->maxInputSet = maxInputSet;
+    cfg->ruleFile = ruleFile;
+    return true;
+}
+
+static DFA generateMachine(const Config &cfg)
+{
+    if (cfg.cache) {
+        QFileInfo ruleInfo(cfg.ruleFile);
+        QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
+        if (cacheInfo.exists()
+            && cacheInfo.lastModified() > ruleInfo.lastModified()) {
+            QFile f(cacheInfo.absoluteFilePath());
+            f.open(QIODevice::ReadOnly);
+            QDataStream stream(&f);
+            DFA machine;
+            stream >> machine;
+            return machine;
+        }
+    }
+
+    QMap<QString, NFA> macros;
+    foreach (ConfigFile::Entry e, cfg.configSections.value("Macros")) {
+        int errCol = 0;
+        if (cfg.debug)
+            qDebug() << "parsing" << e.value;
+        NFA nfa = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
+        if (nfa.isEmpty()) {
+            qWarning("Parse error in line %d column %d", e.lineNumber, errCol);
+            return DFA();
+        }
+        macros.insert(e.key, nfa);
+    }
+
+    if (!cfg.configSections.contains("Tokens")) {
+        qWarning("Rule file does not contain a [Tokens] section!");
+        return DFA();
+    }
+
+    QVector<NFA> tokens;
+
+    foreach (ConfigFile::Entry e, cfg.configSections.value("Tokens")) {
+        int errCol = 0;
+        if (cfg.debug)
+            qDebug() << "parsing" << e.value;
+        NFA tok = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
+        if (tok.isEmpty()) {
+            qWarning("Parse error in line %d column %d while parsing token %s", e.lineNumber, errCol, e.key.toLocal8Bit().constData());
+            return DFA();
+        }
+        tok.setTerminationSymbol(e.key);
+        tokens.append(tok);
+    }
+
+    NFA giganticStateMachine;
+    foreach (NFA nfa, tokens)
+        if (giganticStateMachine.isEmpty())
+            giganticStateMachine = nfa;
+        else
+            giganticStateMachine = NFA::createAlternatingNFA(giganticStateMachine, nfa);
+
+    DFA result = giganticStateMachine.toDFA().minimize();
+    if (cfg.cache) {
+        QFileInfo ruleInfo(cfg.ruleFile);
+        QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
+        QFile f(cacheInfo.absoluteFilePath());
+        f.open(QIODevice::WriteOnly | QIODevice::Truncate);
+        QDataStream stream(&f);
+        stream << result;
+    }
+    return result;
+}
+
+#if !defined(AUTOTEST)
+int main(int argc, char **argv)
+{
+    QCoreApplication app(argc, argv);
+    QString ruleFile;
+    Config cfg;
+
+    const QStringList arguments = app.arguments().mid(1);
+    cfg.debug = arguments.contains("-debug");
+    const bool testRules = arguments.contains("-test");
+    cfg.cache = arguments.contains("-cache");
+
+    foreach (const QString &arg, arguments)
+        if (!arg.startsWith(QLatin1Char('-'))) {
+            ruleFile = arg;
+            break;
+        }
+
+    if (ruleFile.isEmpty()) {
+        qWarning("usage: lexgen [-test rulefile");
+        qWarning(" ");
+        qWarning("    the -test option will cause lexgen to interpret standard input");
+        qWarning("    according to the specified rules and print out pairs of token and");
+        qWarning("    lexical element");
+        return 1;
+    }
+
+    if (!loadConfig(ruleFile, &cfg))
+        return 1;
+
+    DFA machine = generateMachine(cfg);
+    if (machine.isEmpty())
+        return 1;
+
+    if (testRules) {
+        qWarning("Testing:");
+        QString input = QTextStream(stdin).readAll();
+        /*
+        qDebug() << "NFA has" << machine.stateCount() << "states";
+        qDebug() << "Converting to DFA... (this may take a while)";
+        DFA dfa = machine.toDFA();
+        qDebug() << "DFA has" << dfa.count() << "states";
+        qDebug() << "Minimizing...";
+        dfa = dfa.minimize();
+        qDebug() << "Minimized DFA has" << dfa.count() << "states";
+        */
+        DFA dfa = machine;
+        if (cfg.debug)
+            qDebug() << "tokenizing" << input;
+        bool ok = false;
+        QList<Symbol> symbols = tokenize(dfa, input, &cfg, &ok);
+        if (symbols.isEmpty()) {
+            qWarning("No tokens produced!");
+        } else {
+            foreach (Symbol s, symbols)
+                    qDebug() << s.token << ":" << s.lexem;
+        }
+        if (ok)
+            qDebug() << symbols.count() << "tokens produced.";
+        else
+            qDebug() << "Error while tokenizing!";
+    } else {
+        Generator gen(machine, cfg);
+        QTextStream(stdout) 
+            << gen.generate();
+    }
+
+    return 0;
+}
+#endif
+
diff --git a/util/lexgen/nfa.cpp b/util/lexgen/nfa.cpp
new file mode 100644
index 0000000000..4adde3a5cf
--- /dev/null
+++ b/util/lexgen/nfa.cpp
@@ -0,0 +1,508 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "nfa.h"
+#include <QSet>
+#include <limits.h>
+
+NFA NFA::createSingleInputNFA(InputType input)
+{
+    NFA result;
+    result.initialize(2);
+    result.addTransition(result.initialState, input, result.finalState);
+    return result;
+}
+
+NFA NFA::createSymbolNFA(const QString &symbol)
+{
+    NFA result = NFA::createSingleInputNFA(Epsilon);
+    result.states[result.finalState].symbol = symbol;
+    return result;
+}
+
+void NFA::initialize(int size)
+{
+    states.resize(size);
+    states.fill(State());
+    initialState = 0;
+    finalState = size - 1;
+}
+
+void NFA::addTransition(int from, InputType input, int to)
+{
+    assertValidState(from);
+    assertValidState(to);
+
+    states[from].transitions.insertMulti(input, to);
+}
+
+void NFA::copyFrom(const NFA &other, int baseState)
+{
+    assertValidState(baseState);
+    assertValidState(baseState + other.states.count() - 1);
+
+    for (int i = 0; i < other.states.count(); ++i) {
+        State s = other.states.at(i);
+
+        for (TransitionMap::Iterator it = s.transitions.begin(),
+             end = s.transitions.end(); it != end; ++it)
+            *it += baseState;
+
+        states[baseState + i] = s;
+    }
+}
+
+void NFA::initializeFromPair(const NFA &a, const NFA &b,
+                             int *initialA, int *finalA,
+                             int *initialB, int *finalB)
+{
+    initialize(a.states.count() + b.states.count() + 2);
+
+    int baseIdxA = 1;
+    int baseIdxB = 1 + a.states.count();
+
+    *initialA = a.initialState + baseIdxA;
+    *finalA = a.finalState + baseIdxA;
+
+    *initialB = b.initialState + baseIdxB;
+    *finalB = b.finalState + baseIdxB;
+
+    copyFrom(a, baseIdxA);
+    copyFrom(b, baseIdxB);
+}
+
+NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
+{
+    NFA result;
+
+    int newInitialA, newFinalA,
+        newInitialB, newFinalB;
+
+    result.initializeFromPair(a, b, &newInitialA, &newFinalA,
+                              &newInitialB, &newFinalB);
+
+    result.addTransition(result.initialState, Epsilon, newInitialA);
+    result.addTransition(result.initialState, Epsilon, newInitialB);
+
+    result.addTransition(newFinalA, Epsilon, result.finalState);
+    result.addTransition(newFinalB, Epsilon, result.finalState);
+
+    return result;
+}
+
+NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
+{
+    NFA result;
+
+    int initialA, finalA,
+        initialB, finalB;
+
+    result.initializeFromPair(a, b, &initialA, &finalA, &initialB, &finalB);
+
+    result.addTransition(result.initialState, Epsilon, initialA);
+    result.addTransition(finalA, Epsilon, initialB);
+    result.addTransition(finalB, Epsilon, result.finalState);
+    return result;
+}
+
+NFA NFA::createOptionalNFA(const NFA &a)
+{
+    NFA result;
+
+    result.initialize(a.states.count() + 2);
+
+    int baseIdxA = 1;
+    int initialA = a.initialState + baseIdxA;
+    int finalA = a.finalState + baseIdxA;
+
+    result.copyFrom(a, baseIdxA);
+
+    result.addTransition(result.initialState, Epsilon, initialA);
+    result.addTransition(result.initialState, Epsilon, result.finalState);
+
+    result.addTransition(finalA, Epsilon, initialA);
+    result.addTransition(finalA, Epsilon, result.finalState);
+
+    return result;
+}
+
+NFA NFA::createStringNFA(const QByteArray &str)
+{
+    NFA result;
+    foreach (char c, str) {
+        NFA ch = NFA::createSingleInputNFA(c);
+        if (result.isEmpty())
+            result = ch;
+        else
+            result = NFA::createConcatenatingNFA(result, ch);
+    }
+    return result;
+}
+
+NFA NFA::createSetNFA(const QSet<InputType> &set)
+{
+    NFA result;
+    result.initialize(set.count() + 2);
+
+    int state = 1;
+    for (QSet<InputType>::ConstIterator it = set.constBegin(), end = set.constEnd();
+         it != end; ++it, ++state) {
+        result.addTransition(result.initialState, Epsilon, state);
+        result.addTransition(state, *it, result.finalState);
+    }
+
+    /*
+    foreach (InputType input, set) {
+        NFA ch = NFA::createSingleInputNFA(input);
+        if (result.isEmpty())
+            result = ch;
+        else
+            result = NFA::createAlternatingNFA(result, ch);
+    }
+    */
+    return result;
+}
+
+NFA NFA::createZeroOrOneNFA(const NFA &a)
+{
+    NFA epsilonNFA = createSingleInputNFA(Epsilon);
+    return NFA::createAlternatingNFA(a, epsilonNFA);
+}
+
+NFA NFA::applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences)
+{
+    NFA result = a;
+    NFA epsilonNFA = createSingleInputNFA(Epsilon);
+
+    if (minOccurrences == 0) {
+        result = NFA::createAlternatingNFA(result, epsilonNFA);
+    } else {
+        minOccurrences--;
+    }
+    maxOccurrences--;
+
+    for (int i = 0; i < minOccurrences; ++i)
+        result = NFA::createConcatenatingNFA(result, a);
+
+    for (int i = minOccurrences; i < maxOccurrences; ++i)
+        result = NFA::createConcatenatingNFA(result, NFA::createAlternatingNFA(a, epsilonNFA));
+
+    return result;
+}
+
+void NFA::debug()
+{
+    qDebug() << "NFA has" << states.count() << "states";
+    qDebug() << "initial state is" << initialState;
+    qDebug() << "final state is" << finalState;
+
+    for (int i = 0; i < states.count(); ++i) {
+        const State &s = states.at(i);
+        for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+             end = s.transitions.constEnd(); it != end; ++it)
+            qDebug() << "transition from state" << i << "to" << it.value() << "through"
+                     << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
+        if (!s.symbol.isEmpty())
+            qDebug() << "State" << i << "leads to symbol" << s.symbol;
+    }
+}
+
+// helper
+typedef QSet<int> DFAState;
+
+// that's a bad hash, but it's good enough for us
+// and it allows us to use the nice QHash API :)
+inline uint qHash(const DFAState &state)
+{
+    uint val = 0;
+    foreach (int s, state)
+        val |= qHash(s);
+    return val;
+}
+
+DFA NFA::toDFA() const
+{
+    DFA result;
+    result.reserve(states.count());
+
+    QHash<QString, int> symbolReferenceCounts;
+    {
+        QSet<int> symbolStates;
+        for (int i = 0; i < states.count(); ++i)
+            if (!states.at(i).symbol.isEmpty())
+                symbolStates.insert(i);
+
+        QHash<int, QString> epsilonStates;
+        for (int i = 0; i < states.count(); ++i) {
+            const State &s = states.at(i);
+            for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+                 transition != end; ++transition)
+                if (transition.key() == Epsilon && symbolStates.contains(transition.value()))
+                    epsilonStates.insert(i, states.at(transition.value()).symbol);
+        }
+
+        int lastCount;
+        do {
+            lastCount = epsilonStates.count();
+            for (int i = 0; i < states.count(); ++i) {
+                const State &s = states.at(i);
+                for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+                     transition != end; ++transition)
+                    if (transition.key() == Epsilon && epsilonStates.contains(transition.value()))
+                        epsilonStates.insert(i, epsilonStates.value(transition.value()));
+            }
+
+        } while (lastCount != epsilonStates.count());
+
+        for (int i = 0; i < states.count(); ++i) {
+            const State &s = states.at(i);
+            for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+                 transition != end; ++transition) {
+                if (transition.key() == Epsilon)
+                    continue;
+                if (symbolStates.contains(transition.value())) {
+                    const QString symbol = states.at(transition.value()).symbol;
+                    symbolReferenceCounts[symbol]++;
+                } else if (epsilonStates.contains(transition.value())) {
+                    const QString symbol = epsilonStates.value(transition.value());
+                    symbolReferenceCounts[symbol]++;
+                }
+            }
+        }
+        /*
+        for (QHash<QString, int>::ConstIterator symIt = symbolReferenceCounts.constBegin(), symEnd = symbolReferenceCounts.constEnd();
+             symIt != symEnd; ++symIt)
+            qDebug() << "symbol" << symIt.key() << "is reached" << symIt.value() << "times";
+            */
+    }
+
+    
+    QSet<InputType> validInput;
+    foreach (const State &s, states)
+        for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+             end = s.transitions.constEnd(); it != end; ++it)
+            if (it.key() != Epsilon)
+                validInput.insert(it.key());
+
+    // A DFA state can consist of multiple NFA states.
+    // the dfaStateMap maps from these to the actual
+    // state index within the resulting DFA vector
+    QHash<DFAState, int> dfaStateMap;
+    QStack<DFAState> pendingDFAStates;
+
+    DFAState startState = epsilonClosure(QSet<int>() << initialState);
+
+    result.resize(1);
+    dfaStateMap.insert(startState, 0);
+
+    pendingDFAStates.push(startState);
+
+    while (!pendingDFAStates.isEmpty()) {
+        DFAState state = pendingDFAStates.pop();
+//        qDebug() << "processing" << state << "from the stack of pending states";
+
+        foreach (InputType input, validInput) {
+
+            QSet<int> reachableStates;
+
+            foreach (int nfaState, state) {
+                const TransitionMap &transitions = states.at(nfaState).transitions;
+                TransitionMap::ConstIterator it = transitions.find(input);
+                while (it != transitions.constEnd() && it.key() == input) {
+                    reachableStates.insert(it.value());
+                    ++it;
+                }
+            }
+
+            if (reachableStates.isEmpty())
+                continue;
+
+//            qDebug() << "can reach" << reachableStates << "from input" << char(input);
+
+            QSet<int> closure = epsilonClosure(reachableStates);
+
+//            qDebug() << "closure is" << closure;
+
+            if (!dfaStateMap.contains(closure)) {
+                int dfaState = result.count();
+                result.append(State());
+
+                QString symbol;
+                int refCount = INT_MAX;
+                foreach (int nfaState, closure)
+                    if (!states.at(nfaState).symbol.isEmpty()) {
+//                        qDebug() << "closure also contains symbol" << states.at(nfaState).symbol;
+                        QString candidate = states.at(nfaState).symbol;
+                        int candidateRefCount =symbolReferenceCounts.value(candidate, INT_MAX);
+                        if (candidateRefCount < refCount) {
+                            refCount = candidateRefCount;
+                            symbol = candidate;
+                        }
+                    }
+                if (!symbol.isEmpty())
+                    result.last().symbol = symbol;
+
+                dfaStateMap.insert(closure, dfaState);
+
+                Q_ASSERT(!pendingDFAStates.contains(closure));
+                pendingDFAStates.prepend(closure);
+            }
+
+            result[dfaStateMap.value(state)].transitions.insert(input, dfaStateMap.value(closure));
+        }
+    }
+
+    return result;
+}
+
+QSet<int> NFA::epsilonClosure(const QSet<int> &initialClosure) const
+{
+    QSet<int> closure = initialClosure;
+    closure.reserve(closure.count() * 4);
+
+    QStack<int> stateStack;
+    stateStack.resize(closure.count());
+    qCopy(closure.constBegin(), closure.constEnd(), stateStack.begin());
+
+    while (!stateStack.isEmpty()) {
+        int t = stateStack.pop();
+        const TransitionMap &transitions = states.at(t).transitions;
+        TransitionMap::ConstIterator it = transitions.find(Epsilon);
+        while (it != transitions.constEnd() && it.key() == Epsilon) {
+            const int u = it.value();
+            if (!closure.contains(u)) {
+                closure.insert(u);
+                stateStack.push(u);
+            }
+            ++it;
+        }
+    }
+
+    return closure;
+}
+
+void NFA::setTerminationSymbol(const QString &symbol)
+{
+    states[finalState].symbol = symbol;
+}
+
+void DFA::debug() const
+{
+    qDebug() << "DFA has" << count() << "states";
+
+    for (int i = 0; i < count(); ++i) {
+        const State &s = at(i);
+        if (s.transitions.isEmpty()) {
+            qDebug() << "State" << i << "has no transitions";
+        } else {
+            for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+                 end = s.transitions.constEnd(); it != end; ++it)
+                qDebug() << "transition from state" << i << "to" << it.value() << "through"
+                         << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
+        }
+        if (!s.symbol.isEmpty())
+            qDebug() << "State" << i << "leads to symbol" << s.symbol;
+    }
+
+}
+
+DFA DFA::minimize() const
+{
+    QVector<bool> inequivalentStates(count() * count());
+    inequivalentStates.fill(false);
+
+    for (int i = 0; i < count(); ++i)
+        for (int j = 0; j < i; ++j) {
+            if (i != j && at(i).symbol != at(j).symbol)
+                inequivalentStates[i * count() + j] = true;
+        }
+
+    bool done;
+    do {
+        done = true;
+        for (int i = 0; i < count(); ++i)
+            for (int j = 0; j < count(); ++j) {
+                if (i == j)
+                    continue;
+
+                if (inequivalentStates[i * count() + j])
+                    continue;
+
+                if (at(i).transitions.keys() != at(j).transitions.keys()) {
+                    inequivalentStates[i * count() + j] = true;
+                    done = false;
+                    continue;
+                }
+
+                foreach (InputType a, at(i).transitions.keys()) {
+                    int r = at(i).transitions.value(a, -1);
+                    if (r == -1)
+                        continue;
+                    int s = at(j).transitions.value(a, -1);
+                    if (s == -1)
+                        continue;
+
+                    if (inequivalentStates[r * count() + s]
+                        || r == s) {
+                        inequivalentStates[i * count() + j] = true;
+                        done = false;
+                        break;
+                    }
+                }
+            }
+    } while (!done);
+
+    QHash<int, int> statesToEliminate;
+    for (int i = 0; i < count(); ++i)
+        for (int j = 0; j < i; ++j)
+            if (!inequivalentStates[i * count() + j]) {
+                statesToEliminate.insertMulti(i, j);
+            }
+
+    /*
+    qDebug() << "states to eliminiate:" << statesToEliminate.count();;
+    qDebug() << "merging" << statesToEliminate;
+    debug();
+    */
+
+    return *this;
+}
+
+
diff --git a/util/lexgen/nfa.h b/util/lexgen/nfa.h
new file mode 100644
index 0000000000..02657b33eb
--- /dev/null
+++ b/util/lexgen/nfa.h
@@ -0,0 +1,127 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef NFA_H
+#define NFA_H
+
+#include <QMap>
+#include <QHash>
+#include <QString>
+#include <QVector>
+#include <QDebug>
+#include <QStack>
+#include <QByteArray>
+
+#include "global.h"
+
+typedef QHash<InputType, int> TransitionMap;
+
+struct State
+{
+    QString symbol;
+    TransitionMap transitions;
+};
+
+inline QDataStream &operator<<(QDataStream &stream, const State &state)
+{
+    return stream << state.symbol << state.transitions;
+}
+
+inline QDataStream &operator>>(QDataStream &stream, State &state)
+{
+    return stream >> state.symbol >> state.transitions;
+}
+
+struct DFA : public QVector<State>
+{
+    void debug() const;
+    DFA minimize() const;
+};
+
+class NFA
+{
+public:
+    static NFA createSingleInputNFA(InputType input);
+    static NFA createSymbolNFA(const QString &symbol);
+    static NFA createAlternatingNFA(const NFA &a, const NFA &b);
+    static NFA createConcatenatingNFA(const NFA &a, const NFA &b);
+    static NFA createOptionalNFA(const NFA &a);
+
+    // convenience
+    static NFA createStringNFA(const QByteArray &str);
+    static NFA createSetNFA(const QSet<InputType> &set);
+    static NFA createZeroOrOneNFA(const NFA &a);
+    static NFA applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences);
+
+    void setTerminationSymbol(const QString &symbol);
+    
+    DFA toDFA() const;
+
+    inline bool isEmpty() const { return states.isEmpty(); }
+    inline int stateCount() const { return states.count(); }
+
+    void debug();
+
+private:
+    void initialize(int size);
+    void addTransition(int from, InputType input, int to);
+    void copyFrom(const NFA &other, int baseState);
+
+    void initializeFromPair(const NFA &a, const NFA &b,
+                            int *initialA, int *finalA,
+                            int *initialB, int *finalB);
+
+    QSet<int> epsilonClosure(const QSet<int> &initialClosure) const;
+
+    inline void assertValidState(int state)
+    { Q_UNUSED(state); Q_ASSERT(state >= 0); Q_ASSERT(state < states.count()); }
+    
+#if defined(AUTOTEST)
+public:
+#endif
+    int initialState;
+    int finalState;
+
+    QVector<State> states;
+};
+
+#endif // NFA_H
+
diff --git a/util/lexgen/re2nfa.cpp b/util/lexgen/re2nfa.cpp
new file mode 100644
index 0000000000..77cf5019fe
--- /dev/null
+++ b/util/lexgen/re2nfa.cpp
@@ -0,0 +1,547 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "re2nfa.h"
+#include "tokenizer.cpp"
+
+RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)
+    : macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)
+{
+}
+
+NFA RE2NFA::parse(const QString &expression, int *errCol)
+{
+    tokenize(expression);
+
+    if (symbols.isEmpty())
+        return NFA();
+
+    index = 0;
+
+    NFA result = parseExpr();
+    if (result.isEmpty()) {
+        if (errCol)
+            *errCol = errorColumn;
+    }
+    return result;
+}
+
+NFA RE2NFA::parseExpr()
+{
+    NFA value = parseBranch();
+    while (test(TOK_OR)) {
+        NFA rhs = parseBranch();
+        value = NFA::createAlternatingNFA(value, rhs);
+    }
+    return value;
+}
+
+NFA RE2NFA::parseBranch()
+{
+    NFA value = parsePiece();
+    if (!hasNext())
+        return value;
+    NFA next;
+    do {
+        next = parsePiece();
+        if (!next.isEmpty())
+            value = NFA::createConcatenatingNFA(value, next);
+    } while (!next.isEmpty() && hasNext());
+    return value;
+}
+
+NFA RE2NFA::parsePiece()
+{
+    NFA atom = parseAtom();
+    if (atom.isEmpty() || !hasNext())
+        return atom;
+    return parseMaybeQuantifier(atom);
+}
+
+NFA RE2NFA::parseAtom()
+{
+    // ####
+    switch (next()) {
+        case TOK_STRING:
+            return createCharNFA();
+        case TOK_LPAREN: {
+            NFA subExpr = parseExpr();
+            next(TOK_RPAREN);
+            return subExpr;
+        }
+        case TOK_LBRACE: {
+            QString macroName = lexemUntil(TOK_RBRACE);
+            QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);
+            if (macro == macros.end()) {
+                qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));
+                return NFA();
+            }
+            return *macro;
+        }
+        case TOK_LBRACKET: {
+            NFA set = parseSet();
+            next(TOK_RBRACKET);
+            return set;
+        }
+        case TOK_SEQUENCE:
+            return parseSet2();
+        case TOK_DOT:
+            return NFA::createSetNFA(maxInputSet);
+        default:
+            prev();
+            return NFA();
+    }
+}
+
+NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)
+{
+    // ####
+    switch (next()) {
+        case TOK_STAR:
+            return NFA::createOptionalNFA(nfa);
+        case TOK_QUESTION:
+            return NFA::createZeroOrOneNFA(nfa);
+        case TOK_PLUS:
+            return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));
+        case TOK_LBRACE: {
+              const int rewind = index - 1;
+
+              QString lexemBeforeComma;
+              QString lexemAfterComma;
+              bool seenComma = false;
+              forever {
+                  if (test(TOK_COMMA)) {
+                      if (seenComma) {
+                          errorColumn = symbol().column;
+                          return NFA();
+                      }
+                      seenComma = true;
+                  } else if (test(TOK_RBRACE)) {
+                      break;
+                  } else {
+                      next(TOK_STRING);
+                      if (seenComma)
+                          lexemAfterComma += symbol().lexem;
+                      else
+                          lexemBeforeComma += symbol().lexem;
+                  }
+              }
+              bool isNumber = false;
+              int min = lexemBeforeComma.toInt(&isNumber);
+              if (!isNumber) {
+                  index = rewind;
+                  return nfa;
+              }
+              int max = min;
+              if (seenComma) {
+                  max = lexemAfterComma.toInt(&isNumber);
+                  if (!isNumber) {
+                      errorColumn = symbol().column;
+                      return NFA();
+                  }
+              }
+              return NFA::applyQuantity(nfa, min, max);
+        }
+        default:
+            prev();
+            return nfa;
+    }
+}
+
+NFA RE2NFA::parseSet()
+{
+    QSet<InputType> set;
+    bool negate = false;
+
+    next(TOK_STRING);
+
+    do {
+        Q_ASSERT(symbol().lexem.length() == 1);
+        // ###
+        QChar ch = symbol().lexem.at(0);
+        if (set.isEmpty() && ch == QLatin1Char('^')) {
+            negate = true;
+            continue;
+        }
+
+        // look ahead for ranges like a-z
+        bool rangeFound = false;
+        if (test(TOK_STRING)) {
+            if (symbol().lexem.length() == 1
+                && symbol().lexem.at(0) == QLatin1Char('-')) {
+                next(TOK_STRING);
+                Q_ASSERT(symbol().lexem.length() == 1);
+                QChar last = symbol().lexem.at(0);
+
+                if (ch.unicode() > last.unicode())
+                    qSwap(ch, last);
+
+                for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
+                    if (caseSensitivity == Qt::CaseInsensitive) {
+                        set.insert(QChar(i).toLower().unicode());
+                    } else {
+                        set.insert(i);
+                    }
+                }
+
+                rangeFound = true;
+            } else {
+                prev();
+            }
+        }
+
+        if (!rangeFound) {
+            if (caseSensitivity == Qt::CaseInsensitive) {
+                set.insert(ch.toLower().unicode());
+            } else {
+                set.insert(ch.unicode());
+            }
+        }
+    } while (test(TOK_STRING));
+
+    if (negate) {
+        QSet<InputType> negatedSet = maxInputSet;
+        negatedSet.subtract(set);
+        set = negatedSet;
+    }
+
+    return NFA::createSetNFA(set);
+}
+
+NFA RE2NFA::parseSet2()
+{
+    QSet<InputType> set;
+    bool negate = false;
+
+    QString str = symbol().lexem;
+    // strip off brackets
+    str.chop(1);
+    str.remove(0, 1);
+
+    int i = 0;
+    while (i < str.length()) {
+        // ###
+        QChar ch = str.at(i++);
+        if (set.isEmpty() && ch == QLatin1Char('^')) {
+            negate = true;
+            continue;
+        }
+
+        // look ahead for ranges like a-z
+        bool rangeFound = false;
+        if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {
+            ++i;
+            QChar last = str.at(i++);
+
+            if (ch.unicode() > last.unicode())
+                qSwap(ch, last);
+
+            for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
+                if (caseSensitivity == Qt::CaseInsensitive) {
+                    set.insert(QChar(i).toLower().unicode());
+                } else {
+                    set.insert(i);
+                }
+            }
+
+            rangeFound = true;
+        }
+
+        if (!rangeFound) {
+            if (caseSensitivity == Qt::CaseInsensitive) {
+                set.insert(ch.toLower().unicode());
+            } else {
+                set.insert(ch.unicode());
+            }
+        }
+    }
+
+    if (negate) {
+        QSet<InputType> negatedSet = maxInputSet;
+        negatedSet.subtract(set);
+        set = negatedSet;
+    }
+
+    return NFA::createSetNFA(set);
+}
+NFA RE2NFA::createCharNFA()
+{
+    NFA nfa;
+    // ####
+    if (caseSensitivity == Qt::CaseInsensitive) {
+        nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());
+    } else {
+        nfa = NFA::createStringNFA(symbol().lexem.toLatin1());
+    }
+    return nfa;
+}
+
+static inline int skipQuote(const QString &str, int pos)
+{
+    while (pos < str.length()
+           && str.at(pos) != QLatin1Char('"')) {
+        if (str.at(pos) == QLatin1Char('\\')) {
+            ++pos;
+            if (pos >= str.length())
+                break;
+        }
+        ++pos;
+    }
+    if (pos < str.length())
+        ++pos;
+    return pos;
+}
+
+#if 0
+static const char*tokStr(Token t)
+{
+    switch (t) {
+        case TOK_INVALID: return "TOK_INVALID";
+        case TOK_STRING: return "TOK_STRING";
+        case TOK_LBRACE: return "TOK_LBRACE";
+        case TOK_RBRACE: return "TOK_RBRACE";
+        case TOK_LBRACKET: return "TOK_LBRACKET";
+        case TOK_RBRACKET: return "TOK_RBRACKET";
+        case TOK_LPAREN: return "TOK_LPAREN";
+        case TOK_RPAREN: return "TOK_RPAREN";
+        case TOK_COMMA: return "TOK_COMMA";
+        case TOK_STAR: return "TOK_STAR";
+        case TOK_OR: return "TOK_OR";
+        case TOK_QUESTION: return "TOK_QUESTION";
+        case TOK_DOT: return "TOK_DOT";
+        case TOK_PLUS: return "TOK_PLUS";
+        case TOK_SEQUENCE: return "TOK_SEQUENCE";
+        case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";
+    }
+    return "";
+}
+#endif
+
+void RE2NFA::tokenize(const QString &input)
+{
+    symbols.clear();
+#if 1
+    RegExpTokenizer tokenizer(input);
+    Symbol sym;
+    int tok = tokenizer.lex();
+    while (tok != -1) {
+        Symbol sym;
+        sym.token = static_cast<Token>(tok);
+        sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);
+
+        if (sym.token == TOK_QUOTED_STRING) {
+            sym.lexem.chop(1);
+            sym.lexem.remove(0, 1);
+            sym.token = TOK_STRING;
+        }
+
+        if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
+            for (int i = 0; i < sym.lexem.length(); ++i) {
+                if (sym.lexem.at(i) == '\\') {
+                    if (i >= sym.lexem.length() - 1)
+                        break;
+                    QChar ch = sym.lexem.at(i + 1);
+                    if (ch == QLatin1Char('n')) {
+                        ch = '\n';
+                    } else if (ch == QLatin1Char('r')) {
+                        ch = '\r';
+                    } else if (ch == QLatin1Char('t')) {
+                        ch = '\t';
+                    } else if (ch == QLatin1Char('f')) {
+                        ch = '\f';
+                    }
+                    sym.lexem.replace(i, 2, ch);
+                }
+            }
+        }
+
+        /*
+        if (sym.token == TOK_SEQUENCE) {
+            Symbol s;
+            s.token = TOK_LBRACKET;
+            s.lexem = "[";
+            symbols.append(s);
+
+            for (int i = 1; i < sym.lexem.length() - 1; ++i) {
+                s.token = TOK_STRING;
+                s.lexem = sym.lexem.at(i);
+                symbols.append(s);
+            }
+
+            s.token = TOK_RBRACKET;
+            s.lexem = "]";
+            symbols.append(s);
+
+            tok = tokenizer.lex();
+            continue;
+        }
+        */
+
+        symbols.append(sym);
+        tok = tokenizer.lex();
+    }
+#else
+    int pos = 0;
+    bool insideSet = false;
+    while (pos < input.length()) {
+        QChar ch = input.at(pos);
+
+        Symbol sym;
+        sym.column = pos;
+        sym.token = TOK_INVALID;
+        sym.lexem = QString(ch);
+        switch (ch.toLatin1()) {
+            case '"': {
+                if (insideSet) {
+                    sym.token = TOK_STRING;
+                    sym.lexem = QString(ch);
+                    symbols += sym;
+                    ++pos;
+                    continue;
+                }
+                if (pos + 1 >= input.length())
+                    return;
+                int quoteEnd = skipQuote(input, pos + 1);
+                sym.token = TOK_STRING;
+                sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
+                symbols += sym;
+                pos = quoteEnd;
+                continue;
+            }
+            case '{':
+                sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
+                break;
+            case '}':
+                sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
+                break;
+            case '[':
+                insideSet = true;
+                sym.token = TOK_LBRACKET;
+                break;
+            case ']':
+                insideSet = false;
+                sym.token = TOK_RBRACKET;
+                break;
+            case '(':
+                sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
+                break;
+            case ')':
+                sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
+                break;
+            case ',':
+                sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
+                break;
+            case '*':
+                sym.token = (insideSet ? TOK_STRING : TOK_STAR);
+                break;
+            case '|':
+                sym.token = (insideSet ? TOK_STRING : TOK_OR);
+                break;
+            case '?':
+                sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
+                break;
+            case '.':
+                sym.token = (insideSet ? TOK_STRING : TOK_DOT);
+                break;
+            case '+':
+                sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
+                break;
+            case '\\':
+                ++pos;
+                if (pos >= input.length())
+                    return;
+                ch = input.at(pos);
+                if (ch == QLatin1Char('n')) {
+                    ch = '\n';
+                } else if (ch == QLatin1Char('r')) {
+                    ch = '\r';
+                } else if (ch == QLatin1Char('t')) {
+                    ch = '\t';
+                } else if (ch == QLatin1Char('f')) {
+                    ch = '\f';
+                }
+                // fall through
+            default:
+                sym.token = TOK_STRING;
+                sym.lexem = QString(ch);
+                symbols += sym;
+                ++pos;
+                continue;
+        }
+        symbols += sym;
+        ++pos;
+    }
+#endif
+#if 0
+    foreach (Symbol s, symbols) {
+        qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
+    }
+#endif
+}
+
+bool RE2NFA::next(Token t)
+{
+    if (hasNext() && next() == t)
+        return true;
+    errorColumn = symbol().column;
+    Q_ASSERT(false);
+    return false;
+}
+
+bool RE2NFA::test(Token t)
+{
+    if (index >= symbols.count())
+        return false;
+    if (symbols.at(index).token == t) {
+        ++index;
+        return true;
+    }
+    return false;
+}
+
+QString RE2NFA::lexemUntil(Token t)
+{
+    QString lexem;
+    while (hasNext() && next() != t)
+        lexem += symbol().lexem;
+    return lexem;
+}
+
diff --git a/util/lexgen/re2nfa.h b/util/lexgen/re2nfa.h
new file mode 100644
index 0000000000..57b8bde09e
--- /dev/null
+++ b/util/lexgen/re2nfa.h
@@ -0,0 +1,116 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef RE2NFA_H
+#define RE2NFA_H
+
+#include "nfa.h"
+#include <QSet>
+
+class RE2NFA
+{
+public:
+    RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs);
+
+    NFA parse(const QString &expression, int *errorColumn = 0);
+
+private:
+    NFA parseExpr();
+    NFA parseBranch();
+    NFA parsePiece();
+    NFA parseAtom();
+    NFA parseMaybeQuantifier(const NFA &nfa);
+    NFA parseSet();
+    NFA parseSet2();
+
+    NFA createCharNFA();
+
+private:
+    friend class RegExpTokenizer;
+
+    enum Token {
+        TOK_INVALID,
+        TOK_STRING,
+        TOK_LBRACE,   // {
+        TOK_RBRACE,   // }
+        TOK_LBRACKET, // [
+        TOK_RBRACKET, // ]
+        TOK_LPAREN,   // (
+        TOK_RPAREN,   // )
+        TOK_COMMA,
+        TOK_STAR,
+        TOK_OR,
+        TOK_QUESTION,
+        TOK_DOT,
+        TOK_PLUS,
+        TOK_SEQUENCE,
+        TOK_QUOTED_STRING
+    };
+
+    struct Symbol
+    {
+        inline Symbol() : token(TOK_INVALID), column(-1) {}
+        inline Symbol(Token t, const QString &l = QString()) : token(t), lexem(l), column(-1) {}
+        Token token;
+        QString lexem;
+        int column;
+    };
+
+    inline bool hasNext() const { return index < symbols.count(); }
+    inline Token next() { return symbols.at(index++).token; }
+    bool next(Token t);
+    bool test(Token t);
+    inline void prev() { index--; }
+    inline const Symbol &symbol() const { return symbols.at(index - 1); }
+    QString lexemUntil(Token t);
+
+    void tokenize(const QString &input);
+
+    QMap<QString, NFA> macros;
+    QVector<Symbol> symbols;
+    int index;
+    int errorColumn;
+    const QSet<InputType> maxInputSet;
+    Qt::CaseSensitivity caseSensitivity;
+};
+
+#endif // RE2NFA_H
+
diff --git a/util/lexgen/test.lexgen b/util/lexgen/test.lexgen
new file mode 100644
index 0000000000..fd532fd549
--- /dev/null
+++ b/util/lexgen/test.lexgen
@@ -0,0 +1,9 @@
+[Options]
+case-insensitive
+classname = TestScanner
+
+[Tokens]
+TOK_C = [abcd]
+TOK_B = [bc]
+TOK_A = a
+
diff --git a/util/lexgen/tests/testdata/backtrack1/input b/util/lexgen/tests/testdata/backtrack1/input
new file mode 100644
index 0000000000..f5099b509d
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/input
@@ -0,0 +1 @@
+LETX
diff --git a/util/lexgen/tests/testdata/backtrack1/output b/util/lexgen/tests/testdata/backtrack1/output
new file mode 100644
index 0000000000..6893deb014
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/output
@@ -0,0 +1 @@
+TOK_LET|LET
diff --git a/util/lexgen/tests/testdata/backtrack1/rules.lexgen b/util/lexgen/tests/testdata/backtrack1/rules.lexgen
new file mode 100644
index 0000000000..ade8a15546
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_LET = LET
+TOK_LETXX = LETXX
diff --git a/util/lexgen/tests/testdata/backtrack2/input b/util/lexgen/tests/testdata/backtrack2/input
new file mode 100644
index 0000000000..59ff5b7301
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/input
@@ -0,0 +1 @@
+LETXTRA
diff --git a/util/lexgen/tests/testdata/backtrack2/output b/util/lexgen/tests/testdata/backtrack2/output
new file mode 100644
index 0000000000..348b382818
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/output
@@ -0,0 +1,2 @@
+TOK_LET|LET
+TOK_XTRA|XTRA
diff --git a/util/lexgen/tests/testdata/backtrack2/rules.lexgen b/util/lexgen/tests/testdata/backtrack2/rules.lexgen
new file mode 100644
index 0000000000..6f16986e83
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/rules.lexgen
@@ -0,0 +1,4 @@
+[Tokens]
+TOK_LET = LET
+TOK_LETXX = LETXX
+TOK_XTRA = XTRA
diff --git a/util/lexgen/tests/testdata/casesensitivity/input b/util/lexgen/tests/testdata/casesensitivity/input
new file mode 100644
index 0000000000..72b7f4869c
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/input
@@ -0,0 +1 @@
+abcdAbcDABCDeFgEFGefgEfghiHIHihI
diff --git a/util/lexgen/tests/testdata/casesensitivity/output b/util/lexgen/tests/testdata/casesensitivity/output
new file mode 100644
index 0000000000..3a4e819060
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/output
@@ -0,0 +1,14 @@
+TOK_AB|ab
+TOK_CD|cd
+TOK_AB|Ab
+TOK_CD|cD
+TOK_AB|AB
+TOK_CD|CD
+TOK_EFG|eFg
+TOK_EFG|EFG
+TOK_EFG|efg
+TOK_EFG|Efg
+TOK_HI|hi
+TOK_HI|HI
+TOK_HI|Hi
+TOK_HI|hI
diff --git a/util/lexgen/tests/testdata/casesensitivity/rules.lexgen b/util/lexgen/tests/testdata/casesensitivity/rules.lexgen
new file mode 100644
index 0000000000..3347587ffe
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/rules.lexgen
@@ -0,0 +1,7 @@
+[Options]
+case-insensitive
+[Tokens]
+TOK_AB = ab
+TOK_CD = cd
+TOK_EFG = [e-g]{3}
+TOK_HI = [hi]{2}
diff --git a/util/lexgen/tests/testdata/comments/input b/util/lexgen/tests/testdata/comments/input
new file mode 100644
index 0000000000..03873e044a
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/input
@@ -0,0 +1 @@
+/* comment with stuff *//*another comment with * stars * inside*/
diff --git a/util/lexgen/tests/testdata/comments/output b/util/lexgen/tests/testdata/comments/output
new file mode 100644
index 0000000000..2395ad1873
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/output
@@ -0,0 +1,2 @@
+TOK_COMMENT|/* comment with stuff */
+TOK_COMMENT|/*another comment with * stars * inside*/
diff --git a/util/lexgen/tests/testdata/comments/rules.lexgen b/util/lexgen/tests/testdata/comments/rules.lexgen
new file mode 100644
index 0000000000..490c759cc5
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_COMMENT = \/\*[^*]*\*+([^/*][^*]*\*+)*\/
diff --git a/util/lexgen/tests/testdata/dot/input b/util/lexgen/tests/testdata/dot/input
new file mode 100644
index 0000000000..e5b0ad6e91
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/input
@@ -0,0 +1 @@
+afbcxd
diff --git a/util/lexgen/tests/testdata/dot/output b/util/lexgen/tests/testdata/dot/output
new file mode 100644
index 0000000000..6a9afd4ced
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/output
@@ -0,0 +1,2 @@
+TOK_AB|afb
+TOK_CD|cxd
diff --git a/util/lexgen/tests/testdata/dot/rules.lexgen b/util/lexgen/tests/testdata/dot/rules.lexgen
new file mode 100644
index 0000000000..03873a71bf
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = a.b
+TOK_CD = c.d
diff --git a/util/lexgen/tests/testdata/negation/input b/util/lexgen/tests/testdata/negation/input
new file mode 100644
index 0000000000..9447b8005d
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/input
@@ -0,0 +1 @@
+aycabd
diff --git a/util/lexgen/tests/testdata/negation/output b/util/lexgen/tests/testdata/negation/output
new file mode 100644
index 0000000000..0b73263fb9
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/output
@@ -0,0 +1,2 @@
+TOK_A|ayc
+TOK_B|abd
diff --git a/util/lexgen/tests/testdata/negation/rules.lexgen b/util/lexgen/tests/testdata/negation/rules.lexgen
new file mode 100644
index 0000000000..179810b3a0
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_A = a[^b]c
+TOK_B = abd
diff --git a/util/lexgen/tests/testdata/quoteinset/input b/util/lexgen/tests/testdata/quoteinset/input
new file mode 100644
index 0000000000..5a9b6804a9
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/input
@@ -0,0 +1 @@
+"a
diff --git a/util/lexgen/tests/testdata/quoteinset/output b/util/lexgen/tests/testdata/quoteinset/output
new file mode 100644
index 0000000000..7ba8890d5d
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/output
@@ -0,0 +1 @@
+TOK_QUOTEA|"a
diff --git a/util/lexgen/tests/testdata/quoteinset/rules.lexgen b/util/lexgen/tests/testdata/quoteinset/rules.lexgen
new file mode 100644
index 0000000000..9838276a7e
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_QUOTEA = ["]a
diff --git a/util/lexgen/tests/testdata/quotes/input b/util/lexgen/tests/testdata/quotes/input
new file mode 100644
index 0000000000..ac5445055d
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/input
@@ -0,0 +1 @@
+quotedstring
diff --git a/util/lexgen/tests/testdata/quotes/output b/util/lexgen/tests/testdata/quotes/output
new file mode 100644
index 0000000000..c538e32d75
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/output
@@ -0,0 +1 @@
+TOK_STR|quotedstring
diff --git a/util/lexgen/tests/testdata/quotes/rules.lexgen b/util/lexgen/tests/testdata/quotes/rules.lexgen
new file mode 100644
index 0000000000..d528cdd589
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_STR = "quotedstring"
diff --git a/util/lexgen/tests/testdata/simple/input b/util/lexgen/tests/testdata/simple/input
new file mode 100644
index 0000000000..acbe86c7c8
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/input
@@ -0,0 +1 @@
+abcd
diff --git a/util/lexgen/tests/testdata/simple/output b/util/lexgen/tests/testdata/simple/output
new file mode 100644
index 0000000000..a37a58ea31
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/output
@@ -0,0 +1,2 @@
+TOK_AB|ab
+TOK_CD|cd
diff --git a/util/lexgen/tests/testdata/simple/rules.lexgen b/util/lexgen/tests/testdata/simple/rules.lexgen
new file mode 100644
index 0000000000..5d958c429a
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = ab
+TOK_CD = cd
diff --git a/util/lexgen/tests/testdata/subsets1/input b/util/lexgen/tests/testdata/subsets1/input
new file mode 100644
index 0000000000..47f66d1c94
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/input
@@ -0,0 +1 @@
+abaf
diff --git a/util/lexgen/tests/testdata/subsets1/output b/util/lexgen/tests/testdata/subsets1/output
new file mode 100644
index 0000000000..75dd9361bc
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/output
@@ -0,0 +1,2 @@
+TOK_AB|ab
+TOK_AZ|af
diff --git a/util/lexgen/tests/testdata/subsets1/rules.lexgen b/util/lexgen/tests/testdata/subsets1/rules.lexgen
new file mode 100644
index 0000000000..94f51a91e2
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = ab
+TOK_AZ = a[a-z]
diff --git a/util/lexgen/tests/testdata/subsets2/input b/util/lexgen/tests/testdata/subsets2/input
new file mode 100644
index 0000000000..4d0dc3ad82
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/input
@@ -0,0 +1 @@
+abd
diff --git a/util/lexgen/tests/testdata/subsets2/output b/util/lexgen/tests/testdata/subsets2/output
new file mode 100644
index 0000000000..d5a7bc5bb7
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/output
@@ -0,0 +1,3 @@
+TOK_A|a
+TOK_B|b
+TOK_D|d
diff --git a/util/lexgen/tests/testdata/subsets2/rules.lexgen b/util/lexgen/tests/testdata/subsets2/rules.lexgen
new file mode 100644
index 0000000000..e0a76294bc
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/rules.lexgen
@@ -0,0 +1,4 @@
+[Tokens]
+TOK_D = [abcd]
+TOK_B = [bc]
+TOK_A = a
diff --git a/util/lexgen/tests/tests.pro b/util/lexgen/tests/tests.pro
new file mode 100644
index 0000000000..eb04439a13
--- /dev/null
+++ b/util/lexgen/tests/tests.pro
@@ -0,0 +1,6 @@
+CONFIG += qtestlib
+SOURCES += tst_lexgen.cpp
+TARGET = tst_lexgen
+include(../lexgen.pri)
+QT = core
+DEFINES += SRCDIR=\\\"$$PWD\\\"
diff --git a/util/lexgen/tests/tst_lexgen.cpp b/util/lexgen/tests/tst_lexgen.cpp
new file mode 100644
index 0000000000..c80de646d6
--- /dev/null
+++ b/util/lexgen/tests/tst_lexgen.cpp
@@ -0,0 +1,285 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QtTest/QtTest>
+#define AUTOTEST
+#include "../main.cpp"
+
+class tst_LexGen : public QObject
+{
+    Q_OBJECT
+private slots:
+    void nfa_singleInput();
+    void nfa_alternating();
+    void nfa_concatenating();
+    void nfa_optional();
+    void nfa_toDFA_data();
+    void nfa_toDFA();
+    void lexgen_data();
+    void lexgen();
+};
+
+void tst_LexGen::nfa_singleInput()
+{
+    NFA nfa = NFA::createSingleInputNFA('a');
+
+    QCOMPARE(nfa.initialState, 0);
+    QCOMPARE(nfa.finalState, 1);
+
+    QCOMPARE(nfa.states.count(), 2);
+
+    QCOMPARE(nfa.states.at(0).transitions.count(), 1);
+    QVERIFY(nfa.states.at(0).transitions.contains('a'));
+    QCOMPARE(nfa.states.at(0).transitions.values('a').count(), 1);
+    QCOMPARE(nfa.states.at(0).transitions.value('a'), nfa.finalState);
+
+    QVERIFY(nfa.states.at(1).transitions.isEmpty());
+}
+
+void tst_LexGen::nfa_alternating()
+{
+    NFA a = NFA::createSingleInputNFA('a');
+    NFA b = NFA::createSingleInputNFA('b');
+    NFA nfa = NFA::createAlternatingNFA(a, b);
+
+    const int initialA = 1;
+    const int finalA = 2;
+
+    const int initialB = 3;
+    const int finalB = 4;
+
+    QCOMPARE(nfa.states.count(), 6);
+
+    QCOMPARE(nfa.initialState, 0);
+    QCOMPARE(nfa.finalState, 5);
+
+    QList<int> initialTransitions = nfa.states.at(0).transitions.values(Epsilon);
+    QCOMPARE(initialTransitions.count(), 2);
+    QVERIFY(initialTransitions.contains(initialA));
+    QVERIFY(initialTransitions.contains(initialB));
+
+    // no need to test the individual a and b NFAs, the other
+    // autotest already takes care of that. Just check whether
+    // the epsilon transitions to the final state exist.
+
+    QCOMPARE(nfa.states.at(finalA).transitions.count(), 1);
+    QCOMPARE(nfa.states.at(finalA).transitions.values(Epsilon).count(), 1);
+    QCOMPARE(nfa.states.at(finalA).transitions.value(Epsilon), nfa.finalState);
+
+    QCOMPARE(nfa.states.at(finalB).transitions.count(), 1);
+    QCOMPARE(nfa.states.at(finalB).transitions.values(Epsilon).count(), 1);
+    QCOMPARE(nfa.states.at(finalB).transitions.value(Epsilon), nfa.finalState);
+}
+
+void tst_LexGen::nfa_concatenating()
+{
+    NFA a = NFA::createSingleInputNFA('a');
+    NFA b = NFA::createSingleInputNFA('b');
+    NFA nfa = NFA::createConcatenatingNFA(a, b);
+
+    const int initialA = 1;
+    const int finalA = 2;
+
+    const int initialB = 3;
+    const int finalB = 4;
+
+    QCOMPARE(nfa.states.count(), 6);
+
+    QCOMPARE(nfa.initialState, 0);
+    QCOMPARE(nfa.finalState, 5);
+
+    QCOMPARE(nfa.states.at(0).transitions.count(), 1);
+    QCOMPARE(nfa.states.at(0).transitions.values(Epsilon).count(), 1);
+    QCOMPARE(nfa.states.at(0).transitions.value(Epsilon), initialA);
+
+    QCOMPARE(nfa.states.at(finalA).transitions.values(Epsilon).count(), 1);
+    QCOMPARE(nfa.states.at(finalA).transitions.value(Epsilon), initialB);
+
+    QCOMPARE(nfa.states.at(finalB).transitions.values(Epsilon).count(), 1);
+    QCOMPARE(nfa.states.at(finalB).transitions.value(Epsilon), nfa.finalState);
+}
+
+void tst_LexGen::nfa_optional()
+{
+    NFA a = NFA::createSingleInputNFA('a');
+    NFA nfa = NFA::createOptionalNFA(a);
+
+    const int initialA = 1;
+    const int finalA = 2;
+
+    QCOMPARE(nfa.states.count(), 4);
+
+    QCOMPARE(nfa.initialState, 0);
+    QCOMPARE(nfa.finalState, 3);
+
+    QCOMPARE(nfa.states.at(0).transitions.count(), 2);
+    QList<int> initialTransitions = nfa.states.at(0).transitions.values(Epsilon);
+    QVERIFY(initialTransitions.contains(nfa.finalState));
+    QVERIFY(initialTransitions.contains(initialA));
+
+    QList<int> finalEpsilonATransitions = nfa.states.at(finalA).transitions.values(Epsilon);
+    QVERIFY(finalEpsilonATransitions.contains(initialA));
+    QVERIFY(finalEpsilonATransitions.contains(nfa.finalState));
+}
+
+Q_DECLARE_METATYPE(NFA);
+Q_DECLARE_METATYPE(DFA);
+
+void tst_LexGen::nfa_toDFA_data()
+{
+    QTest::addColumn<NFA>("nfa");
+    QTest::addColumn<DFA>("expectedDFA");
+
+    NFA a = NFA::createSingleInputNFA('a');
+    NFA b = NFA::createSingleInputNFA('b');
+    NFA c = NFA::createSingleInputNFA('c');
+
+    NFA nfa;
+    DFA dfa;
+
+    dfa.clear();
+    dfa.resize(3);
+    dfa[0].transitions.insert('a', 1);
+    dfa[1].transitions.insert('b', 2);
+
+    nfa = NFA::createConcatenatingNFA(a, b);
+
+    QTest::newRow("simple concat") << nfa << dfa;
+
+    dfa.clear();
+    dfa.resize(3);
+    dfa[0].transitions.insert('a', 1);
+    dfa[0].transitions.insert('b', 2);
+
+    nfa = NFA::createAlternatingNFA(a, b);
+
+    QTest::newRow("simple alternate") << nfa << dfa;
+
+}
+
+void tst_LexGen::nfa_toDFA()
+{
+    QFETCH(NFA, nfa);
+    QFETCH(DFA, expectedDFA);
+
+    DFA dfa = nfa.toDFA();
+
+    QCOMPARE(dfa.count(), expectedDFA.count());
+    for (int i = 0; i < dfa.count(); ++i) {
+        if (dfa.at(i).transitions != expectedDFA.at(i).transitions) {
+            qDebug() << "DFAs differ in state" << i;
+            qDebug() << "NFA:";
+            nfa.debug();
+            qDebug() << "Actual DFA:";
+            dfa.debug();
+            qDebug() << "Expected DFA:";
+            expectedDFA.debug();
+            QVERIFY(false);
+        }
+    }
+}
+
+void tst_LexGen::lexgen_data()
+{
+    QTest::addColumn<QString>("ruleFile");
+    QTest::addColumn<QString>("input");
+    QTest::addColumn<QString>("expectedOutput");
+
+    QDir d(QString(SRCDIR));
+    d.cd("testdata");
+    foreach (QString test, d.entryList(QDir::Dirs | QDir::NoDotAndDotDot)) {
+        QString dir = d.absoluteFilePath(test) + '/';
+        QTest::newRow(qPrintable(test)) 
+            << dir + "rules.lexgen"
+            << dir + "input"
+            << dir + "output"
+            ;
+    }
+}
+
+void tst_LexGen::lexgen()
+{
+    QFETCH(QString, ruleFile);
+    QFETCH(QString, input);
+    QFETCH(QString, expectedOutput);
+
+    Config conf;
+    QVERIFY(loadConfig(ruleFile, &conf));
+    DFA dfa = generateMachine(conf);
+    QVERIFY(!dfa.isEmpty());
+    conf.debug = true;
+
+    QFile f(input);
+    QVERIFY(f.open(QIODevice::ReadOnly));
+    input = QString::fromUtf8(f.readAll());
+    f.close();
+    if (input.endsWith(QLatin1Char('\n')))
+        input.chop(1);
+//    machine.debug();
+    bool ok = false;
+    QList<Symbol> symbols = tokenize(dfa, input, &conf, &ok);
+    QVERIFY(ok);
+    f.setFileName(expectedOutput);
+    QVERIFY(f.open(QIODevice::ReadOnly));
+    QStringList lines;
+    while (!f.atEnd()) {
+        QString line = QString::fromUtf8(f.readLine());
+        if (line.endsWith(QLatin1Char('\n')))
+            line.chop(1);
+        lines << line;
+    }
+    f.close();
+
+//    dfa.debug();
+    QCOMPARE(lines.count(), symbols.count());
+
+    for (int i = 0; i < lines.count(); ++i) {
+        QStringList l = lines.at(i).split(QChar::fromLatin1('|'));
+        QCOMPARE(l.count(), 2);
+        QString expectedToken = l.at(0);
+        QString expectedLexem = l.at(1);
+        QCOMPARE(symbols.at(i).token, expectedToken);
+        QCOMPARE(symbols.at(i).lexem, expectedLexem);
+    }
+}
+
+QTEST_MAIN(tst_LexGen)
+#include "tst_lexgen.moc"
diff --git a/util/lexgen/tokenizer.cpp b/util/lexgen/tokenizer.cpp
new file mode 100644
index 0000000000..39fc7ca1ef
--- /dev/null
+++ b/util/lexgen/tokenizer.cpp
@@ -0,0 +1,237 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+// auto generated. DO NOT EDIT.
+class RegExpTokenizer
+{
+public:
+    RegExpTokenizer(const QString &inp);
+
+    inline QChar next() {
+        return (pos < input.length()) ? input.at(pos++) : QChar();
+    }
+    int lex();
+
+    QString input;
+    int pos;
+    int lexemStart;
+    int lexemLength;
+};
+
+RegExpTokenizer::RegExpTokenizer(const QString &inp)
+{
+    input = inp;
+    pos = 0;
+    lexemStart = 0;
+    lexemLength = 0;
+}
+
+
+int RegExpTokenizer::lex()
+{
+    lexemStart = pos;
+    lexemLength = 0;
+    int lastAcceptingPos = -1;
+    int token = -1;
+    QChar ch;
+    
+    // initial state
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 33)
+            goto state_1;
+        if (ch.unicode() == 34)
+            goto state_2;
+        if (ch.unicode() >= 35 && ch.unicode() <= 39)
+            goto state_1;
+        if (ch.unicode() == 40) {
+            token = RE2NFA::TOK_LPAREN;
+            goto found;
+        }
+        if (ch.unicode() == 41) {
+            token = RE2NFA::TOK_RPAREN;
+            goto found;
+        }
+        if (ch.unicode() == 42) {
+            token = RE2NFA::TOK_STAR;
+            goto found;
+        }
+        if (ch.unicode() == 43) {
+            token = RE2NFA::TOK_PLUS;
+            goto found;
+        }
+        if (ch.unicode() == 44) {
+            token = RE2NFA::TOK_COMMA;
+            goto found;
+        }
+        if (ch.unicode() == 45)
+            goto state_1;
+        if (ch.unicode() == 46) {
+            token = RE2NFA::TOK_DOT;
+            goto found;
+        }
+        if (ch.unicode() >= 47 && ch.unicode() <= 62)
+            goto state_1;
+        if (ch.unicode() == 63) {
+            token = RE2NFA::TOK_QUESTION;
+            goto found;
+        }
+        if (ch.unicode() >= 64 && ch.unicode() <= 90)
+            goto state_1;
+        if (ch.unicode() == 91)
+            goto state_10;
+        if (ch.unicode() == 92)
+            goto state_11;
+        if (ch.unicode() >= 93 && ch.unicode() <= 122)
+            goto state_1;
+        if (ch.unicode() == 123) {
+            token = RE2NFA::TOK_LBRACE;
+            goto found;
+        }
+        if (ch.unicode() == 124) {
+            token = RE2NFA::TOK_OR;
+            goto found;
+        }
+        if (ch.unicode() == 125) {
+            token = RE2NFA::TOK_RBRACE;
+            goto found;
+        }
+        if (ch.unicode() >= 126)
+            goto state_1;
+        goto out;
+    state_1:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_STRING;
+        goto out;
+    state_2:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_STRING;
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 33)
+            goto state_15;
+        if (ch.unicode() == 34)
+            goto state_16;
+        if (ch.unicode() >= 35)
+            goto state_15;
+        goto out;
+    state_10:
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 91)
+            goto state_17;
+        if (ch.unicode() == 92)
+            goto state_18;
+        if (ch.unicode() == 93)
+            goto state_19;
+        if (ch.unicode() >= 94)
+            goto state_17;
+        goto out;
+    state_11:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_STRING;
+        ch = next();
+        if (ch.unicode() >= 1)
+            goto state_20;
+        goto out;
+    state_15:
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 33)
+            goto state_15;
+        if (ch.unicode() == 34)
+            goto state_16;
+        if (ch.unicode() >= 35)
+            goto state_15;
+        goto out;
+    state_16:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_QUOTED_STRING;
+        goto out;
+    state_17:
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 91)
+            goto state_17;
+        if (ch.unicode() == 92)
+            goto state_18;
+        if (ch.unicode() == 93)
+            goto state_19;
+        if (ch.unicode() >= 94)
+            goto state_17;
+        goto out;
+    state_18:
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 91)
+            goto state_17;
+        if (ch.unicode() == 92)
+            goto state_18;
+        if (ch.unicode() == 93)
+            goto state_21;
+        if (ch.unicode() >= 94)
+            goto state_17;
+        goto out;
+    state_19:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_SEQUENCE;
+        goto out;
+    state_20:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_STRING;
+        goto out;
+    state_21:
+        lastAcceptingPos = pos;
+        token = RE2NFA::TOK_SEQUENCE;
+        ch = next();
+        if (ch.unicode() >= 1 && ch.unicode() <= 91)
+            goto state_17;
+        if (ch.unicode() == 92)
+            goto state_18;
+        if (ch.unicode() == 93)
+            goto state_19;
+        if (ch.unicode() >= 94)
+            goto state_17;
+        goto out;
+    found:
+    lastAcceptingPos = pos;
+    
+    out:
+    if (lastAcceptingPos != -1) {
+        lexemLength = lastAcceptingPos - lexemStart;
+        pos = lastAcceptingPos;
+    }
+    return token;
+}
+