diff options
author | Liang Qi <liang.qi@theqtcompany.com> | 2015-10-23 14:01:35 +0200 |
---|---|---|
committer | Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com> | 2015-10-23 14:45:03 +0200 |
commit | 790aef362fd195adf97d8c780a7cbbbade27d51f (patch) | |
tree | 8be464687ab21806cfe9f7ada27098b563aa41b2 /src/tools/qdoc/tokenizer.cpp | |
parent | 9720efbd1035c2e939b0581163e6d804c713dd96 (diff) | |
parent | 07475c662eb73c833da2d461b8ef2702ca1e2cfb (diff) |
Merge remote-tracking branch 'origin/5.6' into dev
Conflicts:
.qmake.conf
configure
src/corelib/global/qglobal.h
src/tools/qdoc/node.cpp
src/tools/qdoc/qdocdatabase.cpp
tests/auto/corelib/io/qsettings/tst_qsettings.cpp
tools/configure/configureapp.cpp
Change-Id: I66028ae5e441a06b73ee85ba72a03a3af3e8593f
Diffstat (limited to 'src/tools/qdoc/tokenizer.cpp')
-rw-r--r-- | src/tools/qdoc/tokenizer.cpp | 799 |
1 files changed, 0 insertions, 799 deletions
diff --git a/src/tools/qdoc/tokenizer.cpp b/src/tools/qdoc/tokenizer.cpp deleted file mode 100644 index 987fff548c..0000000000 --- a/src/tools/qdoc/tokenizer.cpp +++ /dev/null @@ -1,799 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2015 The Qt Company Ltd. -** Contact: http://www.qt.io/licensing/ -** -** This file is part of the tools applications of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL21$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see http://www.qt.io/terms-conditions. For further -** information use the contact form at http://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 2.1 or version 3 as published by the Free -** Software Foundation and appearing in the file LICENSE.LGPLv21 and -** LICENSE.LGPLv3 included in the packaging of this file. Please review the -** following information to ensure the GNU Lesser General Public License -** requirements will be met: https://www.gnu.org/licenses/lgpl.html and -** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -** -** As a special exception, The Qt Company gives you certain additional -** rights. These rights are described in The Qt Company LGPL Exception -** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "config.h" -#include "tokenizer.h" -#include "generator.h" - -#include <qfile.h> -#include <qhash.h> -#include <qregexp.h> -#include <qstring.h> -#include <qtextcodec.h> - -#include <ctype.h> -#include <string.h> - -QT_BEGIN_NAMESPACE - -#define LANGUAGE_CPP "Cpp" - -/* qmake ignore Q_OBJECT */ - -/* - Keep in sync with tokenizer.h. -*/ -static const char *kwords[] = { - "char", "class", "const", "double", "enum", "explicit", - "friend", "inline", "int", "long", "namespace", "operator", - "private", "protected", "public", "short", "signals", "signed", - "slots", "static", "struct", "template", "typedef", "typename", - "union", "unsigned", "using", "virtual", "void", "volatile", - "__int64", - "Q_OBJECT", - "Q_OVERRIDE", - "Q_PROPERTY", - "Q_PRIVATE_PROPERTY", - "Q_DECLARE_SEQUENTIAL_ITERATOR", - "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR", - "Q_DECLARE_ASSOCIATIVE_ITERATOR", - "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR", - "Q_DECLARE_FLAGS", - "Q_SIGNALS", - "Q_SLOTS", - "QT_COMPAT", - "QT_COMPAT_CONSTRUCTOR", - "QT_DEPRECATED", - "QT_MOC_COMPAT", - "QT_MODULE", - "QT3_SUPPORT", - "QT3_SUPPORT_CONSTRUCTOR", - "QT3_MOC_SUPPORT", - "QDOC_PROPERTY", - "QPrivateSignal" -}; - -static const int KwordHashTableSize = 4096; -static int kwordHashTable[KwordHashTableSize]; - -static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0; - -static QRegExp *comment = 0; -static QRegExp *versionX = 0; -static QRegExp *definedX = 0; - -static QRegExp *defines = 0; -static QRegExp *falsehoods = 0; - -#ifndef QT_NO_TEXTCODEC -static QTextCodec *sourceCodec = 0; -#endif - -/* - This function is a perfect hash function for the 37 keywords of C99 - (with a hash table size of 512). It should perform well on our - Qt-enhanced C++ subset. -*/ -static int hashKword(const char *s, int len) -{ - return (((uchar) s[0]) + (((uchar) s[2]) << 5) + - (((uchar) s[len - 1]) << 3)) % KwordHashTableSize; -} - -static void insertKwordIntoHash(const char *s, int number) -{ - int k = hashKword(s, int(strlen(s))); - while (kwordHashTable[k]) { - if (++k == KwordHashTableSize) - k = 0; - } - kwordHashTable[k] = number; -} - -Tokenizer::Tokenizer(const Location& loc, QFile &in) -{ - init(); - yyIn = in.readAll(); - yyPos = 0; - start(loc); -} - -Tokenizer::Tokenizer(const Location& loc, const QByteArray &in) - : yyIn(in) -{ - init(); - yyPos = 0; - start(loc); -} - -Tokenizer::~Tokenizer() -{ - delete[] yyLexBuf1; - delete[] yyLexBuf2; -} - -int Tokenizer::getToken() -{ - char *t = yyPrevLex; - yyPrevLex = yyLex; - yyLex = t; - - while (yyCh != EOF) { - yyTokLoc = yyCurLoc; - yyLexLen = 0; - - if (isspace(yyCh)) { - do { - yyCh = getChar(); - } while (isspace(yyCh)); - } - else if (isalpha(yyCh) || yyCh == '_') { - do { - yyCh = getChar(); - } while (isalnum(yyCh) || yyCh == '_'); - - int k = hashKword(yyLex, int(yyLexLen)); - for (;;) { - int i = kwordHashTable[k]; - if (i == 0) { - return Tok_Ident; - } - else if (i == -1) { - if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) { - if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive - int parenDepth = 0; - while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) { - if (yyCh == '(') - ++parenDepth; - else if (yyCh == ')') - --parenDepth; - yyCh = getChar(); - } - if (yyCh == ')') - yyCh = getChar(); - } - break; - } - } - else if (strcmp(yyLex, kwords[i - 1]) == 0) { - int ret = (int) Tok_FirstKeyword + i - 1; - if (ret != Tok_typename) - return ret; - break; - } - - if (++k == KwordHashTableSize) - k = 0; - } - } - else if (isdigit(yyCh)) { - do { - yyCh = getChar(); - } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || - yyCh == '-'); - return Tok_Number; - } - else { - switch (yyCh) { - case '!': - case '%': - yyCh = getChar(); - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - case '"': - yyCh = getChar(); - - while (yyCh != EOF && yyCh != '"') { - if (yyCh == '\\') - yyCh = getChar(); - yyCh = getChar(); - } - yyCh = getChar(); - - if (yyCh == EOF) - yyTokLoc.warning(tr("Unterminated C++ string literal"), - tr("Maybe you forgot '/*!' at the beginning of the file?")); - else - return Tok_String; - break; - case '#': - return getTokenAfterPreprocessor(); - case '&': - yyCh = getChar(); - /* - Removed check for '&&', only interpret '&=' as an operator. - '&&' is also used for an rvalue reference. QTBUG-32675 - */ - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } - else { - return Tok_Ampersand; - } - case '\'': - yyCh = getChar(); - /* - Allow empty character literal. QTBUG-25775 - */ - if (yyCh == '\'') { - yyCh = getChar(); - break; - } - if (yyCh == '\\') - yyCh = getChar(); - do { - yyCh = getChar(); - } while (yyCh != EOF && yyCh != '\''); - - if (yyCh == EOF) { - yyTokLoc.warning(tr("Unterminated C++ character literal")); - } - else { - yyCh = getChar(); - return Tok_Number; - } - break; - case '(': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyParenDepth++; - if (isspace(yyCh)) { - do { - yyCh = getChar(); - } while (isspace(yyCh)); - yyLexLen = 1; - yyLex[1] = '\0'; - } - if (yyCh == '*') { - yyCh = getChar(); - return Tok_LeftParenAster; - } - return Tok_LeftParen; - case ')': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyParenDepth--; - return Tok_RightParen; - case '*': - yyCh = getChar(); - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Aster; - } - case '^': - yyCh = getChar(); - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Caret; - } - case '+': - yyCh = getChar(); - if (yyCh == '+' || yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - case ',': - yyCh = getChar(); - return Tok_Comma; - case '-': - yyCh = getChar(); - if (yyCh == '-' || yyCh == '=') { - yyCh = getChar(); - } else if (yyCh == '>') { - yyCh = getChar(); - if (yyCh == '*') - yyCh = getChar(); - } - return Tok_SomeOperator; - case '.': - yyCh = getChar(); - if (yyCh == '*') { - yyCh = getChar(); - } else if (yyCh == '.') { - do { - yyCh = getChar(); - } while (yyCh == '.'); - return Tok_Ellipsis; - } else if (isdigit(yyCh)) { - do { - yyCh = getChar(); - } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || - yyCh == '-'); - return Tok_Number; - } - return Tok_SomeOperator; - case '/': - yyCh = getChar(); - if (yyCh == '/') { - do { - yyCh = getChar(); - } while (yyCh != EOF && yyCh != '\n'); - } else if (yyCh == '*') { - bool metDoc = false; // empty doc is no doc - bool metSlashAsterBang = false; - bool metAster = false; - bool metAsterSlash = false; - - yyCh = getChar(); - if (yyCh == '!') - metSlashAsterBang = true; - - while (!metAsterSlash) { - if (yyCh == EOF) { - yyTokLoc.warning(tr("Unterminated C++ comment")); - break; - } else { - if (yyCh == '*') { - metAster = true; - } else if (metAster && yyCh == '/') { - metAsterSlash = true; - } else { - metAster = false; - if (isgraph(yyCh)) - metDoc = true; - } - } - yyCh = getChar(); - } - if (metSlashAsterBang && metDoc) - return Tok_Doc; - else if (yyParenDepth > 0) - return Tok_Comment; - } else { - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - } - break; - case ':': - yyCh = getChar(); - if (yyCh == ':') { - yyCh = getChar(); - return Tok_Gulbrandsen; - } else { - return Tok_Colon; - } - case ';': - yyCh = getChar(); - return Tok_Semicolon; - case '<': - yyCh = getChar(); - if (yyCh == '<') { - yyCh = getChar(); - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - } else if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_LeftAngle; - } - case '=': - yyCh = getChar(); - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Equal; - } - case '>': - yyCh = getChar(); - if (yyCh == '>') { - yyCh = getChar(); - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - } else if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_RightAngle; - } - case '?': - yyCh = getChar(); - return Tok_SomeOperator; - case '[': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBracketDepth++; - return Tok_LeftBracket; - case '\\': - yyCh = getChar(); - yyCh = getChar(); // skip one character - break; - case ']': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBracketDepth--; - return Tok_RightBracket; - case '{': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBraceDepth++; - return Tok_LeftBrace; - case '}': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBraceDepth--; - return Tok_RightBrace; - case '|': - yyCh = getChar(); - if (yyCh == '|' || yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - case '~': - yyCh = getChar(); - return Tok_Tilde; - case '@': - yyCh = getChar(); - return Tok_At; - default: - // ### We should really prevent qdoc from looking at snippet files rather than - // ### suppress warnings when reading them. - if (yyNumPreprocessorSkipping == 0 && !(yyTokLoc.fileName().endsWith(".qdoc") || - yyTokLoc.fileName().endsWith(".js"))) { - yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source") - .arg((uchar)yyCh, 1, 16)); - } - yyCh = getChar(); - } - } - } - - if (yyPreprocessorSkipping.count() > 1) { - yyTokLoc.warning(tr("Expected #endif before end of file")); - // clear it out or we get an infinite loop! - while (!yyPreprocessorSkipping.isEmpty()) { - popSkipping(); - } - } - - strcpy(yyLex, "end-of-input"); - yyLexLen = strlen(yyLex); - return Tok_Eoi; -} - -void Tokenizer::initialize(const Config &config) -{ - QString versionSym = config.getString(CONFIG_VERSIONSYM); - - QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING); - if (sourceEncoding.isEmpty()) - sourceEncoding = QLatin1String("ISO-8859-1"); -#ifndef QT_NO_TEXTCODEC - sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit()); -#endif - - comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)"); - comment->setMinimal(true); - versionX = new QRegExp("$cannot possibly match^"); - if (!versionSym.isEmpty()) - versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym) - + ")[ \t]+\"([^\"]*)\"[ \t]*"); - definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)"); - - QStringList d = config.getStringList(CONFIG_DEFINES); - d += "qdoc"; - defines = new QRegExp(d.join('|')); - falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join('|')); - - /* - The keyword hash table is always cleared before any words are inserted. - */ - memset(kwordHashTable, 0, sizeof(kwordHashTable)); - for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++) - insertKwordIntoHash(kwords[i], i + 1); - - ignoredTokensAndDirectives = new QHash<QByteArray, bool>; - - QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS); - foreach (const QString &t, tokens) { - const QByteArray tb = t.toLatin1(); - ignoredTokensAndDirectives->insert(tb, false); - insertKwordIntoHash(tb.data(), -1); - } - - QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot - + CONFIG_IGNOREDIRECTIVES); - foreach (const QString &d, directives) { - const QByteArray db = d.toLatin1(); - ignoredTokensAndDirectives->insert(db, true); - insertKwordIntoHash(db.data(), -1); - } -} - -/*! - The heap allocated variables are freed here. The keyword - hash table is not cleared here, but it is cleared in the - initialize() function, before any keywords are inserted. - */ -void Tokenizer::terminate() -{ - delete comment; - comment = 0; - delete versionX; - versionX = 0; - delete definedX; - definedX = 0; - delete defines; - defines = 0; - delete falsehoods; - falsehoods = 0; - delete ignoredTokensAndDirectives; - ignoredTokensAndDirectives = 0; -} - -void Tokenizer::init() -{ - yyLexBuf1 = new char[(int) yyLexBufSize]; - yyLexBuf2 = new char[(int) yyLexBufSize]; - yyPrevLex = yyLexBuf1; - yyPrevLex[0] = '\0'; - yyLex = yyLexBuf2; - yyLex[0] = '\0'; - yyLexLen = 0; - yyPreprocessorSkipping.push(false); - yyNumPreprocessorSkipping = 0; - yyBraceDepth = 0; - yyParenDepth = 0; - yyBracketDepth = 0; - yyCh = '\0'; - parsingMacro = false; -} - -void Tokenizer::start(const Location& loc) -{ - yyTokLoc = loc; - yyCurLoc = loc; - yyCurLoc.start(); - strcpy(yyPrevLex, "beginning-of-input"); - strcpy(yyLex, "beginning-of-input"); - yyLexLen = strlen(yyLex); - yyBraceDepth = 0; - yyParenDepth = 0; - yyBracketDepth = 0; - yyCh = '\0'; - yyCh = getChar(); -} - -/* - Returns the next token, if # was met. This function interprets the - preprocessor directive, skips over any #ifdef'd out tokens, and returns the - token after all of that. -*/ -int Tokenizer::getTokenAfterPreprocessor() -{ - yyCh = getChar(); - while (isspace(yyCh) && yyCh != '\n') - yyCh = getChar(); - - /* - #directive condition - */ - QString directive; - QString condition; - - while (isalpha(yyCh)) { - directive += QChar(yyCh); - yyCh = getChar(); - } - if (!directive.isEmpty()) { - while (yyCh != EOF && yyCh != '\n') { - if (yyCh == '\\') { - yyCh = getChar(); - if (yyCh == '\r') - yyCh = getChar(); - } - condition += yyCh; - yyCh = getChar(); - } - condition.remove(*comment); - condition = condition.simplified(); - - /* - The #if, #ifdef, #ifndef, #elif, #else, and #endif - directives have an effect on the skipping stack. For - instance, if the code processed so far is - - #if 1 - #if 0 - #if 1 - // ... - #else - - the skipping stack contains, from bottom to top, false true - true (assuming 0 is false and 1 is true). If at least one - entry of the stack is true, the tokens are skipped. - - This mechanism is simple yet hard to understand. - */ - if (directive[0] == QChar('i')) { - if (directive == QString("if")) - pushSkipping(!isTrue(condition)); - else if (directive == QString("ifdef")) - pushSkipping(!defines->exactMatch(condition)); - else if (directive == QString("ifndef")) - pushSkipping(defines->exactMatch(condition)); - } else if (directive[0] == QChar('e')) { - if (directive == QString("elif")) { - bool old = popSkipping(); - if (old) - pushSkipping(!isTrue(condition)); - else - pushSkipping(true); - } else if (directive == QString("else")) { - pushSkipping(!popSkipping()); - } else if (directive == QString("endif")) { - popSkipping(); - } - } else if (directive == QString("define")) { - if (versionX->exactMatch(condition)) - yyVersion = versionX->cap(1); - } - } - - int tok; - do { - /* - We set yyLex now, and after getToken() this will be - yyPrevLex. This way, we skip over the preprocessor - directive. - */ - qstrcpy(yyLex, yyPrevLex); - - /* - If getToken() meets another #, it will call - getTokenAfterPreprocessor() once again, which could in turn - call getToken() again, etc. Unless there are 10,000 or so - preprocessor directives in a row, this shouldn't overflow - the stack. - */ - tok = getToken(); - } while (yyNumPreprocessorSkipping > 0 && tok != Tok_Eoi); - return tok; -} - -/* - Pushes a new skipping value onto the stack. This corresponds to entering a - new #if block. -*/ -void Tokenizer::pushSkipping(bool skip) -{ - yyPreprocessorSkipping.push(skip); - if (skip) - yyNumPreprocessorSkipping++; -} - -/* - Pops a skipping value from the stack. This corresponds to reaching a #endif. -*/ -bool Tokenizer::popSkipping() -{ - if (yyPreprocessorSkipping.isEmpty()) { - yyTokLoc.warning(tr("Unexpected #elif, #else or #endif")); - return true; - } - - bool skip = yyPreprocessorSkipping.pop(); - if (skip) - yyNumPreprocessorSkipping--; - return skip; -} - -/* - Returns \c true if the condition evaluates as true, otherwise false. The - condition is represented by a string. Unsophisticated parsing techniques are - used. The preprocessing method could be named StriNg-Oriented PreProcessing, - as SNOBOL stands for StriNg-Oriented symBOlic Language. -*/ -bool Tokenizer::isTrue(const QString &condition) -{ - int firstOr = -1; - int firstAnd = -1; - int parenDepth = 0; - - /* - Find the first logical operator at top level, but be careful - about precedence. Examples: - - X || Y // the or - X || Y || Z // the leftmost or - X || Y && Z // the or - X && Y || Z // the or - (X || Y) && Z // the and - */ - for (int i = 0; i < (int) condition.length() - 1; i++) { - QChar ch = condition[i]; - if (ch == QChar('(')) { - parenDepth++; - } else if (ch == QChar(')')) { - parenDepth--; - } else if (parenDepth == 0) { - if (condition[i + 1] == ch) { - if (ch == QChar('|')) { - firstOr = i; - break; - } else if (ch == QChar('&')) { - if (firstAnd == -1) - firstAnd = i; - } - } - } - } - if (firstOr != -1) - return isTrue(condition.left(firstOr)) || - isTrue(condition.mid(firstOr + 2)); - if (firstAnd != -1) - return isTrue(condition.left(firstAnd)) && - isTrue(condition.mid(firstAnd + 2)); - - QString t = condition.simplified(); - if (t.isEmpty()) - return true; - - if (t[0] == QChar('!')) - return !isTrue(t.mid(1)); - if (t[0] == QChar('(') && t.endsWith(QChar(')'))) - return isTrue(t.mid(1, t.length() - 2)); - - if (definedX->exactMatch(t)) - return defines->exactMatch(definedX->cap(1)); - else - return !falsehoods->exactMatch(t); -} - -QString Tokenizer::lexeme() const -{ -#ifndef QT_NO_TEXTCODEC - return sourceCodec->toUnicode(yyLex); -#else - return QString::fromUtf8(yyLex); -#endif -} - -QString Tokenizer::previousLexeme() const -{ -#ifndef QT_NO_TEXTCODEC - return sourceCodec->toUnicode(yyPrevLex); -#else - return QString::fromUtf8(yyPrevLex); -#endif -} - -QT_END_NAMESPACE |