diff options
Diffstat (limited to 'src/qdoc/tokenizer.cpp')
-rw-r--r-- | src/qdoc/tokenizer.cpp | 806 |
1 files changed, 0 insertions, 806 deletions
diff --git a/src/qdoc/tokenizer.cpp b/src/qdoc/tokenizer.cpp deleted file mode 100644 index dd2282995..000000000 --- a/src/qdoc/tokenizer.cpp +++ /dev/null @@ -1,806 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2019 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the tools applications of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "tokenizer.h" - -#include "config.h" -#include "generator.h" - -#include <QtCore/qfile.h> -#include <QtCore/qhash.h> -#include <QtCore/qregularexpression.h> -#include <QtCore/qstring.h> -#include <QtCore/qstringconverter.h> - -#include <cctype> -#include <cstring> - -QT_BEGIN_NAMESPACE - -#define LANGUAGE_CPP "Cpp" - -/* qmake ignore Q_OBJECT */ - -/* - Keep in sync with tokenizer.h. -*/ -static const char *kwords[] = { "char", - "class", - "const", - "double", - "enum", - "explicit", - "friend", - "inline", - "int", - "long", - "namespace", - "operator", - "private", - "protected", - "public", - "short", - "signals", - "signed", - "slots", - "static", - "struct", - "template", - "typedef", - "typename", - "union", - "unsigned", - "using", - "virtual", - "void", - "volatile", - "__int64", - "default", - "delete", - "final", - "override", - "Q_OBJECT", - "Q_OVERRIDE", - "Q_PROPERTY", - "Q_PRIVATE_PROPERTY", - "Q_DECLARE_SEQUENTIAL_ITERATOR", - "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR", - "Q_DECLARE_ASSOCIATIVE_ITERATOR", - "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR", - "Q_DECLARE_FLAGS", - "Q_SIGNALS", - "Q_SLOTS", - "QT_COMPAT", - "QT_COMPAT_CONSTRUCTOR", - "QT_DEPRECATED", - "QT_MOC_COMPAT", - "QT_MODULE", - "QT3_SUPPORT", - "QT3_SUPPORT_CONSTRUCTOR", - "QT3_MOC_SUPPORT", - "QDOC_PROPERTY", - "QPrivateSignal" }; - -static const int KwordHashTableSize = 4096; -static int kwordHashTable[KwordHashTableSize]; - -static QHash<QByteArray, bool> *ignoredTokensAndDirectives = nullptr; - -static QRegularExpression *comment = nullptr; -static QRegularExpression *versionX = nullptr; -static QRegularExpression *definedX = nullptr; - -static QRegularExpression *defines = nullptr; -static QRegularExpression *falsehoods = nullptr; - -static QStringDecoder sourceDecoder; - -/* - This function is a perfect hash function for the 37 keywords of C99 - (with a hash table size of 512). It should perform well on our - Qt-enhanced C++ subset. -*/ -static int hashKword(const char *s, int len) -{ - return (((uchar)s[0]) + (((uchar)s[2]) << 5) + (((uchar)s[len - 1]) << 3)) % KwordHashTableSize; -} - -static void insertKwordIntoHash(const char *s, int number) -{ - int k = hashKword(s, int(strlen(s))); - while (kwordHashTable[k]) { - if (++k == KwordHashTableSize) - k = 0; - } - kwordHashTable[k] = number; -} - -Tokenizer::Tokenizer(const Location &loc, QFile &in) -{ - init(); - yyIn = in.readAll(); - yyPos = 0; - start(loc); -} - -Tokenizer::Tokenizer(const Location &loc, const QByteArray &in) : yyIn(in) -{ - init(); - yyPos = 0; - start(loc); -} - -Tokenizer::~Tokenizer() -{ - delete[] yyLexBuf1; - delete[] yyLexBuf2; -} - -int Tokenizer::getToken() -{ - char *t = yyPrevLex; - yyPrevLex = yyLex; - yyLex = t; - - while (yyCh != EOF) { - yyTokLoc = yyCurLoc; - yyLexLen = 0; - - if (isspace(yyCh)) { - do { - yyCh = getChar(); - } while (isspace(yyCh)); - } else if (isalpha(yyCh) || yyCh == '_') { - do { - yyCh = getChar(); - } while (isalnum(yyCh) || yyCh == '_'); - - int k = hashKword(yyLex, int(yyLexLen)); - for (;;) { - int i = kwordHashTable[k]; - if (i == 0) { - return Tok_Ident; - } else if (i == -1) { - if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) { - if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive - int parenDepth = 0; - while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) { - if (yyCh == '(') - ++parenDepth; - else if (yyCh == ')') - --parenDepth; - yyCh = getChar(); - } - if (yyCh == ')') - yyCh = getChar(); - } - break; - } - } else if (strcmp(yyLex, kwords[i - 1]) == 0) { - int ret = (int)Tok_FirstKeyword + i - 1; - if (ret != Tok_typename) - return ret; - break; - } - - if (++k == KwordHashTableSize) - k = 0; - } - } else if (isdigit(yyCh)) { - do { - yyCh = getChar(); - } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || yyCh == '-'); - return Tok_Number; - } else { - switch (yyCh) { - case '!': - case '%': - yyCh = getChar(); - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - case '"': - yyCh = getChar(); - - while (yyCh != EOF && yyCh != '"') { - if (yyCh == '\\') - yyCh = getChar(); - yyCh = getChar(); - } - yyCh = getChar(); - - if (yyCh == EOF) - yyTokLoc.warning( - QStringLiteral("Unterminated C++ string literal"), - QStringLiteral("Maybe you forgot '/*!' at the beginning of the file?")); - else - return Tok_String; - break; - case '#': - return getTokenAfterPreprocessor(); - case '&': - yyCh = getChar(); - /* - Removed check for '&&', only interpret '&=' as an operator. - '&&' is also used for an rvalue reference. QTBUG-32675 - */ - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Ampersand; - } - case '\'': - yyCh = getChar(); - /* - Allow empty character literal. QTBUG-25775 - */ - if (yyCh == '\'') { - yyCh = getChar(); - break; - } - if (yyCh == '\\') - yyCh = getChar(); - do { - yyCh = getChar(); - } while (yyCh != EOF && yyCh != '\''); - - if (yyCh == EOF) { - yyTokLoc.warning(QStringLiteral("Unterminated C++ character literal")); - } else { - yyCh = getChar(); - return Tok_Number; - } - break; - case '(': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyParenDepth++; - if (isspace(yyCh)) { - do { - yyCh = getChar(); - } while (isspace(yyCh)); - yyLexLen = 1; - yyLex[1] = '\0'; - } - if (yyCh == '*') { - yyCh = getChar(); - return Tok_LeftParenAster; - } - return Tok_LeftParen; - case ')': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyParenDepth--; - return Tok_RightParen; - case '*': - yyCh = getChar(); - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Aster; - } - case '^': - yyCh = getChar(); - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Caret; - } - case '+': - yyCh = getChar(); - if (yyCh == '+' || yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - case ',': - yyCh = getChar(); - return Tok_Comma; - case '-': - yyCh = getChar(); - if (yyCh == '-' || yyCh == '=') { - yyCh = getChar(); - } else if (yyCh == '>') { - yyCh = getChar(); - if (yyCh == '*') - yyCh = getChar(); - } - return Tok_SomeOperator; - case '.': - yyCh = getChar(); - if (yyCh == '*') { - yyCh = getChar(); - } else if (yyCh == '.') { - do { - yyCh = getChar(); - } while (yyCh == '.'); - return Tok_Ellipsis; - } else if (isdigit(yyCh)) { - do { - yyCh = getChar(); - } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || yyCh == '-'); - return Tok_Number; - } - return Tok_SomeOperator; - case '/': - yyCh = getChar(); - if (yyCh == '/') { - do { - yyCh = getChar(); - } while (yyCh != EOF && yyCh != '\n'); - } else if (yyCh == '*') { - bool metDoc = false; // empty doc is no doc - bool metSlashAsterBang = false; - bool metAster = false; - bool metAsterSlash = false; - - yyCh = getChar(); - if (yyCh == '!') - metSlashAsterBang = true; - - while (!metAsterSlash) { - if (yyCh == EOF) { - yyTokLoc.warning(QStringLiteral("Unterminated C++ comment")); - break; - } else { - if (yyCh == '*') { - metAster = true; - } else if (metAster && yyCh == '/') { - metAsterSlash = true; - } else { - metAster = false; - if (isgraph(yyCh)) - metDoc = true; - } - } - yyCh = getChar(); - } - if (metSlashAsterBang && metDoc) - return Tok_Doc; - else if (yyParenDepth > 0) - return Tok_Comment; - } else { - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - } - break; - case ':': - yyCh = getChar(); - if (yyCh == ':') { - yyCh = getChar(); - return Tok_Gulbrandsen; - } else { - return Tok_Colon; - } - case ';': - yyCh = getChar(); - return Tok_Semicolon; - case '<': - yyCh = getChar(); - if (yyCh == '<') { - yyCh = getChar(); - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - } else if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_LeftAngle; - } - case '=': - yyCh = getChar(); - if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_Equal; - } - case '>': - yyCh = getChar(); - if (yyCh == '>') { - yyCh = getChar(); - if (yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - } else if (yyCh == '=') { - yyCh = getChar(); - return Tok_SomeOperator; - } else { - return Tok_RightAngle; - } - case '?': - yyCh = getChar(); - return Tok_SomeOperator; - case '[': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBracketDepth++; - return Tok_LeftBracket; - case '\\': - yyCh = getChar(); - yyCh = getChar(); // skip one character - break; - case ']': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBracketDepth--; - return Tok_RightBracket; - case '{': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBraceDepth++; - return Tok_LeftBrace; - case '}': - yyCh = getChar(); - if (yyNumPreprocessorSkipping == 0) - yyBraceDepth--; - return Tok_RightBrace; - case '|': - yyCh = getChar(); - if (yyCh == '|' || yyCh == '=') - yyCh = getChar(); - return Tok_SomeOperator; - case '~': - yyCh = getChar(); - return Tok_Tilde; - case '@': - yyCh = getChar(); - return Tok_At; - default: - // ### We should really prevent qdoc from looking at snippet files rather than - // ### suppress warnings when reading them. - if (yyNumPreprocessorSkipping == 0 - && !(yyTokLoc.fileName().endsWith(".qdoc") - || yyTokLoc.fileName().endsWith(".js"))) { - yyTokLoc.warning(QStringLiteral("Hostile character 0x%1 in C++ source") - .arg((uchar)yyCh, 1, 16)); - } - yyCh = getChar(); - } - } - } - - if (yyPreprocessorSkipping.count() > 1) { - yyTokLoc.warning(QStringLiteral("Expected #endif before end of file")); - // clear it out or we get an infinite loop! - while (!yyPreprocessorSkipping.isEmpty()) { - popSkipping(); - } - } - - strcpy(yyLex, "end-of-input"); - yyLexLen = strlen(yyLex); - return Tok_Eoi; -} - -void Tokenizer::initialize() -{ - Config &config = Config::instance(); - QString versionSym = config.getString(CONFIG_VERSIONSYM); - - QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING); - if (sourceEncoding.isEmpty()) - sourceEncoding = QLatin1String("UTF-8"); - sourceDecoder = QStringDecoder(sourceEncoding.toUtf8()); - if (!sourceDecoder.isValid()) { - qWarning() << "Source encoding" << sourceEncoding << "is not supported. Using UTF-8."; - sourceDecoder = QStringDecoder::Utf8; - } - - comment = new QRegularExpression("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)", QRegularExpression::InvertedGreedinessOption); - versionX = new QRegularExpression("$cannot possibly match^"); - if (!versionSym.isEmpty()) - versionX->setPattern("^[ \t]*(?:" + QRegularExpression::escape(versionSym) - + ")[ \t]+\"([^\"]*)\"[ \t]*$"); - definedX = new QRegularExpression("^defined ?\\(?([A-Z_0-9a-z]+) ?\\)?$"); - - QStringList d = config.getStringList(CONFIG_DEFINES); - d += "qdoc"; - defines = new QRegularExpression(QRegularExpression::anchoredPattern(d.join('|'))); - falsehoods = new QRegularExpression(QRegularExpression::anchoredPattern(config.getStringList(CONFIG_FALSEHOODS).join('|'))); - - /* - The keyword hash table is always cleared before any words are inserted. - */ - memset(kwordHashTable, 0, sizeof(kwordHashTable)); - for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++) - insertKwordIntoHash(kwords[i], i + 1); - - ignoredTokensAndDirectives = new QHash<QByteArray, bool>; - - const QStringList tokens = - config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS); - for (const auto &token : tokens) { - const QByteArray tb = token.toLatin1(); - ignoredTokensAndDirectives->insert(tb, false); - insertKwordIntoHash(tb.data(), -1); - } - - const QStringList directives = - config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNOREDIRECTIVES); - for (const auto &directive : directives) { - const QByteArray db = directive.toLatin1(); - ignoredTokensAndDirectives->insert(db, true); - insertKwordIntoHash(db.data(), -1); - } -} - -/*! - The heap allocated variables are freed here. The keyword - hash table is not cleared here, but it is cleared in the - initialize() function, before any keywords are inserted. - */ -void Tokenizer::terminate() -{ - delete comment; - comment = nullptr; - delete versionX; - versionX = nullptr; - delete definedX; - definedX = nullptr; - delete defines; - defines = nullptr; - delete falsehoods; - falsehoods = nullptr; - delete ignoredTokensAndDirectives; - ignoredTokensAndDirectives = nullptr; -} - -void Tokenizer::init() -{ - yyLexBuf1 = new char[(int)yyLexBufSize]; - yyLexBuf2 = new char[(int)yyLexBufSize]; - yyPrevLex = yyLexBuf1; - yyPrevLex[0] = '\0'; - yyLex = yyLexBuf2; - yyLex[0] = '\0'; - yyLexLen = 0; - yyPreprocessorSkipping.push(false); - yyNumPreprocessorSkipping = 0; - yyBraceDepth = 0; - yyParenDepth = 0; - yyBracketDepth = 0; - yyCh = '\0'; - parsingMacro = false; -} - -void Tokenizer::start(const Location &loc) -{ - yyTokLoc = loc; - yyCurLoc = loc; - yyCurLoc.start(); - strcpy(yyPrevLex, "beginning-of-input"); - strcpy(yyLex, "beginning-of-input"); - yyLexLen = strlen(yyLex); - yyBraceDepth = 0; - yyParenDepth = 0; - yyBracketDepth = 0; - yyCh = '\0'; - yyCh = getChar(); -} - -/* - Returns the next token, if # was met. This function interprets the - preprocessor directive, skips over any #ifdef'd out tokens, and returns the - token after all of that. -*/ -int Tokenizer::getTokenAfterPreprocessor() -{ - yyCh = getChar(); - while (isspace(yyCh) && yyCh != '\n') - yyCh = getChar(); - - /* - #directive condition - */ - QString directive; - QString condition; - - while (isalpha(yyCh)) { - directive += QChar(yyCh); - yyCh = getChar(); - } - if (!directive.isEmpty()) { - while (yyCh != EOF && yyCh != '\n') { - if (yyCh == '\\') { - yyCh = getChar(); - if (yyCh == '\r') - yyCh = getChar(); - } - condition += yyCh; - yyCh = getChar(); - } - condition.remove(*comment); - condition = condition.simplified(); - - /* - The #if, #ifdef, #ifndef, #elif, #else, and #endif - directives have an effect on the skipping stack. For - instance, if the code processed so far is - - #if 1 - #if 0 - #if 1 - // ... - #else - - the skipping stack contains, from bottom to top, false true - true (assuming 0 is false and 1 is true). If at least one - entry of the stack is true, the tokens are skipped. - - This mechanism is simple yet hard to understand. - */ - if (directive[0] == QChar('i')) { - if (directive == QString("if")) - pushSkipping(!isTrue(condition)); - else if (directive == QString("ifdef")) - pushSkipping(!defines->match(condition).hasMatch()); - else if (directive == QString("ifndef")) - pushSkipping(defines->match(condition).hasMatch()); - } else if (directive[0] == QChar('e')) { - if (directive == QString("elif")) { - bool old = popSkipping(); - if (old) - pushSkipping(!isTrue(condition)); - else - pushSkipping(true); - } else if (directive == QString("else")) { - pushSkipping(!popSkipping()); - } else if (directive == QString("endif")) { - popSkipping(); - } - } else if (directive == QString("define")) { - auto match = versionX->match(condition); - if (match.hasMatch()) - yyVersion = match.captured(1); - } - } - - int tok; - do { - /* - We set yyLex now, and after getToken() this will be - yyPrevLex. This way, we skip over the preprocessor - directive. - */ - qstrcpy(yyLex, yyPrevLex); - - /* - If getToken() meets another #, it will call - getTokenAfterPreprocessor() once again, which could in turn - call getToken() again, etc. Unless there are 10,000 or so - preprocessor directives in a row, this shouldn't overflow - the stack. - */ - tok = getToken(); - } while (yyNumPreprocessorSkipping > 0 && tok != Tok_Eoi); - return tok; -} - -/* - Pushes a new skipping value onto the stack. This corresponds to entering a - new #if block. -*/ -void Tokenizer::pushSkipping(bool skip) -{ - yyPreprocessorSkipping.push(skip); - if (skip) - yyNumPreprocessorSkipping++; -} - -/* - Pops a skipping value from the stack. This corresponds to reaching a #endif. -*/ -bool Tokenizer::popSkipping() -{ - if (yyPreprocessorSkipping.isEmpty()) { - yyTokLoc.warning(QStringLiteral("Unexpected #elif, #else or #endif")); - return true; - } - - bool skip = yyPreprocessorSkipping.pop(); - if (skip) - yyNumPreprocessorSkipping--; - return skip; -} - -/* - Returns \c true if the condition evaluates as true, otherwise false. The - condition is represented by a string. Unsophisticated parsing techniques are - used. The preprocessing method could be named StriNg-Oriented PreProcessing, - as SNOBOL stands for StriNg-Oriented symBOlic Language. -*/ -bool Tokenizer::isTrue(const QString &condition) -{ - int firstOr = -1; - int firstAnd = -1; - int parenDepth = 0; - - /* - Find the first logical operator at top level, but be careful - about precedence. Examples: - - X || Y // the or - X || Y || Z // the leftmost or - X || Y && Z // the or - X && Y || Z // the or - (X || Y) && Z // the and - */ - for (int i = 0; i < condition.length() - 1; i++) { - QChar ch = condition[i]; - if (ch == QChar('(')) { - parenDepth++; - } else if (ch == QChar(')')) { - parenDepth--; - } else if (parenDepth == 0) { - if (condition[i + 1] == ch) { - if (ch == QChar('|')) { - firstOr = i; - break; - } else if (ch == QChar('&')) { - if (firstAnd == -1) - firstAnd = i; - } - } - } - } - if (firstOr != -1) - return isTrue(condition.left(firstOr)) || isTrue(condition.mid(firstOr + 2)); - if (firstAnd != -1) - return isTrue(condition.left(firstAnd)) && isTrue(condition.mid(firstAnd + 2)); - - QString t = condition.simplified(); - if (t.isEmpty()) - return true; - - if (t[0] == QChar('!')) - return !isTrue(t.mid(1)); - if (t[0] == QChar('(') && t.endsWith(QChar(')'))) - return isTrue(t.mid(1, t.length() - 2)); - - auto match = definedX->match(t); - if (match.hasMatch()) - return defines->match(match.captured(1)).hasMatch(); - else - return !falsehoods->match(t).hasMatch(); -} - -QString Tokenizer::lexeme() const -{ - return sourceDecoder(yyLex); -} - -QString Tokenizer::previousLexeme() const -{ - return sourceDecoder(yyPrevLex); -} - -QT_END_NAMESPACE |