diff options
Diffstat (limited to 'src/qdoc/qdoc/src/qdoc/tokenizer.h')
-rw-r--r-- | src/qdoc/qdoc/src/qdoc/tokenizer.h | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/src/qdoc/qdoc/src/qdoc/tokenizer.h b/src/qdoc/qdoc/src/qdoc/tokenizer.h new file mode 100644 index 000000000..d5669dfb7 --- /dev/null +++ b/src/qdoc/qdoc/src/qdoc/tokenizer.h @@ -0,0 +1,179 @@ +// Copyright (C) 2021 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 + +#ifndef TOKENIZER_H +#define TOKENIZER_H + +#include "location.h" + +#include <QtCore/qfile.h> +#include <QtCore/qstack.h> +#include <QtCore/qstring.h> + +QT_BEGIN_NAMESPACE + +/* + Here come the C++ tokens we support. The first part contains + all-purpose tokens; then come keywords. + + If you add a keyword, make sure to modify the keyword array in + tokenizer.cpp as well, and possibly adjust Tok_FirstKeyword and + Tok_LastKeyword. +*/ +enum { + Tok_Eoi, + Tok_Ampersand, + Tok_Aster, + Tok_Caret, + Tok_LeftParen, + Tok_RightParen, + Tok_LeftParenAster, + Tok_Equal, + Tok_LeftBrace, + Tok_RightBrace, + Tok_Semicolon, + Tok_Colon, + Tok_LeftAngle, + Tok_RightAngle, + Tok_Comma, + Tok_Ellipsis, + Tok_Gulbrandsen, + Tok_LeftBracket, + Tok_RightBracket, + Tok_Tilde, + Tok_SomeOperator, + Tok_Number, + Tok_String, + Tok_Doc, + Tok_Comment, + Tok_Ident, + Tok_At, + Tok_char, + Tok_class, + Tok_const, + Tok_double, + Tok_int, + Tok_long, + Tok_operator, + Tok_short, + Tok_signed, + Tok_typename, + Tok_unsigned, + Tok_void, + Tok_volatile, + Tok_int64, + Tok_QPrivateSignal, + Tok_FirstKeyword = Tok_char, + Tok_LastKeyword = Tok_QPrivateSignal +}; + +/* + The Tokenizer class implements lexical analysis of C++ source + files. + + Not every operator or keyword of C++ is recognized; only those + that are interesting to us. Some Qt keywords or macros are also + recognized. +*/ + +class Tokenizer +{ +public: + Tokenizer(const Location &loc, QByteArray in); + Tokenizer(const Location &loc, QFile &file); + + ~Tokenizer(); + + int getToken(); + void setParsingFnOrMacro(bool macro) { m_parsingMacro = macro; } + + [[nodiscard]] const Location &location() const { return m_tokLoc; } + [[nodiscard]] QString previousLexeme() const; + [[nodiscard]] QString lexeme() const; + [[nodiscard]] QString version() const { return m_version; } + [[nodiscard]] int parenDepth() const { return m_parenDepth; } + [[nodiscard]] int bracketDepth() const { return m_bracketDepth; } + + static void initialize(); + static void terminate(); + static bool isTrue(const QString &condition); + +private: + void init(); + void start(const Location &loc); + /* + Represents the maximum amount of characters that a token can be composed + of. + + When a token with more characters than the maximum amount is encountered, a + warning is issued and parsing continues, discarding all characters from the + currently parsed token that don't fit into the buffer. + */ + enum { yyLexBufSize = 1048576 }; + + int getch() { return m_pos == m_in.size() ? EOF : m_in[m_pos++]; } + + inline int getChar() + { + using namespace Qt::StringLiterals; + + if (m_ch == EOF) + return EOF; + if (m_lexLen < yyLexBufSize - 1) { + m_lex[m_lexLen++] = (char)m_ch; + m_lex[m_lexLen] = '\0'; + } else if (!token_too_long_warning_was_issued) { + location().warning( + u"The content is too long.\n"_s, + u"The maximum amount of characters for this content is %1.\n"_s.arg(yyLexBufSize) + + "Consider splitting it or reducing its size." + ); + + token_too_long_warning_was_issued = true; + } + m_curLoc.advance(QChar(m_ch)); + int ch = getch(); + if (ch == EOF) + return EOF; + // cast explicitly to make sure the value of ch + // is in range [0..255] to avoid assert messages + // when using debug CRT that checks its input. + return int(uint(uchar(ch))); + } + + int getTokenAfterPreprocessor(); + void pushSkipping(bool skip); + bool popSkipping(); + + Location m_tokLoc; + Location m_curLoc; + char *m_lexBuf1 { nullptr }; + char *m_lexBuf2 { nullptr }; + char *m_prevLex { nullptr }; + char *m_lex { nullptr }; + size_t m_lexLen {}; + QStack<bool> m_preprocessorSkipping; + int m_numPreprocessorSkipping {}; + int m_braceDepth {}; + int m_parenDepth {}; + int m_bracketDepth {}; + int m_ch {}; + + QString m_version {}; + bool m_parsingMacro {}; + + // Used to ensure that the warning that is issued when a token is + // too long to fit into our fixed sized buffer is not repeated for each + // character of that token after the last saved one. + // The flag is reset whenever a new token is requested, so as to allow + // reporting all such tokens that are too long during a single execution. + bool token_too_long_warning_was_issued{false}; + +protected: + QByteArray m_in {}; + int m_pos {}; +}; + +QT_END_NAMESPACE + +#endif |