summary refs log tree commit diff stats
path: root/src/qdoc/qdoc/src/qdoc/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/qdoc/qdoc/src/qdoc/tokenizer.h')
-rw-r--r-- src/qdoc/qdoc/src/qdoc/tokenizer.h 179
1 files changed, 179 insertions, 0 deletions
diff --git a/src/qdoc/qdoc/src/qdoc/tokenizer.h b/src/qdoc/qdoc/src/qdoc/tokenizer.h
new file mode 100644
index 000000000..d5669dfb7
--- /dev/null
+++ b/src/qdoc/qdoc/src/qdoc/tokenizer.h
@@ -0,0 +1,179 @@
+// Copyright (C) 2021 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+#ifndef TOKENIZER_H
+#define TOKENIZER_H
+
+#include "location.h"
+
+#include <QtCore/qfile.h>
+#include <QtCore/qstack.h>
+#include <QtCore/qstring.h>
+
+QT_BEGIN_NAMESPACE
+
+/*
+ Here come the C++ tokens we support. The first part contains
+ all-purpose tokens; then come keywords.
+
+ If you add a keyword, make sure to modify the keyword array in
+ tokenizer.cpp as well, and possibly adjust Tok_FirstKeyword and
+ Tok_LastKeyword.
+*/
+enum { // unnamed and unscoped: the Tok_* names are visible directly in the enclosing scope
+ Tok_Eoi, // first enumerator, so its value is 0
+ Tok_Ampersand,
+ Tok_Aster,
+ Tok_Caret,
+ Tok_LeftParen,
+ Tok_RightParen,
+ Tok_LeftParenAster,
+ Tok_Equal,
+ Tok_LeftBrace,
+ Tok_RightBrace,
+ Tok_Semicolon,
+ Tok_Colon,
+ Tok_LeftAngle,
+ Tok_RightAngle,
+ Tok_Comma,
+ Tok_Ellipsis,
+ Tok_Gulbrandsen, // NOTE(review): presumably the '::' scope operator; confirm in tokenizer.cpp
+ Tok_LeftBracket,
+ Tok_RightBracket,
+ Tok_Tilde,
+ Tok_SomeOperator,
+ Tok_Number,
+ Tok_String,
+ Tok_Doc,
+ Tok_Comment,
+ Tok_Ident,
+ Tok_At, // last of the all-purpose tokens; every enumerator below is a keyword
+ Tok_char, // first keyword; Tok_FirstKeyword aliases this value
+ Tok_class,
+ Tok_const,
+ Tok_double,
+ Tok_int,
+ Tok_long,
+ Tok_operator,
+ Tok_short,
+ Tok_signed,
+ Tok_typename,
+ Tok_unsigned,
+ Tok_void,
+ Tok_volatile,
+ Tok_int64,
+ Tok_QPrivateSignal, // last keyword; Tok_LastKeyword aliases this value
+ Tok_FirstKeyword = Tok_char, // lower bound of the keyword range; introduces no new value
+ Tok_LastKeyword = Tok_QPrivateSignal // upper bound of the keyword range; update when appending keywords
+};
+
+/*
+ The Tokenizer class implements lexical analysis of C++ source
+ files.
+
+ Not every operator or keyword of C++ is recognized; only those
+ that are interesting to us. Some Qt keywords or macros are also
+ recognized.
+*/
+
+class Tokenizer
+{
+public:
+    Tokenizer(const Location &loc, QByteArray in);
+    Tokenizer(const Location &loc, QFile &file);
+
+    ~Tokenizer();
+
+    int getToken();
+    void setParsingFnOrMacro(bool macro) { m_parsingMacro = macro; } // stores the flag in m_parsingMacro
+
+    [[nodiscard]] const Location &location() const { return m_tokLoc; }
+    [[nodiscard]] QString previousLexeme() const;
+    [[nodiscard]] QString lexeme() const;
+    [[nodiscard]] QString version() const { return m_version; }
+    [[nodiscard]] int parenDepth() const { return m_parenDepth; }
+    [[nodiscard]] int bracketDepth() const { return m_bracketDepth; }
+
+    static void initialize();
+    static void terminate();
+    static bool isTrue(const QString &condition);
+
+private:
+    void init();
+    void start(const Location &loc);
+    /*
+      Size limit of a token: getChar() stores at most yyLexBufSize - 1 characters
+      plus the terminating '\0'. When a token is longer, a warning is issued and
+      parsing continues, discarding the characters that don't fit into the buffer.
+    */
+    enum { yyLexBufSize = 1048576 };
+
+    // Returns the next byte of m_in and advances m_pos, or returns EOF once
+    // the whole input has been read. The byte is widened through uchar so
+    // that 0xFF is returned as 255 instead of -1, which would be mistaken
+    // for EOF where char is signed.
+    int getch() { return m_pos == m_in.size() ? EOF : int(uchar(m_in[m_pos++])); }
+
+    // Appends the current character, m_ch, to the lexeme buffer, advances
+    // m_curLoc past it and returns the character that follows it in the
+    // input, or EOF. m_ch itself is not modified here.
+    inline int getChar()
+    {
+        using namespace Qt::StringLiterals;
+
+        if (m_ch == EOF)
+            return EOF;
+        if (m_lexLen < size_t(yyLexBufSize) - 1) {
+            m_lex[m_lexLen++] = static_cast<char>(m_ch);
+            m_lex[m_lexLen] = '\0';
+        } else if (!token_too_long_warning_was_issued) {
+            location().warning(
+                u"The content is too long.\n"_s,
+                u"The maximum amount of characters for this content is %1.\n"_s.arg(yyLexBufSize) +
+                "Consider splitting it or reducing its size."
+            );
+
+            token_too_long_warning_was_issued = true;
+        }
+        m_curLoc.advance(QChar(m_ch));
+        // getch() yields EOF or a value in range [0..255], which avoids
+        // assert messages when using debug CRT that checks its input.
+        return getch();
+    }
+
+    int getTokenAfterPreprocessor();
+    void pushSkipping(bool skip);
+    bool popSkipping();
+
+    Location m_tokLoc; // returned by location(); also where the "too long" warning is reported
+    Location m_curLoc; // advanced by getChar() for every character it consumes
+    char *m_lexBuf1 { nullptr }; // NOTE(review): allocation and ownership are not visible in this header
+    char *m_lexBuf2 { nullptr };
+    char *m_prevLex { nullptr };
+    char *m_lex { nullptr }; // buffer getChar() appends the characters of the current token to
+    size_t m_lexLen {}; // number of characters stored in m_lex, terminating '\0' excluded
+    QStack<bool> m_preprocessorSkipping;
+    int m_numPreprocessorSkipping {};
+    int m_braceDepth {};
+    int m_parenDepth {};
+    int m_bracketDepth {};
+    int m_ch {}; // current character, or EOF; getChar() reads it but does not update it
+
+    QString m_version {};
+    bool m_parsingMacro {};
+
+    // Used to ensure that the warning that is issued when a token is
+    // too long to fit into our fixed sized buffer is not repeated for each
+    // character of that token after the last saved one.
+    // The flag is reset whenever a new token is requested, so as to allow
+    // reporting all such tokens that are too long during a single execution.
+    bool token_too_long_warning_was_issued{false};
+
+protected:
+    QByteArray m_in {}; // input bytes handed out one at a time by getch()
+    int m_pos {}; // index in m_in of the next byte getch() returns
+};
+
+QT_END_NAMESPACE
+
+#endif