diff options
author | Francois Ferrand <thetypz@gmail.com> | 2014-10-24 14:55:43 +0200 |
---|---|---|
committer | Francois Ferrand <thetypz@gmail.com> | 2016-03-08 17:24:22 +0000 |
commit | 41b232962a9222ddc594ef7d564041af0c476cb9 (patch) | |
tree | e38343360733bd601d388c08a9fb20ff43744023 | |
parent | 90571432280a36f5e9ef5da468f832cf1d973f66 (diff) |
C++: fix trigraph parsing in macros.
Trigraphs must only be parsed before/during preprocessing. The preprocessor
will now replace trigraphs with their standard form, and re-lexing in
TranslationUnit will not try to parse any trigraph.
Also added a few missing trigraphs: ??=, ??', ??! and ??-.
Task-number: QTCREATORBUG-13253
Change-Id: I1723ed53b00090b878c22b83b7e963b647b65f72
Reviewed-by: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>
-rw-r--r-- | src/libs/3rdparty/cplusplus/Lexer.cpp | 44 | ||||
-rw-r--r-- | src/libs/3rdparty/cplusplus/Lexer.h | 1 | ||||
-rw-r--r-- | src/libs/3rdparty/cplusplus/Token.h | 4 | ||||
-rw-r--r-- | src/libs/cplusplus/pp-engine.cpp | 21 | ||||
-rw-r--r-- | tests/auto/cplusplus/lexer/tst_lexer.cpp | 46 | ||||
-rw-r--r-- | tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp | 24 |
6 files changed, 131 insertions, 9 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp index db7cb9e0f83..68f87f126c1 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.cpp +++ b/src/libs/3rdparty/cplusplus/Lexer.cpp @@ -336,20 +336,62 @@ void Lexer::scan_helper(Token *tok) break; case '?': - if (_yychar == '?') { + if (_yychar == '?' && f._ppMode) { yyinp(); if (_yychar == '(') { yyinp(); tok->f.kind = T_LBRACKET; + tok->f.trigraph = true; } else if (_yychar == ')') { yyinp(); tok->f.kind = T_RBRACKET; + tok->f.trigraph = true; } else if (_yychar == '<') { yyinp(); tok->f.kind = T_LBRACE; + tok->f.trigraph = true; } else if (_yychar == '>') { yyinp(); tok->f.kind = T_RBRACE; + tok->f.trigraph = true; + } else if (_yychar == '=') { + yyinp(); + tok->f.trigraph = true; + if (_yychar == '?' && *(_currentChar + 1) == '?' && *(_currentChar + 2) == '=') { + yyinp(); + yyinp(); + yyinp(); + tok->f.kind = T_POUND_POUND; + } else { + tok->f.kind = T_POUND; + } + } else if (_yychar == '\'') { + yyinp(); + if (_yychar == '=') { + yyinp(); + tok->f.kind = T_CARET_EQUAL; + } else { + tok->f.kind = T_CARET; + } + tok->f.trigraph = true; + } else if (_yychar == '!') { + yyinp(); + if (_yychar == '=') { + yyinp(); + tok->f.kind = T_PIPE_EQUAL; + } else { + tok->f.kind = T_PIPE; + } + tok->f.trigraph = true; + } else if (_yychar == '-') { + yyinp(); + if (_yychar == '=') { + yyinp(); + tok->f.kind = T_TILDE_EQUAL; + } else { + tok->f.kind = T_TILDE; + } + tok->f.trigraph = true; } } else { tok->f.kind = T_QUESTION; diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h index d47dcdf12ae..8e862ea54c4 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.h +++ b/src/libs/3rdparty/cplusplus/Lexer.h @@ -125,6 +125,7 @@ private: unsigned _scanKeywords: 1; unsigned _scanAngleStringLiteralTokens: 1; unsigned _ppMode: 1; + unsigned _ignoreTrigraph : 1; }; struct State { diff --git a/src/libs/3rdparty/cplusplus/Token.h b/src/libs/3rdparty/cplusplus/Token.h index 39a74031c9e..afe732d4931 100644 --- a/src/libs/3rdparty/cplusplus/Token.h +++ b/src/libs/3rdparty/cplusplus/Token.h @@ -371,8 +371,10 @@ public: // The token is C++11 user-defined literal such as: // 12_km, 0.5_Pa, 'c'_X, "abd"_L, u16"xyz"_M unsigned userDefinedLiteral : 1; + // Indicates the token is a trigraph + unsigned trigraph : 1; // Unused... - unsigned pad : 2; + unsigned pad : 1; // The token length in bytes and UTF16 chars. unsigned bytes : 16; unsigned utf16chars : 16; diff --git a/src/libs/cplusplus/pp-engine.cpp b/src/libs/cplusplus/pp-engine.cpp index c505f94e3e0..134fb4d51c8 100644 --- a/src/libs/cplusplus/pp-engine.cpp +++ b/src/libs/cplusplus/pp-engine.cpp @@ -50,6 +50,7 @@ #include <cplusplus/Lexer.h> #include <cplusplus/Token.h> #include <cplusplus/Literals.h> +#include <cplusplus/cppassert.h> #include <utils/scopedswap.h> @@ -1439,7 +1440,25 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source, enforceSpacing(tk, macroExpanded); // Finally output the token. - currentOutputBuffer().append(tk.tokenStart(), tk.bytes()); + if (!tk.f.trigraph) { + currentOutputBuffer().append(tk.tokenStart(), tk.bytes()); + } else { + switch (tk.kind()) { + case T_LBRACKET: currentOutputBuffer().append("["); break; + case T_RBRACKET: currentOutputBuffer().append("]"); break; + case T_LBRACE: currentOutputBuffer().append("{"); break; + case T_RBRACE: currentOutputBuffer().append("}"); break; + case T_POUND: currentOutputBuffer().append("#"); break; + case T_POUND_POUND: currentOutputBuffer().append("##"); break; + case T_CARET: currentOutputBuffer().append("^"); break; + case T_CARET_EQUAL: currentOutputBuffer().append("^="); break; + case T_PIPE: currentOutputBuffer().append("|"); break; + case T_PIPE_EQUAL: currentOutputBuffer().append("|="); break; + case T_TILDE: currentOutputBuffer().append("~"); break; + case T_TILDE_EQUAL: currentOutputBuffer().append("~="); break; + default: CPP_ASSERT(0, qDebug() << tk.spell()); break; + } + } } while (tk.isNot(T_EOF_SYMBOL)); diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp b/tests/auto/cplusplus/lexer/tst_lexer.cpp index 673a59008a0..6f32622cfcc 100644 --- a/tests/auto/cplusplus/lexer/tst_lexer.cpp +++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp @@ -70,6 +70,8 @@ private slots: void literals_data(); void preprocessor(); void preprocessor_data(); + void trigraph(); + void trigraph_data(); void bytes_and_utf16chars(); void bytes_and_utf16chars_data(); @@ -263,12 +265,6 @@ void tst_SimpleLexer::basic_data() << T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; - - source = "?" "?(?" "?)?" "?<?" "?>a?b:c"; - expectedTokenKindList = TokenKindList() - << T_LBRACKET << T_RBRACKET << T_LBRACE << T_RBRACE - << T_IDENTIFIER << T_QUESTION << T_IDENTIFIER << T_COLON << T_IDENTIFIER; - QTest::newRow(source) << source << expectedTokenKindList; } void tst_SimpleLexer::literals() @@ -744,5 +740,43 @@ void tst_SimpleLexer::incremental_data() << (TokenKindList() << T_IDENTIFIER); } +void tst_SimpleLexer::trigraph() +{ + QFETCH(QByteArray, source); + QFETCH(TokenKindList, expectedTokenKindList); + + run(source, toTokens(expectedTokenKindList), false, CompareKind, true); +} + +void tst_SimpleLexer::trigraph_data() +{ + QTest::addColumn<QByteArray>("source"); + QTest::addColumn<TokenKindList>("expectedTokenKindList"); + + QTest::newRow("pound_trigraph") << _("?" "?=") << (TokenKindList() << T_POUND); + + QTest::newRow("caret_trigraph") << _("?" "?'") << (TokenKindList() << T_CARET); + + QTest::newRow("left_bracket_trigraph") << _("?" "?(") << (TokenKindList() << T_LBRACKET); + + QTest::newRow("right_bracket_trigraph") << _("?" "?)") << (TokenKindList() << T_RBRACKET); + + QTest::newRow("pipe_trigraph") << _("?" "?!") << (TokenKindList() << T_PIPE); + + QTest::newRow("left_brace_trigraph") << _("?" "?<") << (TokenKindList() << T_LBRACE); + + QTest::newRow("right_brace_trigraph") << _("?" "?>") << (TokenKindList() << T_RBRACE); + + QTest::newRow("tilde_trigraph") << _("?" "?-") << (TokenKindList() << T_TILDE); + + QTest::newRow("pound_pound_trigraph") << _("?" "?=" "?" "?=") << (TokenKindList() << T_POUND_POUND); + + QTest::newRow("caret_equal_trigraph") << _("?" "?'=") << (TokenKindList() << T_CARET_EQUAL); + + QTest::newRow("pipe_equal_trigraph") << _("?" "?!=") << (TokenKindList() << T_PIPE_EQUAL); + + QTest::newRow("tilde_equal_trigraph") << _("?" "?-=") << (TokenKindList() << T_TILDE_EQUAL); +} + QTEST_APPLESS_MAIN(tst_SimpleLexer) #include "tst_lexer.moc" diff --git a/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp b/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp index 3eb71da58bd..2b4b6cd97a0 100644 --- a/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp +++ b/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp @@ -405,6 +405,7 @@ private slots: void concat(); void excessive_nesting(); void multi_byte_code_point_in_expansion(); + void trigraph(); }; // Remove all #... lines, and 'simplify' string, to allow easily comparing the result @@ -2092,6 +2093,29 @@ void tst_Preprocessor::compare_input_output(bool keepComments) QVERIFY(compare(prep, output)); } +void tst_Preprocessor::trigraph() +{ + Environment env; + Preprocessor preprocess(0, &env); + + // We cannot use actual trigraphs in strings, they would be replaced by the preprocessor when + // compiling the test, so we use strings with 'j' character instead of '?', and perform a + // replacement at runtime. + + // Trigraphs in source code are replaced + QByteArray prep = preprocess.run(QLatin1String("<stdin>"), + QByteArray("jj( jj) jj< jj> jj= jj=jj= jj' jj'= jj! jj!= jj- jj-=").replace('j', '?'), + true, false); + QCOMPARE(prep.constData(), "[ ] { } # ## ^ ^= | |= ~ ~="); + + // Trigraphs that appear after macro expansion are not replaced + prep = preprocess.run(QLatin1String("<stdin>"), + "#define TRIGRAPH(x...) ? ## x ## ? ## x ## =\n" + "TRIGRAPH()", + true, false); + QCOMPARE(prep.constData(), QByteArray("\njj=").replace('j', '?').data()); +} + QTEST_APPLESS_MAIN(tst_Preprocessor) #include "tst_preprocessor.moc" |