about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Francois Ferrand <thetypz@gmail.com> 2014-10-24 14:55:43 +0200
committer: Francois Ferrand <thetypz@gmail.com> 2016-03-08 17:24:22 +0000
commit: 41b232962a9222ddc594ef7d564041af0c476cb9 (patch)
tree: e38343360733bd601d388c08a9fb20ff43744023
parent: 90571432280a36f5e9ef5da468f832cf1d973f66 (diff)
C++: fix trigraph parsing in macros.
Trigraphs must only be parsed before/during preprocessing. The preprocessor will now replace trigraphs with their standard form, and re-lexing in TranslationUnit will not try to parse any trigraph. Also added a few missing trigraphs: ??=, ??', ??! and ??-.

Task-number: QTCREATORBUG-13253
Change-Id: I1723ed53b00090b878c22b83b7e963b647b65f72
Reviewed-by: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>
-rw-r--r-- src/libs/3rdparty/cplusplus/Lexer.cpp 44
-rw-r--r-- src/libs/3rdparty/cplusplus/Lexer.h 1
-rw-r--r-- src/libs/3rdparty/cplusplus/Token.h 4
-rw-r--r-- src/libs/cplusplus/pp-engine.cpp 21
-rw-r--r-- tests/auto/cplusplus/lexer/tst_lexer.cpp 46
-rw-r--r-- tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp 24
6 files changed, 131 insertions, 9 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp
index db7cb9e0f83..68f87f126c1 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -336,20 +336,62 @@ void Lexer::scan_helper(Token *tok)
break;
case '?':
- if (_yychar == '?') {
+ if (_yychar == '?' && f._ppMode) {
yyinp();
if (_yychar == '(') {
yyinp();
tok->f.kind = T_LBRACKET;
+ tok->f.trigraph = true;
} else if (_yychar == ')') {
yyinp();
tok->f.kind = T_RBRACKET;
+ tok->f.trigraph = true;
} else if (_yychar == '<') {
yyinp();
tok->f.kind = T_LBRACE;
+ tok->f.trigraph = true;
} else if (_yychar == '>') {
yyinp();
tok->f.kind = T_RBRACE;
+ tok->f.trigraph = true;
+ } else if (_yychar == '=') {
+ yyinp();
+ tok->f.trigraph = true;
+ if (_yychar == '?' && *(_currentChar + 1) == '?' && *(_currentChar + 2) == '=') {
+ yyinp();
+ yyinp();
+ yyinp();
+ tok->f.kind = T_POUND_POUND;
+ } else {
+ tok->f.kind = T_POUND;
+ }
+ } else if (_yychar == '\'') {
+ yyinp();
+ if (_yychar == '=') {
+ yyinp();
+ tok->f.kind = T_CARET_EQUAL;
+ } else {
+ tok->f.kind = T_CARET;
+ }
+ tok->f.trigraph = true;
+ } else if (_yychar == '!') {
+ yyinp();
+ if (_yychar == '=') {
+ yyinp();
+ tok->f.kind = T_PIPE_EQUAL;
+ } else {
+ tok->f.kind = T_PIPE;
+ }
+ tok->f.trigraph = true;
+ } else if (_yychar == '-') {
+ yyinp();
+ if (_yychar == '=') {
+ yyinp();
+ tok->f.kind = T_TILDE_EQUAL;
+ } else {
+ tok->f.kind = T_TILDE;
+ }
+ tok->f.trigraph = true;
}
} else {
tok->f.kind = T_QUESTION;
diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h
index d47dcdf12ae..8e862ea54c4 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -125,6 +125,7 @@ private:
unsigned _scanKeywords: 1;
unsigned _scanAngleStringLiteralTokens: 1;
unsigned _ppMode: 1;
+ unsigned _ignoreTrigraph : 1;
};
struct State {
diff --git a/src/libs/3rdparty/cplusplus/Token.h b/src/libs/3rdparty/cplusplus/Token.h
index 39a74031c9e..afe732d4931 100644
--- a/src/libs/3rdparty/cplusplus/Token.h
+++ b/src/libs/3rdparty/cplusplus/Token.h
@@ -371,8 +371,10 @@ public:
// The token is C++11 user-defined literal such as:
// 12_km, 0.5_Pa, 'c'_X, "abd"_L, u16"xyz"_M
unsigned userDefinedLiteral : 1;
+ // Indicates the token is a trigraph
+ unsigned trigraph : 1;
// Unused...
- unsigned pad : 2;
+ unsigned pad : 1;
// The token length in bytes and UTF16 chars.
unsigned bytes : 16;
unsigned utf16chars : 16;
diff --git a/src/libs/cplusplus/pp-engine.cpp b/src/libs/cplusplus/pp-engine.cpp
index c505f94e3e0..134fb4d51c8 100644
--- a/src/libs/cplusplus/pp-engine.cpp
+++ b/src/libs/cplusplus/pp-engine.cpp
@@ -50,6 +50,7 @@
#include <cplusplus/Lexer.h>
#include <cplusplus/Token.h>
#include <cplusplus/Literals.h>
+#include <cplusplus/cppassert.h>
#include <utils/scopedswap.h>
@@ -1439,7 +1440,25 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source,
enforceSpacing(tk, macroExpanded);
// Finally output the token.
- currentOutputBuffer().append(tk.tokenStart(), tk.bytes());
+ if (!tk.f.trigraph) {
+ currentOutputBuffer().append(tk.tokenStart(), tk.bytes());
+ } else {
+ switch (tk.kind()) {
+ case T_LBRACKET: currentOutputBuffer().append("["); break;
+ case T_RBRACKET: currentOutputBuffer().append("]"); break;
+ case T_LBRACE: currentOutputBuffer().append("{"); break;
+ case T_RBRACE: currentOutputBuffer().append("}"); break;
+ case T_POUND: currentOutputBuffer().append("#"); break;
+ case T_POUND_POUND: currentOutputBuffer().append("##"); break;
+ case T_CARET: currentOutputBuffer().append("^"); break;
+ case T_CARET_EQUAL: currentOutputBuffer().append("^="); break;
+ case T_PIPE: currentOutputBuffer().append("|"); break;
+ case T_PIPE_EQUAL: currentOutputBuffer().append("|="); break;
+ case T_TILDE: currentOutputBuffer().append("~"); break;
+ case T_TILDE_EQUAL: currentOutputBuffer().append("~="); break;
+ default: CPP_ASSERT(0, qDebug() << tk.spell()); break;
+ }
+ }
} while (tk.isNot(T_EOF_SYMBOL));
diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp b/tests/auto/cplusplus/lexer/tst_lexer.cpp
index 673a59008a0..6f32622cfcc 100644
--- a/tests/auto/cplusplus/lexer/tst_lexer.cpp
+++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp
@@ -70,6 +70,8 @@ private slots:
void literals_data();
void preprocessor();
void preprocessor_data();
+ void trigraph();
+ void trigraph_data();
void bytes_and_utf16chars();
void bytes_and_utf16chars_data();
@@ -263,12 +265,6 @@ void tst_SimpleLexer::basic_data()
<< T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT
<< T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_CPP_DOXY_COMMENT;
QTest::newRow(source) << source << expectedTokenKindList;
-
- source = "?" "?(?" "?)?" "?<?" "?>a?b:c";
- expectedTokenKindList = TokenKindList()
- << T_LBRACKET << T_RBRACKET << T_LBRACE << T_RBRACE
- << T_IDENTIFIER << T_QUESTION << T_IDENTIFIER << T_COLON << T_IDENTIFIER;
- QTest::newRow(source) << source << expectedTokenKindList;
}
void tst_SimpleLexer::literals()
@@ -744,5 +740,43 @@ void tst_SimpleLexer::incremental_data()
<< (TokenKindList() << T_IDENTIFIER);
}
+void tst_SimpleLexer::trigraph()
+{
+ QFETCH(QByteArray, source);
+ QFETCH(TokenKindList, expectedTokenKindList);
+
+ run(source, toTokens(expectedTokenKindList), false, CompareKind, true);
+}
+
+void tst_SimpleLexer::trigraph_data()
+{
+ QTest::addColumn<QByteArray>("source");
+ QTest::addColumn<TokenKindList>("expectedTokenKindList");
+
+ QTest::newRow("pound_trigraph") << _("?" "?=") << (TokenKindList() << T_POUND);
+
+ QTest::newRow("caret_trigraph") << _("?" "?'") << (TokenKindList() << T_CARET);
+
+ QTest::newRow("left_bracket_trigraph") << _("?" "?(") << (TokenKindList() << T_LBRACKET);
+
+ QTest::newRow("right_bracket_trigraph") << _("?" "?)") << (TokenKindList() << T_RBRACKET);
+
+ QTest::newRow("pipe_trigraph") << _("?" "?!") << (TokenKindList() << T_PIPE);
+
+ QTest::newRow("left_brace_trigraph") << _("?" "?<") << (TokenKindList() << T_LBRACE);
+
+ QTest::newRow("right_brace_trigraph") << _("?" "?>") << (TokenKindList() << T_RBRACE);
+
+ QTest::newRow("tilde_trigraph") << _("?" "?-") << (TokenKindList() << T_TILDE);
+
+ QTest::newRow("pound_pound_trigraph") << _("?" "?=" "?" "?=") << (TokenKindList() << T_POUND_POUND);
+
+ QTest::newRow("caret_equal_trigraph") << _("?" "?'=") << (TokenKindList() << T_CARET_EQUAL);
+
+ QTest::newRow("pipe_equal_trigraph") << _("?" "?!=") << (TokenKindList() << T_PIPE_EQUAL);
+
+ QTest::newRow("tilde_equal_trigraph") << _("?" "?-=") << (TokenKindList() << T_TILDE_EQUAL);
+}
+
QTEST_APPLESS_MAIN(tst_SimpleLexer)
#include "tst_lexer.moc"
diff --git a/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp b/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp
index 3eb71da58bd..2b4b6cd97a0 100644
--- a/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp
+++ b/tests/auto/cplusplus/preprocessor/tst_preprocessor.cpp
@@ -405,6 +405,7 @@ private slots:
void concat();
void excessive_nesting();
void multi_byte_code_point_in_expansion();
+ void trigraph();
};
// Remove all #... lines, and 'simplify' string, to allow easily comparing the result
@@ -2092,6 +2093,29 @@ void tst_Preprocessor::compare_input_output(bool keepComments)
QVERIFY(compare(prep, output));
}
+void tst_Preprocessor::trigraph()
+{
+ Environment env;
+ Preprocessor preprocess(0, &env);
+
+ // We cannot use actual trigraphs in strings, they would be replaced by the preprocessor when
+ // compiling the test, so we use strings with 'j' character instead of '?', and perform a
+ // replacement at runtime.
+
+ // Trigraphs in source code are replaced
+ QByteArray prep = preprocess.run(QLatin1String("<stdin>"),
+ QByteArray("jj( jj) jj< jj> jj= jj=jj= jj' jj'= jj! jj!= jj- jj-=").replace('j', '?'),
+ true, false);
+ QCOMPARE(prep.constData(), "[ ] { } # ## ^ ^= | |= ~ ~=");
+
+ // Trigraphs that appear after macro expansion are not replaced
+ prep = preprocess.run(QLatin1String("<stdin>"),
+ "#define TRIGRAPH(x...) ? ## x ## ? ## x ## =\n"
+ "TRIGRAPH()",
+ true, false);
+ QCOMPARE(prep.constData(), QByteArray("\njj=").replace('j', '?').data());
+}
+
QTEST_APPLESS_MAIN(tst_Preprocessor)
#include "tst_preprocessor.moc"