summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/clang/Basic/DiagnosticLexKinds.td 3
-rw-r--r-- include/clang/Basic/TokenKinds.def 3
-rw-r--r-- include/clang/Basic/TokenKinds.h 6
-rw-r--r-- lib/Lex/Lexer.cpp 20
-rw-r--r-- lib/Lex/LiteralSupport.cpp 8
-rw-r--r-- lib/Lex/MacroArgs.cpp 1
-rw-r--r-- lib/Lex/PPExpressions.cpp 1
-rw-r--r-- lib/Lex/TokenConcatenation.cpp 8
-rw-r--r-- lib/Parse/ParseExpr.cpp 1
-rw-r--r-- lib/Parse/ParseTentative.cpp 1
-rw-r--r-- test/Lexer/utf8-char-literal.cpp 9
-rw-r--r-- www/cxx_status.html 9
12 files changed, 56 insertions, 14 deletions
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td
index 9bc27abc4a..2fcfa02c39 100644
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -201,6 +201,9 @@ def warn_c99_compat_unicode_literal : Warning<
def warn_cxx98_compat_unicode_literal : Warning<
"unicode literals are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
+def warn_cxx14_compat_u8_character_literal : Warning<
+ "unicode literals are incompatible with C++ standards before C++1z">,
+ InGroup<CXXPre1zCompat>, DefaultIgnore;
def warn_cxx11_compat_user_defined_literal : Warning<
"identifier after literal will be treated as a user-defined literal suffix "
"in C++11">, InGroup<CXX11Compat>, DefaultIgnore;
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def
index d2b06df549..c96b8eb4d6 100644
--- a/include/clang/Basic/TokenKinds.def
+++ b/include/clang/Basic/TokenKinds.def
@@ -133,6 +133,9 @@ TOK(numeric_constant) // 0x123
TOK(char_constant) // 'a'
TOK(wide_char_constant) // L'b'
+// C++1z Character Constants
+TOK(utf8_char_constant) // u8'a'
+
// C++11 Character Constants
TOK(utf16_char_constant) // u'a'
TOK(utf32_char_constant) // U'a'
diff --git a/include/clang/Basic/TokenKinds.h b/include/clang/Basic/TokenKinds.h
index e2cffb4d75..f4ecb3eb30 100644
--- a/include/clang/Basic/TokenKinds.h
+++ b/include/clang/Basic/TokenKinds.h
@@ -86,9 +86,9 @@ inline bool isStringLiteral(TokenKind K) {
/// constant, string, etc.
inline bool isLiteral(TokenKind K) {
return K == tok::numeric_constant || K == tok::char_constant ||
- K == tok::wide_char_constant || K == tok::utf16_char_constant ||
- K == tok::utf32_char_constant || isStringLiteral(K) ||
- K == tok::angle_string_literal;
+ K == tok::wide_char_constant || K == tok::utf8_char_constant ||
+ K == tok::utf16_char_constant || K == tok::utf32_char_constant ||
+ isStringLiteral(K) || K == tok::angle_string_literal;
}
/// \brief Return true if this is any of tok::annot_* kinds.
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 0aaad9bafb..c2e9716123 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1889,17 +1889,20 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
/// LexCharConstant - Lex the remainder of a character constant, after having
-/// lexed either ' or L' or u' or U'.
+/// lexed either ' or L' or u8' or u' or U'.
bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
tok::TokenKind Kind) {
// Does this character contain the \0 character?
const char *NulCharacter = nullptr;
- if (!isLexingRawMode() &&
- (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))
- Diag(BufferPtr, getLangOpts().CPlusPlus
- ? diag::warn_cxx98_compat_unicode_literal
- : diag::warn_c99_compat_unicode_literal);
+ if (!isLexingRawMode()) {
+ if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
+ Diag(BufferPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx98_compat_unicode_literal
+ : diag::warn_c99_compat_unicode_literal);
+ else if (Kind == tok::utf8_char_constant)
+ Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
+ }
char C = getAndAdvanceChar(CurPtr, Result);
if (C == '\'') {
@@ -3068,6 +3071,11 @@ LexNextToken:
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::utf8_string_literal);
+ if (Char2 == '\'' && LangOpts.CPlusPlus1z)
+ return LexCharConstant(
+ Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_char_constant);
if (Char2 == 'R' && LangOpts.CPlusPlus11) {
unsigned SizeTmp3;
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 096805c3cf..03331fb33e 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -28,6 +28,7 @@ static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
default: llvm_unreachable("Unknown token type!");
case tok::char_constant:
case tok::string_literal:
+ case tok::utf8_char_constant:
case tok::utf8_string_literal:
return Target.getCharWidth();
case tok::wide_char_constant:
@@ -1031,9 +1032,10 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
const char *TokBegin = begin;
// Skip over wide character determinant.
- if (Kind != tok::char_constant) {
+ if (Kind != tok::char_constant)
+ ++begin;
+ if (Kind == tok::utf8_char_constant)
++begin;
- }
// Skip over the entry quote.
assert(begin[0] == '\'' && "Invalid token lexed");
@@ -1077,6 +1079,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
if (tok::wide_char_constant == Kind) {
largest_character_for_kind =
0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
+ } else if (tok::utf8_char_constant == Kind) {
+ largest_character_for_kind = 0x7F;
} else if (tok::utf16_char_constant == Kind) {
largest_character_for_kind = 0xFFFF;
} else if (tok::utf32_char_constant == Kind) {
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 0fa32399bc..9967f3f0e4 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -218,6 +218,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
Tok.is(tok::char_constant) || // 'x'
Tok.is(tok::wide_char_constant) || // L'x'.
+ Tok.is(tok::utf8_char_constant) || // u8'x'.
Tok.is(tok::utf16_char_constant) || // u'x'.
Tok.is(tok::utf32_char_constant)) { // U'x'.
bool Invalid = false;
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index a3f5d938ce..9cf72cf8f8 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -273,6 +273,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
}
case tok::char_constant: // 'x'
case tok::wide_char_constant: // L'x'
+ case tok::utf8_char_constant: // u8'x'
case tok::utf16_char_constant: // u'x'
case tok::utf32_char_constant: { // U'x'
// Complain about, and drop, any ud-suffix.
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 866cbb142c..08327496ab 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -99,6 +99,10 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
TokenInfo[tok::utf32_char_constant ] |= aci_custom;
}
+ // These tokens have custom code in C++1z mode.
+ if (PP.getLangOpts().CPlusPlus1z)
+ TokenInfo[tok::utf8_char_constant] |= aci_custom;
+
// These tokens change behavior if followed by an '='.
TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
@@ -213,6 +217,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
case tok::utf32_string_literal:
case tok::char_constant:
case tok::wide_char_constant:
+ case tok::utf8_char_constant:
case tok::utf16_char_constant:
case tok::utf32_char_constant:
if (!PP.getLangOpts().CPlusPlus11)
@@ -236,7 +241,8 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
- Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))
+ Tok.is(tok::utf8_char_constant) || Tok.is(tok::utf16_char_constant) ||
+ Tok.is(tok::utf32_char_constant))
return true;
// If this isn't identifier + string, we're done.
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 208ead8646..6913de9fa1 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -910,6 +910,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
}
case tok::char_constant: // constant: character-constant
case tok::wide_char_constant:
+ case tok::utf8_char_constant:
case tok::utf16_char_constant:
case tok::utf32_char_constant:
Res = Actions.ActOnCharacterConstant(Tok, /*UDLScope*/getCurScope());
diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp
index 944e88722f..1f39c25590 100644
--- a/lib/Parse/ParseTentative.cpp
+++ b/lib/Parse/ParseTentative.cpp
@@ -892,6 +892,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) {
case tok::numeric_constant:
case tok::char_constant:
case tok::wide_char_constant:
+ case tok::utf8_char_constant:
case tok::utf16_char_constant:
case tok::utf32_char_constant:
case tok::string_literal:
diff --git a/test/Lexer/utf8-char-literal.cpp b/test/Lexer/utf8-char-literal.cpp
index 7a4d126097..0ddaabc842 100644
--- a/test/Lexer/utf8-char-literal.cpp
+++ b/test/Lexer/utf8-char-literal.cpp
@@ -1,6 +1,15 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++1z -fsyntax-only -verify %s
int array0[u'ñ' == u'\xf1'? 1 : -1];
int array1['\xF1' != u'\xf1'? 1 : -1];
int array1['ñ' != u'\xf1'? 1 : -1]; // expected-error {{character too large for enclosing character literal type}}
+#if __cplusplus > 201402L
+char a = u8'ñ'; // expected-error {{character too large for enclosing character literal type}}
+char b = u8'\x80'; // ok
+char c = u8'\u0080'; // expected-error {{character too large for enclosing character literal type}}
+char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
+char e = u8'ሴ'; // expected-error {{character too large for enclosing character literal type}}
+char f = u8'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
+#endif
diff --git a/www/cxx_status.html b/www/cxx_status.html
index 20617147f5..345226b92a 100644
--- a/www/cxx_status.html
+++ b/www/cxx_status.html
@@ -549,12 +549,17 @@ as the draft C++1z standard evolves.</p>
<!-- Urbana papers -->
<tr>
<td>Fold expressions</td>
- <td><!--<a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4051.html">-->N4295<!--</a>--></td>
+ <td><!--<a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4295.html">-->N4295<!--</a>--></td>
+ <td class="svn" align="center">SVN</td>
+ </tr>
+ <tr>
+ <td><tt>u8</tt> character literals</td>
+ <td><!--<a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4267.html">-->N4267<!--</a>--></td>
<td class="svn" align="center">SVN</td>
</tr>
<tr>
<td>Nested namespace definition</td>
- <td><!--<a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4051.html">-->N4230<!--</a>--></td>
+ <td><!--<a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4230.html">-->N4230<!--</a>--></td>
<td class="svn" align="center">SVN</td>
</tr>
</table>