C++: Basic support for C++11 user-defined literals

1. Extends the lexer so that a numeric, character, or string literal can be followed by an underscore '_' and alphanumeric characters forming a user-defined literal suffix. 2. Extends the parser so that it accepts the operator"" _abc(...) user-defined literal operator definition. 3. Adds the Token::Flags.userDefinedLiteral bit-field flag indicating whether a token carries a user-defined literal. 4. Adds a C++11 auto test case with: 12_km, 0.5_Pa, 'c'_X, "abd"_L, u"xyz"_M 5. All optional suffix scanning methods now return a boolean indicating whether a suffix was found. 6. Adds C++ Lexer tests for user-defined literals with the C++11 feature enabled. This change, however, does not make QtCreator understand user-defined literal semantics, e.g. properly resolve the type when applying a custom literal operator. Change-Id: I30e62f025ec9fb11c39261985ea4d772b1a80949 Reviewed-by: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>
author: Adam Strzelecki <ono@java.pl> 2014-11-02 14:42:23 +0100
committer: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com> 2015-02-17 09:45:34 +0000
commit: 425811291dfd41782cc91f6c1293d41af7b0e4d8 (patch)
tree: c730fb107d9b040248230d688790b45a08741be8
parent: 5699991a2fd4ef35d24720249692612d069d391b (diff)
7 files changed, 115 insertions, 22 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp
index 3fb9a66bd24..981daef51fe 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -314,7 +314,8 @@ void Lexer::scan_helper(Token *tok)
             yyinp();
             scanDigitSequence(); // this is optional: we already skipped over the first digit
             scanExponentPart();
-            scanOptionalFloatingSuffix();
+            if (!scanOptionalFloatingSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             if (std::isalnum(_yychar) || _yychar == '_') {
                 do {
                     yyinp();
@@ -683,6 +684,7 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
         tok->f.kind = T_STRING_LITERAL;
 
     scanUntilQuote(tok, '"');
+    scanOptionalUserDefinedLiteral(tok);
 }
 
 void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
@@ -758,6 +760,7 @@ void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
         tok->f.kind = T_CHAR_LITERAL;
 
     scanUntilQuote(tok, '\'');
+    scanOptionalUserDefinedLiteral(tok);
 }
 
 void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
@@ -802,13 +805,16 @@ bool Lexer::scanExponentPart()
     return scanDigitSequence();
 }
 
-void Lexer::scanOptionalFloatingSuffix()
+bool Lexer::scanOptionalFloatingSuffix()
 {
-    if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L')
+    if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L') {
         yyinp();
+        return true;
+    }
+    return false;
 }
 
-void Lexer::scanOptionalIntegerSuffix(bool allowU)
+bool Lexer::scanOptionalIntegerSuffix(bool allowU)
 {
     switch(_yychar) {
     case 'u':
@@ -817,19 +823,28 @@ void Lexer::scanOptionalIntegerSuffix(bool allowU)
             yyinp();
             scanOptionalIntegerSuffix(false);
         }
-        return;
+        return true;
     case 'l':
         yyinp();
         if (_yychar == 'l')
             yyinp();
-        return;
+        return true;
     case 'L':
         yyinp();
         if (_yychar == 'L')
             yyinp();
-        return;
+        return true;
     default:
-        return;
+        return false;
+    }
+}
+
+void Lexer::scanOptionalUserDefinedLiteral(Token *tok)
+{
+    if (_languageFeatures.cxx11Enabled && _yychar == '_') {
+        tok->f.userDefinedLiteral = true;
+        while (std::isalnum(_yychar) || _yychar == '_' || isByteOfMultiByteCodePoint(_yychar))
+            yyinp();
     }
 }
 
@@ -844,19 +859,22 @@ void Lexer::scanNumericLiteral(Token *tok)
                    (_yychar >= 'A' && _yychar <= 'F')) {
                 yyinp();
             }
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             goto theEnd;
         } else if (_yychar == 'b' || _yychar == 'B') { // see n3472
             yyinp();
             while (_yychar == '0' || _yychar == '1')
                 yyinp();
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             goto theEnd;
         } else if (_yychar >= '0' && _yychar <= '7') {
             do {
                 yyinp();
             } while (_yychar >= '0' && _yychar <= '7');
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             goto theEnd;
         }
     }
@@ -866,16 +884,18 @@ void Lexer::scanNumericLiteral(Token *tok)
             yyinp();
             scanDigitSequence(); // this is optional: "1." is a valid floating point number
             scanExponentPart();
-            scanOptionalFloatingSuffix();
+            if (!scanOptionalFloatingSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             break;
         } else if (_yychar == 'e' || _yychar == 'E') {
-            if (scanExponentPart())
-                scanOptionalFloatingSuffix();
+            if (scanExponentPart() && !scanOptionalFloatingSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             break;
         } else if (std::isdigit(_yychar)) {
             yyinp();
         } else {
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
             break;
         }
     }
@@ -911,6 +931,7 @@ void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
         } else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
             yyinp();
         } else {
+            scanOptionalUserDefinedLiteral(tok);
             break;
         }
     }
diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h
index 0309c69950e..c19ee904f98 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -100,8 +100,9 @@ private:
     void scanUntilQuote(Token *tok, unsigned char quote);
     bool scanDigitSequence();
     bool scanExponentPart();
-    void scanOptionalFloatingSuffix();
-    void scanOptionalIntegerSuffix(bool allowU = true);
+    bool scanOptionalFloatingSuffix();
+    bool scanOptionalIntegerSuffix(bool allowU = true);
+    void scanOptionalUserDefinedLiteral(Token *tok);
     void scanNumericLiteral(Token *tok);
     void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped);
     void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);
diff --git a/src/libs/3rdparty/cplusplus/Parser.cpp b/src/libs/3rdparty/cplusplus/Parser.cpp
index 75970b16715..19bcd358b57 100644
--- a/src/libs/3rdparty/cplusplus/Parser.cpp
+++ b/src/libs/3rdparty/cplusplus/Parser.cpp
@@ -1274,6 +1274,14 @@ bool Parser::parseOperator(OperatorAST *&node) // ### FIXME
         } else if (LA() == T_LBRACKET && LA(2) == T_RBRACKET) {
             ast->op_token = ast->open_token = consumeToken();
             ast->close_token = consumeToken();
+        } else if (_languageFeatures.cxx11Enabled &&
+                   LA() == T_STRING_LITERAL && LA(2) == T_IDENTIFIER &&
+                   !tok().f.userDefinedLiteral && tok().string->size() == 0 &&
+                   tok(2).identifier->size() > 1 && tok(2).identifier->chars()[0] == '_') {
+            // C++11 user-defined literal operator, e.g.:
+            // int operator"" _abc123(const char *str, size_t size) { ... }
+            ast->op_token = consumeToken();
+            consumeToken(); // consume literal operator identifier
         } else {
             return false;
         }
diff --git a/src/libs/3rdparty/cplusplus/Token.h b/src/libs/3rdparty/cplusplus/Token.h
index c3c6b37b6d4..1f18c652c91 100644
--- a/src/libs/3rdparty/cplusplus/Token.h
+++ b/src/libs/3rdparty/cplusplus/Token.h
@@ -302,6 +302,7 @@ public:
     inline bool joined() const { return f.joined; }
     inline bool expanded() const { return f.expanded; }
     inline bool generated() const { return f.generated; }
+    inline bool userDefinedLiteral() const { return f.userDefinedLiteral; }
 
     inline unsigned bytes() const { return f.bytes; }
     inline unsigned bytesBegin() const { return byteOffset; }
@@ -363,8 +364,11 @@ public:
         // Tokens '1', '+', '2', and ';' are all expanded. However only tokens '+' and ';'
         // are generated.
         unsigned generated     : 1;
+        // The token is C++11 user-defined literal such as:
+        // 12_km, 0.5_Pa, 'c'_X, "abd"_L, u"xyz"_M
+        unsigned userDefinedLiteral : 1;
         // Unused...
-        unsigned pad           : 3;
+        unsigned pad           : 2;
         // The token length in bytes and UTF16 chars.
         unsigned bytes         : 16;
         unsigned utf16chars    : 16;
diff --git a/tests/auto/cplusplus/cxx11/data/userDefinedLiterals.1.cpp b/tests/auto/cplusplus/cxx11/data/userDefinedLiterals.1.cpp
new file mode 100644
index 00000000000..f194b3ed51d
--- /dev/null
+++ b/tests/auto/cplusplus/cxx11/data/userDefinedLiterals.1.cpp
@@ -0,0 +1,7 @@
+constexpr long double operator"" _inv(long double value) {
+  return 1.0 / value;
+}
+int main() {
+  auto foo = operator"" _inv(2.3);
+  return 12_km + 0.5_Pa + 'c'_X + "abd"_L + u"xyz"_M;
+}
diff --git a/tests/auto/cplusplus/cxx11/tst_cxx11.cpp b/tests/auto/cplusplus/cxx11/tst_cxx11.cpp
index 0cb48f7e39a..cf8b540f23d 100644
--- a/tests/auto/cplusplus/cxx11/tst_cxx11.cpp
+++ b/tests/auto/cplusplus/cxx11/tst_cxx11.cpp
@@ -197,6 +197,7 @@ void tst_cxx11::parse_data()
     QTest::newRow("threadLocal.1") << "threadLocal.1.cpp" << "";
     QTest::newRow("trailingtypespec.1") << "trailingtypespec.1.cpp" << "";
     QTest::newRow("lambda.2") << "lambda.2.cpp" << "";
+    QTest::newRow("userDefinedLiterals.1") << "userDefinedLiterals.1.cpp" << "";
 }
 
 void tst_cxx11::parse()
diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp b/tests/auto/cplusplus/lexer/tst_lexer.cpp
index eadc39a90bf..5622c8097e0 100644
--- a/tests/auto/cplusplus/lexer/tst_lexer.cpp
+++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp
@@ -61,7 +61,8 @@ public:
         CompareBytesEnd        = 1 << 4,
         CompareUtf16Chars      = 1 << 5,
         CompareUtf16CharsBegin = 1 << 6,
-        CompareUtf16CharsEnd   = 1 << 7
+        CompareUtf16CharsEnd   = 1 << 7,
+        CompareUserDefinedLiteral = 1 << 8
     };
     Q_DECLARE_FLAGS(TokenCompareFlags, TokenCompareFlag)
 
@@ -77,6 +78,8 @@ private slots:
 
     void bytes_and_utf16chars();
     void bytes_and_utf16chars_data();
+    void user_defined_literals();
+    void user_defined_literals_data();
     void offsets();
     void offsets_data();
 
@@ -87,7 +90,8 @@ private:
              const Tokens &expectedTokens,
              bool preserveState,
              TokenCompareFlags compareFlags,
-             bool preprocessorMode = false);
+             bool preprocessorMode = false,
+             const LanguageFeatures &extraLanguageFeatures = LanguageFeatures());
 
     int _state;
 };
@@ -109,12 +113,18 @@ void tst_SimpleLexer::run(const QByteArray &source,
                           const Tokens &expectedTokens,
                           bool preserveState,
                           TokenCompareFlags compareFlags,
-                          bool preprocessorMode)
+                          bool preprocessorMode,
+                          const LanguageFeatures &extraLanguageFeatures)
 {
     QVERIFY(compareFlags);
 
     SimpleLexer lexer;
     lexer.setPreprocessorMode(preprocessorMode);
+    if (extraLanguageFeatures.flags) {
+        LanguageFeatures languageFeatures = lexer.languageFeatures();
+        languageFeatures.flags |= extraLanguageFeatures.flags;
+        lexer.setLanguageFeatures(languageFeatures);
+    }
     const Tokens tokens = lexer(source, preserveState ? _state : 0);
     if (preserveState)
         _state = lexer.state();
@@ -146,6 +156,8 @@ void tst_SimpleLexer::run(const QByteArray &source,
             QCOMPARE(token.utf16charsBegin(), expectedToken.utf16charsBegin());
         if (compareFlags & CompareUtf16CharsEnd)
             QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd());
+        if (compareFlags & CompareUserDefinedLiteral)
+            QCOMPARE(token.userDefinedLiteral(), expectedToken.userDefinedLiteral());
     }
 
     QString msg = QLatin1String("Less tokens than expected: got %1, expected %2.");
@@ -364,12 +376,14 @@ void tst_SimpleLexer::bytes_and_utf16chars()
     run(source, expectedTokens, false, compareFlags);
 }
 
-static Tokens createToken(unsigned kind, unsigned bytes, unsigned utf16chars)
+static Tokens createToken(unsigned kind, unsigned bytes, unsigned utf16chars,
+                          bool userDefinedLiteral = false)
 {
     Token t;
     t.f.kind = kind;
     t.f.bytes = bytes;
     t.f.utf16chars = utf16chars;
+    t.f.userDefinedLiteral = userDefinedLiteral;
     return Tokens() << t;
 }
 
@@ -445,6 +459,43 @@ void tst_SimpleLexer::bytes_and_utf16chars_data()
         << _("\"" UC_U00FC UC_U4E8C UC_U10302 "\"") << createToken(T_STRING_LITERAL, 11, 6);
 }
 
+void tst_SimpleLexer::user_defined_literals()
+{
+    QFETCH(QByteArray, source);
+    QFETCH(Tokens, expectedTokens);
+
+    const TokenCompareFlags compareFlags = CompareKind | CompareBytes | CompareUtf16Chars | CompareUserDefinedLiteral;
+    LanguageFeatures languageFeatures;
+    languageFeatures.cxx11Enabled = true;
+    run(source, expectedTokens, false, compareFlags, false, languageFeatures);
+}
+
+void tst_SimpleLexer::user_defined_literals_data()
+{
+    QTest::addColumn<QByteArray>("source");
+    QTest::addColumn<Tokens>("expectedTokens");
+
+    typedef QByteArray _;
+
+    // String User-defined Literals
+    QTest::newRow("latin1 string non-user-defined literal")
+        << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7, false);
+    QTest::newRow("latin1 string user-defined literal")
+        << _("\"hello\"_udl") << createToken(T_STRING_LITERAL, 11, 11, true);
+
+    // Numeric User-defined Literals
+    QTest::newRow("numeric non user-defined literal with integer suffix")
+        << _("11LL") << createToken(T_NUMERIC_LITERAL, 4, 4, false);
+    QTest::newRow("numeric non user-defined literal with decimal part")
+        << _("11.1") << createToken(T_NUMERIC_LITERAL, 4, 4, false);
+    QTest::newRow("numeric non user-defined literal with float suffix")
+        << _("11.1f") << createToken(T_NUMERIC_LITERAL, 5, 5, false);
+    QTest::newRow("numeric user-defined literal without decimal part")
+        << _("11_udl") << createToken(T_NUMERIC_LITERAL, 6, 6, true);
+    QTest::newRow("numeric user-defined literal with decimal part")
+        << _("11.1_udl") << createToken(T_NUMERIC_LITERAL, 8, 8, true);
+}
+
 static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes,
                          unsigned utf16charsOffset, unsigned utf16chars)
 {
author	Adam Strzelecki <ono@java.pl>	2014-11-02 14:42:23 +0100
committer	Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>	2015-02-17 09:45:34 +0000
commit	425811291dfd41782cc91f6c1293d41af7b0e4d8 (patch)
tree	c730fb107d9b040248230d688790b45a08741be8
parent	5699991a2fd4ef35d24720249692612d069d391b (diff)