1 files changed, 172 insertions, 121 deletions
diff --git a/src/linguist/lupdate/python.cpp b/src/linguist/lupdate/python.cpp
index 9ed3457e6..0bc3bf5e8 100644
--- a/src/linguist/lupdate/python.cpp
+++ b/src/linguist/lupdate/python.cpp
@@ -1,31 +1,6 @@
-/****************************************************************************
-**
-** Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
-** Copyright (C) 2021 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the Qt Linguist of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
+// Copyright (C) 2021 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 
 #include <translator.h>
 #include "lupdate.h"
@@ -42,19 +17,27 @@
 
 QT_BEGIN_NAMESPACE
 
-static const char MagicComment[] = "TRANSLATOR ";
+static const char PythonMagicComment[] = "TRANSLATOR ";
 
 /*
   The first part of this source file is the Python tokenizer.  We skip
   most of Python; the only tokens that interest us are defined here.
 */
 
-enum Token { Tok_Eof, Tok_class, Tok_return, Tok_tr,
+enum Token { Tok_Eof, Tok_class, Tok_def, Tok_return, Tok_tr,
              Tok_trUtf8, Tok_translate, Tok_Ident,
              Tok_Comment, Tok_Dot, Tok_String,
              Tok_LeftParen, Tok_RightParen,
              Tok_Comma, Tok_None, Tok_Integer};
 
+enum class StringType
+{
+    NoString, // Default argument of getToken()/parseString(): no prefix was seen
+    String,
+    FormatString, // Requested for the literal that follows an "f" prefix
+    RawString // Requested for the literal that follows an "r" prefix
+};
+
 /*
   The tokenizer maintains the following global variables. The names
   should be self-explanatory.
@@ -76,6 +59,7 @@ static QByteArray id;
 QHash<QByteArray, Token> tokens = {
     {"None", Tok_None},
     {"class", Tok_class},
+    {"def", Tok_def},
     {"return", Tok_return},
     {"__tr", Tok_tr}, // Legacy?
     {"__trUtf8", Tok_trUtf8}
@@ -101,8 +85,6 @@ using ContextPair = QPair<QByteArray, int>;
 using ContextStack = QStack<ContextPair>;
 static ContextStack yyContextStack;
 
-static int yyContextPops;
-
 static int getCharFromFile()
 {
     int c;
@@ -120,17 +102,6 @@ static int getCharFromFile()
     } else if (yyCountingIndentation && (c == 32 || c == 9)) {
         yyContinuousSpaceCount++;
     } else {
-        if (yyIndentationSize == 1 && yyContinuousSpaceCount > yyIndentationSize)
-            yyIndentationSize = yyContinuousSpaceCount;
-        if (yyCountingIndentation && yyContextStack.count() > 1) {
-            ContextPair& top = yyContextStack.top();
-            if (top.second == 0 && yyContinuousSpaceCount > 0) {
-                top.second = yyContinuousSpaceCount;
-                yyContinuousSpaceCount = 0;
-            } else if (yyContinuousSpaceCount < top.second) {
-                yyContextPops = (top.second - yyContinuousSpaceCount) / yyIndentationSize;
-            }
-        }
         yyCountingIndentation = false;
     }
     return c;
@@ -156,18 +127,82 @@ static void startTokenizer(const QString &fileName, int (*getCharFunc)(),
     yyParenDepth = 0;
     yyCurLineNo = 1;
 
-    yyIndentationSize = 1;
+    yyIndentationSize = -1;
     yyContinuousSpaceCount = 0;
     yyCountingIndentation = false;
     yyContextStack.clear();
-    yyContextPops = 0;
 }
 
-static Token parseString()
+/*
+  Consumes the escape sequence after a backslash in a string literal, appends its
+  value to yyString (dropped once the buffer is full) and leaves yyCh just past
+  the escape. Returns false when EOF is hit while the escape is being read.
+*/
+static bool parseStringEscape(int quoteChar, StringType stringType)
 {
     static const char tab[] = "abfnrtv";
     static const char backTab[] = "\a\b\f\n\r\t\v";
 
+    yyCh = getChar();
+    if (yyCh == EOF)
+        return false;
+
+    if (stringType == StringType::RawString) {
+        // Only quotes can be escaped in raw strings; otherwise keep the backslash.
+        if (yyCh != quoteChar && yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = '\\';
+        if (yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = char(yyCh);
+        yyCh = getChar();
+        return true;
+    }
+
+    if (yyCh == 'x') {
+        QByteArray hex = "0"; // Leading zero keeps "\x" without digits parseable
+        for (yyCh = getChar(); std::isxdigit(yyCh); yyCh = getChar())
+            hex += char(yyCh);
+        if (yyCh == EOF)
+            return false;
+        uint n = 0;
+#ifdef Q_CC_MSVC
+        sscanf_s(hex, "%x", &n);
+#else
+        std::sscanf(hex, "%x", &n);
+#endif
+        if (yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = char(n);
+        return true;
+    }
+
+    if (yyCh >= '0' && yyCh < '8') {
+        QByteArray oct;
+        do { // At most three octal digits belong to the escape
+            oct += char(yyCh);
+            yyCh = getChar();
+            if (yyCh == EOF)
+                return false;
+        } while (yyCh >= '0' && yyCh < '8' && oct.size() < 3);
+        uint n = 0; // "%o" stores through an unsigned int pointer
+#ifdef Q_CC_MSVC
+        sscanf_s(oct, "%o", &n);
+#else
+        std::sscanf(oct, "%o", &n);
+#endif
+        if (yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = char(n);
+        return true;
+    }
+
+    // Single-character escape: translate it via tab/backTab, else keep it as is.
+    const char *p = std::strchr(tab, yyCh);
+    if (yyStringLen < sizeof(yyString) - 1)
+        yyString[yyStringLen++] = p == nullptr ? char(yyCh) : backTab[p - tab];
+    yyCh = getChar();
+    return true;
+}
+
+static Token parseString(StringType stringType = StringType::NoString)
+{
     int quoteChar = yyCh;
     bool tripleQuote = false;
     bool singleQuote = true;
@@ -207,48 +242,8 @@ static Token parseString()
         }
 
         if (yyCh == '\\') {
-            yyCh = getChar();
-
-            if (yyCh == 'x') {
-                QByteArray hex = "0";
-
-                yyCh = getChar();
-                while (std::isxdigit(yyCh)) {
-                    hex += char(yyCh);
-                    yyCh = getChar();
-                }
-                uint n;
-#ifdef Q_CC_MSVC
-                sscanf_s(hex, "%x", &n);
-#else
-                std::sscanf(hex, "%x", &n);
-#endif
-                if (yyStringLen < sizeof(yyString) - 1)
-                    yyString[yyStringLen++] = char(n);
-            } else if (yyCh >= '0' && yyCh < '8') {
-                QByteArray oct;
-                int n = 0;
-
-                do {
-                    oct += char(yyCh);
-                    ++n;
-                    yyCh = getChar();
-                } while (yyCh >= '0' && yyCh < '8' && n < 3);
-#ifdef Q_CC_MSVC
-                sscanf_s(oct, "%o", &n);
-#else
-                std::sscanf(oct, "%o", &n);
-#endif
-                if (yyStringLen < sizeof(yyString) - 1)
-                    yyString[yyStringLen++] = char(n);
-            } else {
-                const char *p = std::strchr(tab, yyCh);
-                if (yyStringLen < sizeof(yyString) - 1) {
-                    yyString[yyStringLen++] = (p == nullptr)
-                            ? char(yyCh) : backTab[p - tab];
-                }
-                yyCh = getChar();
-            }
+            if (!parseStringEscape(quoteChar, stringType))
+                return Tok_Eof;
         } else {
             char *yStart = yyString + yyStringLen;
             char *yp = yStart;
@@ -287,7 +282,7 @@ static QByteArray readLine()
     return result;
 }
 
-static Token getToken()
+static Token getToken(StringType stringType = StringType::NoString)
 {
     yyIdent.clear();
     yyCommentLen = 0;
@@ -313,6 +308,7 @@ static Token getToken()
                 id = readLine().trimmed();
                 break;
             case EOF:
+                return Tok_Eof;
             case '\n':
                 break;
             default:
@@ -324,7 +320,7 @@ static Token getToken()
             break;
         case '"':
         case '\'':
-            return parseString();
+            return parseString(stringType);
         case '(':
             yyParenDepth++;
             yyCh = getChar();
@@ -396,15 +392,34 @@ static bool match(Token t)
     return matches;
 }
 
+// True if the current token starts a string literal; an "r"/"R" (raw) or "f"/"F"
+// (format) prefix identifier is consumed and the literal after it tokenized.
+static bool matchStringStart()
+{
+    if (yyTok == Tok_String)
+        return true;
+    if (yyTok != Tok_Ident || yyIdent.size() != 1)
+        return false;
+    const char prefix = yyIdent.at(0); // Python accepts prefixes in either case
+    if (prefix == 'r' || prefix == 'R')
+        yyTok = getToken(StringType::RawString);
+    else if (prefix == 'f' || prefix == 'F')
+        yyTok = getToken(StringType::FormatString);
+    else
+        return false;
+    return yyTok == Tok_String;
+}
+
 static bool matchString(QByteArray *s)
 {
-    const bool matches = (yyTok == Tok_String);
     s->clear();
-    while (yyTok == Tok_String) {
+    bool ok = false; // Becomes true once at least one string literal was consumed
+    while (matchStringStart()) { // Consecutive literals are concatenated into *s
         *s += yyString;
         yyTok = getToken();
+        ok = true;
     }
-    return matches;
+    return ok;
 }
 
 static bool matchEncoding(bool *utf8)
@@ -515,33 +530,57 @@ static bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *co
     if (match(Tok_RightParen))
         return true;
 
-    // look for comment
-    if (!match(Tok_Comma) || !matchStringOrNone(comment))
+    // not a comma or a right paren, illegal syntax
+    if (!match(Tok_Comma))
         return false;
 
+    // Python accepts trailing commas within parentheses, so allow a comma with nothing after it
+    if (match(Tok_RightParen))
+        return true;
+
+    // check for comment
+    if (!matchStringOrNone(comment))
+        return false; // not a comment, or a trailing comma... something is wrong
+
     if (match(Tok_RightParen))
         return true;
 
-    // look for encoding
+    // not a comma or a right paren, illegal syntax
     if (!match(Tok_Comma))
         return false;
 
-    if (matchEncoding(utf8)) {
-        if (!match(Tok_RightParen)) {
-            // look for the plural quantifier,
-            // this can be a number, an identifier or a function call,
-            // so for simplicity we mark it as plural if we know we have a comma instead of an
-            // right parentheses.
-            *plural = match(Tok_Comma);
-        }
+    // Python accepts trailing commas within parentheses, so allow a comma with nothing after it
+    if (match(Tok_RightParen))
         return true;
+
+    // look for optional encoding information
+    if (matchEncoding(utf8)) {
+        if (match(Tok_RightParen))
+            return true;
+
+        // not a comma or a right paren, illegal syntax
+        if (!match(Tok_Comma))
+            return false;
+
+        // Python accepts trailing commas within parentheses, so allow a comma with nothing after it
+        if (match(Tok_RightParen))
+            return true;
     }
 
-    // This can be a QTranslator::translate("context", "source", "comment", n) plural translation
-    if (!matchExpression() || !match(Tok_RightParen))
+    // Must be a plural expression
+    if (!matchExpression())
         return false;
+
     *plural = true;
-    return true;
+
+    // Ignore any trailing comma here
+    match(Tok_Comma);
+
+    // This must be the end, or there are too many parameters
+    if (match(Tok_RightParen))
+        return true;
+
+    return false;
 }
 
 static inline void setMessageParameters(TranslatorMessage *message)
@@ -566,22 +605,33 @@ static void parse(Translator &tor, ConversionData &cd,
     QByteArray prefix;
     bool utf8 = false;
 
-    yyContextStack.push({initialContext, 0});
-
     yyTok = getToken();
     while (yyTok != Tok_Eof) {
 
-        if (yyContextPops > 0) {
-            for ( int i = 0; i < yyContextPops; i++)
-                yyContextStack.pop();
-            yyContextPops = 0;
-        }
-
         switch (yyTok) {
-            case Tok_class:
+            case Tok_class: {
+                if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
+                    yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
+                const int indent = yyIndentationSize > 0
+                                   ? yyContinuousSpaceCount / yyIndentationSize : 0;
+                while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
+                    yyContextStack.pop();
+                yyTok = getToken();
+                yyContextStack.push({yyIdent, indent});
                 yyTok = getToken();
-                yyContextStack.push({yyIdent, 0});
-                yyContinuousSpaceCount = 0;
+            }
+                break;
+            case Tok_def:
+                if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
+                    yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
+                if (!yyContextStack.isEmpty()) {
+                    // Pop classes if the function is further outdented than the class on the top
+                    // (end of a nested class).
+                    const int classIndent = yyIndentationSize > 0
+                                            ? yyContinuousSpaceCount / yyIndentationSize - 1 : 0;
+                    while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
+                        yyContextStack.pop();
+                }
                 yyTok = getToken();
                 break;
             case Tok_tr:
@@ -607,7 +657,8 @@ static void parse(Translator &tor, ConversionData &cd,
                     if (prefix.isEmpty())
                         context = defaultContext;
                     else if (prefix == "self")
-                        context = yyContextStack.top().first;
+                        context = yyContextStack.isEmpty()
+                                  ? initialContext : yyContextStack.top().first;
                     else
                         context = prefix;
 
@@ -649,8 +700,8 @@ static void parse(Translator &tor, ConversionData &cd,
             case Tok_Comment:
                 comment = yyComment;
                 comment = comment.simplified();
-                if (comment.left(sizeof(MagicComment) - 1) == MagicComment) {
-                    comment.remove(0, sizeof(MagicComment) - 1);
+                if (comment.left(sizeof(PythonMagicComment) - 1) == PythonMagicComment) {
+                    comment.remove(0, sizeof(PythonMagicComment) - 1);
                     int k = comment.indexOf(' ');
                     if (k == -1) {
                         context = comment;