summary | refs | log | tree | commit | diff | stats
path: root/src/linguist/lupdate/python.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/linguist/lupdate/python.cpp')
-rw-r--r-- src/linguist/lupdate/python.cpp | 293
1 files changed, 172 insertions, 121 deletions
diff --git a/src/linguist/lupdate/python.cpp b/src/linguist/lupdate/python.cpp
index 9ed3457e6..0bc3bf5e8 100644
--- a/src/linguist/lupdate/python.cpp
+++ b/src/linguist/lupdate/python.cpp
@@ -1,31 +1,6 @@
-/****************************************************************************
-**
-** Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
-** Copyright (C) 2021 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the Qt Linguist of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
+// Copyright (C) 2021 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <translator.h>
#include "lupdate.h"
@@ -42,19 +17,27 @@
QT_BEGIN_NAMESPACE
-static const char MagicComment[] = "TRANSLATOR ";
+static const char PythonMagicComment[] = "TRANSLATOR ";
/*
The first part of this source file is the Python tokenizer. We skip
most of Python; the only tokens that interest us are defined here.
*/
-enum Token { Tok_Eof, Tok_class, Tok_return, Tok_tr,
+enum Token { Tok_Eof, Tok_class, Tok_def, Tok_return, Tok_tr, // Tok_def: the "def" keyword (see the tokens table)
Tok_trUtf8, Tok_translate, Tok_Ident,
Tok_Comment, Tok_Dot, Tok_String,
Tok_LeftParen, Tok_RightParen,
Tok_Comma, Tok_None, Tok_Integer};
+enum class StringType // kind of string literal the tokenizer is asked to read
+{
+ NoString, // default argument of getToken() and parseString()
+ String, // NOTE(review): not referenced in the visible hunks; presumably an unprefixed literal - confirm
+ FormatString, // passed by matchStringStart() after an 'f' prefix
+ RawString // passed by matchStringStart() after an 'r' prefix; changes escape handling
+};
+
/*
The tokenizer maintains the following global variables. The names
should be self-explanatory.
@@ -76,6 +59,7 @@ static QByteArray id;
QHash<QByteArray, Token> tokens = {
{"None", Tok_None},
{"class", Tok_class},
+ {"def", Tok_def},
{"return", Tok_return},
{"__tr", Tok_tr}, // Legacy?
{"__trUtf8", Tok_trUtf8}
@@ -101,8 +85,6 @@ using ContextPair = QPair<QByteArray, int>;
using ContextStack = QStack<ContextPair>;
static ContextStack yyContextStack;
-static int yyContextPops;
-
static int getCharFromFile()
{
int c;
@@ -120,17 +102,6 @@ static int getCharFromFile()
} else if (yyCountingIndentation && (c == 32 || c == 9)) {
yyContinuousSpaceCount++;
} else {
- if (yyIndentationSize == 1 && yyContinuousSpaceCount > yyIndentationSize)
- yyIndentationSize = yyContinuousSpaceCount;
- if (yyCountingIndentation && yyContextStack.count() > 1) {
- ContextPair& top = yyContextStack.top();
- if (top.second == 0 && yyContinuousSpaceCount > 0) {
- top.second = yyContinuousSpaceCount;
- yyContinuousSpaceCount = 0;
- } else if (yyContinuousSpaceCount < top.second) {
- yyContextPops = (top.second - yyContinuousSpaceCount) / yyIndentationSize;
- }
- }
yyCountingIndentation = false;
}
return c;
@@ -156,18 +127,82 @@ static void startTokenizer(const QString &fileName, int (*getCharFunc)(),
yyParenDepth = 0;
yyCurLineNo = 1;
- yyIndentationSize = 1;
+ yyIndentationSize = -1;
yyContinuousSpaceCount = 0;
yyCountingIndentation = false;
yyContextStack.clear();
- yyContextPops = 0;
}
-static Token parseString()
+/*
+  Handles the escape sequence that follows a backslash inside a string
+  literal. The first character after the backslash is fetched with
+  getChar(); the decoded result is appended to yyString, and yyCh is left
+  on the first character after the escape.
+
+  Raw strings keep the backslash unless it precedes \a quoteChar. Otherwise
+  \x takes all following hex digits, an octal escape takes up to three
+  digits, the letters in "abfnrtv" map to their control characters, and
+  any other character is copied as is.
+
+  Returns false if end of input is reached inside the escape, true
+  otherwise. Every append is bounds-checked against yyString's capacity.
+*/
+static bool parseStringEscape(int quoteChar, StringType stringType)
{
static const char tab[] = "abfnrtv";
static const char backTab[] = "\a\b\f\n\r\t\v";
+    yyCh = getChar();
+    if (yyCh == EOF)
+        return false;
+
+    if (stringType == StringType::RawString) {
+        // Only quotes can be escaped in raw strings; any other backslash is kept.
+        // NOTE(review): Python itself keeps the backslash of an escaped quote in
+        // a raw string; dropping it is the existing behavior - confirm it is intended.
+        // Both appends are guarded like every other write to yyString, so an
+        // overlong raw string cannot run past the end of the buffer.
+        if (yyCh != quoteChar && yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = '\\';
+        if (yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = char(yyCh);
+        yyCh = getChar();
+        return true;
+    }
+
+    if (yyCh == 'x') {
+        QByteArray hex = "0"; // leading "0" keeps the conversion valid when no digit follows
+        yyCh = getChar();
+        if (yyCh == EOF)
+            return false;
+        while (std::isxdigit(yyCh)) {
+            hex += char(yyCh);
+            yyCh = getChar();
+            if (yyCh == EOF)
+                return false;
+        }
+        uint n = 0;
+#ifdef Q_CC_MSVC
+        sscanf_s(hex, "%x", &n);
+#else
+        std::sscanf(hex, "%x", &n);
+#endif
+        if (yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = char(n);
+        return true;
+    }
+
+    if (yyCh >= '0' && yyCh < '8') {
+        QByteArray oct;
+        int digits = 0;
+        do {
+            oct += char(yyCh);
+            ++digits;
+            yyCh = getChar();
+            if (yyCh == EOF)
+                return false;
+        } while (yyCh >= '0' && yyCh < '8' && digits < 3);
+        uint value = 0; // %o requires an unsigned int destination
+#ifdef Q_CC_MSVC
+        sscanf_s(oct, "%o", &value);
+#else
+        std::sscanf(oct, "%o", &value);
+#endif
+        if (yyStringLen < sizeof(yyString) - 1)
+            yyString[yyStringLen++] = char(value);
+        return true;
+    }
+
+    // Single-letter escapes: translate the known ones, copy anything else verbatim.
+    const char *p = std::strchr(tab, yyCh);
+    if (yyStringLen < sizeof(yyString) - 1) {
+        yyString[yyStringLen++] = p == nullptr
+            ? char(yyCh) : backTab[p - tab];
+    }
+    yyCh = getChar();
+    return true;
+}
+
+static Token parseString(StringType stringType = StringType::NoString)
+{
int quoteChar = yyCh;
bool tripleQuote = false;
bool singleQuote = true;
@@ -207,48 +242,8 @@ static Token parseString()
}
if (yyCh == '\\') {
- yyCh = getChar();
-
- if (yyCh == 'x') {
- QByteArray hex = "0";
-
- yyCh = getChar();
- while (std::isxdigit(yyCh)) {
- hex += char(yyCh);
- yyCh = getChar();
- }
- uint n;
-#ifdef Q_CC_MSVC
- sscanf_s(hex, "%x", &n);
-#else
- std::sscanf(hex, "%x", &n);
-#endif
- if (yyStringLen < sizeof(yyString) - 1)
- yyString[yyStringLen++] = char(n);
- } else if (yyCh >= '0' && yyCh < '8') {
- QByteArray oct;
- int n = 0;
-
- do {
- oct += char(yyCh);
- ++n;
- yyCh = getChar();
- } while (yyCh >= '0' && yyCh < '8' && n < 3);
-#ifdef Q_CC_MSVC
- sscanf_s(oct, "%o", &n);
-#else
- std::sscanf(oct, "%o", &n);
-#endif
- if (yyStringLen < sizeof(yyString) - 1)
- yyString[yyStringLen++] = char(n);
- } else {
- const char *p = std::strchr(tab, yyCh);
- if (yyStringLen < sizeof(yyString) - 1) {
- yyString[yyStringLen++] = (p == nullptr)
- ? char(yyCh) : backTab[p - tab];
- }
- yyCh = getChar();
- }
+ if (!parseStringEscape(quoteChar, stringType))
+ return Tok_Eof;
} else {
char *yStart = yyString + yyStringLen;
char *yp = yStart;
@@ -287,7 +282,7 @@ static QByteArray readLine()
return result;
}
-static Token getToken()
+static Token getToken(StringType stringType = StringType::NoString)
{
yyIdent.clear();
yyCommentLen = 0;
@@ -313,6 +308,7 @@ static Token getToken()
id = readLine().trimmed();
break;
case EOF:
+ return Tok_Eof;
case '\n':
break;
default:
@@ -324,7 +320,7 @@ static Token getToken()
break;
case '"':
case '\'':
- return parseString();
+ return parseString(stringType);
case '(':
yyParenDepth++;
yyCh = getChar();
@@ -396,15 +392,34 @@ static bool match(Token t)
return matches;
}
+static bool matchStringStart()
+{
+    // An ordinary literal is already the current token.
+    if (yyTok == Tok_String)
+        return true;
+
+    // Otherwise only a one-letter identifier can be a prefix: r"bla" or f"bla{var}".
+    if (yyTok != Tok_Ident || yyIdent.size() != 1)
+        return false;
+
+    const char prefix = yyIdent.at(0);
+    StringType prefixedType;
+    if (prefix == 'r')
+        prefixedType = StringType::RawString;
+    else if (prefix == 'f')
+        prefixedType = StringType::FormatString;
+    else
+        return false;
+
+    // Read the token after the prefix; it must be the string itself.
+    yyTok = getToken(prefixedType);
+    return yyTok == Tok_String;
+}
+
static bool matchString(QByteArray *s) // gathers consecutive string literals into *s
{
- const bool matches = (yyTok == Tok_String);
s->clear();
- while (yyTok == Tok_String) {
+ bool ok = false; // set once at least one literal has been consumed
+ while (matchStringStart()) { // also accepts literals with an 'r' or 'f' prefix
*s += yyString;
yyTok = getToken();
+ ok = true;
}
- return matches;
+ return ok;
}
static bool matchEncoding(bool *utf8)
@@ -515,33 +530,57 @@ static bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *co
if (match(Tok_RightParen))
return true;
- // look for comment
- if (!match(Tok_Comma) || !matchStringOrNone(comment))
+ // not a comma or a right paren, illegal syntax
+ if (!match(Tok_Comma))
return false;
+ // python accepts trailing commas within parenthesis, so allow a comma with nothing after
+ if (match(Tok_RightParen))
+ return true;
+
+ // check for comment
+ if (!matchStringOrNone(comment))
+ return false; // not a comment, or a trailing comma... something is wrong
+
if (match(Tok_RightParen))
return true;
- // look for encoding
+ // not a comma or a right paren, illegal syntax
if (!match(Tok_Comma))
return false;
- if (matchEncoding(utf8)) {
- if (!match(Tok_RightParen)) {
- // look for the plural quantifier,
- // this can be a number, an identifier or a function call,
- // so for simplicity we mark it as plural if we know we have a comma instead of an
- // right parentheses.
- *plural = match(Tok_Comma);
- }
+ // python accepts trailing commas within parenthesis, so allow a comma with nothing after
+ if (match(Tok_RightParen))
return true;
+
+ // look for optional encoding information
+ if (matchEncoding(utf8)) {
+ if (match(Tok_RightParen))
+ return true;
+
+ // not a comma or a right paren, illegal syntax
+ if (!match(Tok_Comma))
+ return false;
+
+ // python accepts trailing commas within parenthesis, so allow a comma with nothing after
+ if (match(Tok_RightParen))
+ return true;
}
- // This can be a QTranslator::translate("context", "source", "comment", n) plural translation
- if (!matchExpression() || !match(Tok_RightParen))
+ // Must be a plural expression
+ if (!matchExpression())
return false;
+
*plural = true;
- return true;
+
+ // Ignore any trailing comma here
+ match(Tok_Comma);
+
+ // This must be the end, or there are too many parameters
+ if (match(Tok_RightParen))
+ return true;
+
+ return false;
}
static inline void setMessageParameters(TranslatorMessage *message)
@@ -566,22 +605,33 @@ static void parse(Translator &tor, ConversionData &cd,
QByteArray prefix;
bool utf8 = false;
- yyContextStack.push({initialContext, 0});
-
yyTok = getToken();
while (yyTok != Tok_Eof) {
- if (yyContextPops > 0) {
- for ( int i = 0; i < yyContextPops; i++)
- yyContextStack.pop();
- yyContextPops = 0;
- }
-
switch (yyTok) {
- case Tok_class:
+ case Tok_class: {
+ if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
+ yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
+ const int indent = yyIndentationSize > 0
+ ? yyContinuousSpaceCount / yyIndentationSize : 0;
+ while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
+ yyContextStack.pop();
+ yyTok = getToken();
+ yyContextStack.push({yyIdent, indent});
yyTok = getToken();
- yyContextStack.push({yyIdent, 0});
- yyContinuousSpaceCount = 0;
+ }
+ break;
+ case Tok_def:
+ if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
+ yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
+ if (!yyContextStack.isEmpty()) {
+ // Pop classes if the function is further outdented than the class on the top
+ // (end of a nested class).
+ const int classIndent = yyIndentationSize > 0
+ ? yyContinuousSpaceCount / yyIndentationSize - 1 : 0;
+ while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
+ yyContextStack.pop();
+ }
yyTok = getToken();
break;
case Tok_tr:
@@ -607,7 +657,8 @@ static void parse(Translator &tor, ConversionData &cd,
if (prefix.isEmpty())
context = defaultContext;
else if (prefix == "self")
- context = yyContextStack.top().first;
+ context = yyContextStack.isEmpty()
+ ? initialContext : yyContextStack.top().first;
else
context = prefix;
@@ -649,8 +700,8 @@ static void parse(Translator &tor, ConversionData &cd,
case Tok_Comment:
comment = yyComment;
comment = comment.simplified();
- if (comment.left(sizeof(MagicComment) - 1) == MagicComment) {
- comment.remove(0, sizeof(MagicComment) - 1);
+ if (comment.left(sizeof(PythonMagicComment) - 1) == PythonMagicComment) {
+ comment.remove(0, sizeof(PythonMagicComment) - 1);
int k = comment.indexOf(' ');
if (k == -1) {
context = comment;