diff options
Diffstat (limited to 'src/linguist/lupdate/python.cpp')
-rw-r--r-- | src/linguist/lupdate/python.cpp | 293 |
1 files changed, 172 insertions, 121 deletions
diff --git a/src/linguist/lupdate/python.cpp b/src/linguist/lupdate/python.cpp index 9ed3457e6..0bc3bf5e8 100644 --- a/src/linguist/lupdate/python.cpp +++ b/src/linguist/lupdate/python.cpp @@ -1,31 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de> -** Copyright (C) 2021 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the Qt Linguist of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de> +// Copyright (C) 2021 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include <translator.h> #include "lupdate.h" @@ -42,19 +17,27 @@ QT_BEGIN_NAMESPACE -static const char MagicComment[] = "TRANSLATOR "; +static const char PythonMagicComment[] = "TRANSLATOR "; /* The first part of this source file is the Python tokenizer. We skip most of Python; the only tokens that interest us are defined here. */ -enum Token { Tok_Eof, Tok_class, Tok_return, Tok_tr, +enum Token { Tok_Eof, Tok_class, Tok_def, Tok_return, Tok_tr, Tok_trUtf8, Tok_translate, Tok_Ident, Tok_Comment, Tok_Dot, Tok_String, Tok_LeftParen, Tok_RightParen, Tok_Comma, Tok_None, Tok_Integer}; +enum class StringType +{ + NoString, + String, + FormatString, + RawString +}; + /* The tokenizer maintains the following global variables. The names should be self-explanatory. @@ -76,6 +59,7 @@ static QByteArray id; QHash<QByteArray, Token> tokens = { {"None", Tok_None}, {"class", Tok_class}, + {"def", Tok_def}, {"return", Tok_return}, {"__tr", Tok_tr}, // Legacy? {"__trUtf8", Tok_trUtf8} @@ -101,8 +85,6 @@ using ContextPair = QPair<QByteArray, int>; using ContextStack = QStack<ContextPair>; static ContextStack yyContextStack; -static int yyContextPops; - static int getCharFromFile() { int c; @@ -120,17 +102,6 @@ static int getCharFromFile() } else if (yyCountingIndentation && (c == 32 || c == 9)) { yyContinuousSpaceCount++; } else { - if (yyIndentationSize == 1 && yyContinuousSpaceCount > yyIndentationSize) - yyIndentationSize = yyContinuousSpaceCount; - if (yyCountingIndentation && yyContextStack.count() > 1) { - ContextPair& top = yyContextStack.top(); - if (top.second == 0 && yyContinuousSpaceCount > 0) { - top.second = yyContinuousSpaceCount; - yyContinuousSpaceCount = 0; - } else if (yyContinuousSpaceCount < top.second) { - yyContextPops = (top.second - yyContinuousSpaceCount) / yyIndentationSize; - } - } yyCountingIndentation = false; } return c; @@ -156,18 +127,82 @@ static void startTokenizer(const QString &fileName, int (*getCharFunc)(), yyParenDepth = 0; yyCurLineNo = 1; - yyIndentationSize = 1; + yyIndentationSize = -1; yyContinuousSpaceCount = 0; yyCountingIndentation = false; yyContextStack.clear(); - yyContextPops = 0; } -static Token parseString() +static bool parseStringEscape(int quoteChar, StringType stringType) { static const char tab[] = "abfnrtv"; static const char backTab[] = "\a\b\f\n\r\t\v"; + yyCh = getChar(); + if (yyCh == EOF) + return false; + + if (stringType == StringType::RawString) { + if (yyCh != quoteChar) // Only quotes can be escaped in raw strings + yyString[yyStringLen++] = '\\'; + yyString[yyStringLen++] = yyCh; + yyCh = getChar(); + return true; + } + + if (yyCh == 'x') { + QByteArray hex = "0"; + yyCh = getChar(); + if (yyCh == EOF) + return false; + while (std::isxdigit(yyCh)) { + hex += char(yyCh); + yyCh = getChar(); + if (yyCh == EOF) + return false; + } + uint n; +#ifdef Q_CC_MSVC + sscanf_s(hex, "%x", &n); +#else + std::sscanf(hex, "%x", &n); +#endif + if (yyStringLen < sizeof(yyString) - 1) + yyString[yyStringLen++] = char(n); + return true; + } + + if (yyCh >= '0' && yyCh < '8') { + QByteArray oct; + int n = 0; + do { + oct += char(yyCh); + ++n; + yyCh = getChar(); + if (yyCh == EOF) + return false; + } while (yyCh >= '0' && yyCh < '8' && n < 3); +#ifdef Q_CC_MSVC + sscanf_s(oct, "%o", &n); +#else + std::sscanf(oct, "%o", &n); +#endif + if (yyStringLen < sizeof(yyString) - 1) + yyString[yyStringLen++] = char(n); + return true; + } + + const char *p = std::strchr(tab, yyCh); + if (yyStringLen < sizeof(yyString) - 1) { + yyString[yyStringLen++] = p == nullptr + ? char(yyCh) : backTab[p - tab]; + } + yyCh = getChar(); + return true; +} + +static Token parseString(StringType stringType = StringType::NoString) +{ int quoteChar = yyCh; bool tripleQuote = false; bool singleQuote = true; @@ -207,48 +242,8 @@ static Token parseString() } if (yyCh == '\\') { - yyCh = getChar(); - - if (yyCh == 'x') { - QByteArray hex = "0"; - - yyCh = getChar(); - while (std::isxdigit(yyCh)) { - hex += char(yyCh); - yyCh = getChar(); - } - uint n; -#ifdef Q_CC_MSVC - sscanf_s(hex, "%x", &n); -#else - std::sscanf(hex, "%x", &n); -#endif - if (yyStringLen < sizeof(yyString) - 1) - yyString[yyStringLen++] = char(n); - } else if (yyCh >= '0' && yyCh < '8') { - QByteArray oct; - int n = 0; - - do { - oct += char(yyCh); - ++n; - yyCh = getChar(); - } while (yyCh >= '0' && yyCh < '8' && n < 3); -#ifdef Q_CC_MSVC - sscanf_s(oct, "%o", &n); -#else - std::sscanf(oct, "%o", &n); -#endif - if (yyStringLen < sizeof(yyString) - 1) - yyString[yyStringLen++] = char(n); - } else { - const char *p = std::strchr(tab, yyCh); - if (yyStringLen < sizeof(yyString) - 1) { - yyString[yyStringLen++] = (p == nullptr) - ? char(yyCh) : backTab[p - tab]; - } - yyCh = getChar(); - } + if (!parseStringEscape(quoteChar, stringType)) + return Tok_Eof; } else { char *yStart = yyString + yyStringLen; char *yp = yStart; @@ -287,7 +282,7 @@ static QByteArray readLine() return result; } -static Token getToken() +static Token getToken(StringType stringType = StringType::NoString) { yyIdent.clear(); yyCommentLen = 0; @@ -313,6 +308,7 @@ static Token getToken() id = readLine().trimmed(); break; case EOF: + return Tok_Eof; case '\n': break; default: @@ -324,7 +320,7 @@ static Token getToken() break; case '"': case '\'': - return parseString(); + return parseString(stringType); case '(': yyParenDepth++; yyCh = getChar(); @@ -396,15 +392,34 @@ static bool match(Token t) return matches; } +static bool matchStringStart() +{ + if (yyTok == Tok_String) + return true; + // Check for f"bla{var}" and raw strings r"bla". + if (yyTok == Tok_Ident && yyIdent.size() == 1) { + switch (yyIdent.at(0)) { + case 'r': + yyTok = getToken(StringType::RawString); + return yyTok == Tok_String; + case 'f': + yyTok = getToken(StringType::FormatString); + return yyTok == Tok_String; + } + } + return false; +} + static bool matchString(QByteArray *s) { - const bool matches = (yyTok == Tok_String); s->clear(); - while (yyTok == Tok_String) { + bool ok = false; + while (matchStringStart()) { *s += yyString; yyTok = getToken(); + ok = true; } - return matches; + return ok; } static bool matchEncoding(bool *utf8) @@ -515,33 +530,57 @@ static bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *co if (match(Tok_RightParen)) return true; - // look for comment - if (!match(Tok_Comma) || !matchStringOrNone(comment)) + // not a comma or a right paren, illegal syntax + if (!match(Tok_Comma)) return false; + // python accepts trailing commas within parenthesis, so allow a comma with nothing after + if (match(Tok_RightParen)) + return true; + + // check for comment + if (!matchStringOrNone(comment)) + return false; // not a comment, or a trailing comma... something is wrong + if (match(Tok_RightParen)) return true; - // look for encoding + // not a comma or a right paren, illegal syntax if (!match(Tok_Comma)) return false; - if (matchEncoding(utf8)) { - if (!match(Tok_RightParen)) { - // look for the plural quantifier, - // this can be a number, an identifier or a function call, - // so for simplicity we mark it as plural if we know we have a comma instead of an - // right parentheses. - *plural = match(Tok_Comma); - } + // python accepts trailing commas within parenthesis, so allow a comma with nothing after + if (match(Tok_RightParen)) return true; + + // look for optional encoding information + if (matchEncoding(utf8)) { + if (match(Tok_RightParen)) + return true; + + // not a comma or a right paren, illegal syntax + if (!match(Tok_Comma)) + return false; + + // python accepts trailing commas within parenthesis, so allow a comma with nothing after + if (match(Tok_RightParen)) + return true; } - // This can be a QTranslator::translate("context", "source", "comment", n) plural translation - if (!matchExpression() || !match(Tok_RightParen)) + // Must be a plural expression + if (!matchExpression()) return false; + *plural = true; - return true; + + // Ignore any trailing comma here + match(Tok_Comma); + + // This must be the end, or there are too many parameters + if (match(Tok_RightParen)) + return true; + + return false; } static inline void setMessageParameters(TranslatorMessage *message) @@ -566,22 +605,33 @@ static void parse(Translator &tor, ConversionData &cd, QByteArray prefix; bool utf8 = false; - yyContextStack.push({initialContext, 0}); - yyTok = getToken(); while (yyTok != Tok_Eof) { - if (yyContextPops > 0) { - for ( int i = 0; i < yyContextPops; i++) - yyContextStack.pop(); - yyContextPops = 0; - } - switch (yyTok) { - case Tok_class: + case Tok_class: { + if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0) + yyIndentationSize = yyContinuousSpaceCount; // First indented "class" + const int indent = yyIndentationSize > 0 + ? yyContinuousSpaceCount / yyIndentationSize : 0; + while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent) + yyContextStack.pop(); + yyTok = getToken(); + yyContextStack.push({yyIdent, indent}); yyTok = getToken(); - yyContextStack.push({yyIdent, 0}); - yyContinuousSpaceCount = 0; + } + break; + case Tok_def: + if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0) + yyIndentationSize = yyContinuousSpaceCount; // First indented "def" + if (!yyContextStack.isEmpty()) { + // Pop classes if the function is further outdented than the class on the top + // (end of a nested class). + const int classIndent = yyIndentationSize > 0 + ? yyContinuousSpaceCount / yyIndentationSize - 1 : 0; + while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent) + yyContextStack.pop(); + } yyTok = getToken(); break; case Tok_tr: @@ -607,7 +657,8 @@ static void parse(Translator &tor, ConversionData &cd, if (prefix.isEmpty()) context = defaultContext; else if (prefix == "self") - context = yyContextStack.top().first; + context = yyContextStack.isEmpty() + ? initialContext : yyContextStack.top().first; else context = prefix; @@ -649,8 +700,8 @@ static void parse(Translator &tor, ConversionData &cd, case Tok_Comment: comment = yyComment; comment = comment.simplified(); - if (comment.left(sizeof(MagicComment) - 1) == MagicComment) { - comment.remove(0, sizeof(MagicComment) - 1); + if (comment.left(sizeof(PythonMagicComment) - 1) == PythonMagicComment) { + comment.remove(0, sizeof(PythonMagicComment) - 1); int k = comment.indexOf(' '); if (k == -1) { context = comment; |