diff options
author | Lars Knoll <lars.knoll@qt.io> | 2020-04-17 14:29:46 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-05-14 07:46:45 +0200 |
commit | 94e210faeaf7ec6b8a41d7f707405d99be25e3f0 (patch) | |
tree | 8a461181c6e56a27b516e6c888e5b8ebf8b14228 | |
parent | ea0a08c898fed9cfd8d8eb16613e352740d3eb02 (diff) |
Move local8bit conversion over to qutfsupport
Local8Bit is always UTF-8 except for Windows platforms.
Also add a Locale encoding to QStringConverter.
Change-Id: I8d729931fd4c1d7fc6857696b6442a44def3fd9d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r-- | src/corelib/codecs/qwindowscodec.cpp | 181 | ||||
-rw-r--r-- | src/corelib/codecs/qwindowscodec_p.h | 1 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 206 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter.h | 3 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter_p.h | 13 |
5 files changed, 224 insertions, 180 deletions
diff --git a/src/corelib/codecs/qwindowscodec.cpp b/src/corelib/codecs/qwindowscodec.cpp index 710935a65a..d8a0088d6a 100644 --- a/src/corelib/codecs/qwindowscodec.cpp +++ b/src/corelib/codecs/qwindowscodec.cpp @@ -38,10 +38,7 @@ ****************************************************************************/ #include "qwindowscodec_p.h" -#include <qvarlengtharray.h> -#include <qstring.h> -#include <qbytearray.h> -#include <qt_windows.h> +#include "private/qstringconverter_p.h" QT_BEGIN_NAMESPACE @@ -55,184 +52,14 @@ QWindowsLocalCodec::~QWindowsLocalCodec() QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const { - const char *mb = chars; - int mblen = length; - - if (!mb || !mblen) - return QString(); - - QVarLengthArray<wchar_t, 4096> wc(4096); - int len; - QString sp; - bool prepend = false; - char state_data = 0; - int remainingChars = 0; - - //save the current state information - if (state) { - state_data = (char)state->state_data[0]; - remainingChars = state->remainingChars; - } - - //convert the pending charcter (if available) - if (state && remainingChars) { - char prev[3] = {0}; - prev[0] = state_data; - prev[1] = mb[0]; - remainingChars = 0; - len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, - prev, 2, wc.data(), wc.length()); - if (len) { - sp.append(QChar(wc[0])); - if (mblen == 1) { - state->remainingChars = 0; - return sp; - } - prepend = true; - mb++; - mblen--; - wc[0] = 0; - } - } - - while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, - mb, mblen, wc.data(), wc.length()))) { - int r = GetLastError(); - if (r == ERROR_INSUFFICIENT_BUFFER) { - const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, - mb, mblen, 0, 0); - wc.resize(wclen); - } else if (r == ERROR_NO_UNICODE_TRANSLATION) { - //find the last non NULL character - while (mblen > 1 && !(mb[mblen-1])) - mblen--; - //check whether, we hit an invalid character in the middle - if ((mblen <= 1) || (remainingChars && state_data)) - return convertToUnicodeCharByChar(chars, length, state); - //Remove the last character and try again... - state_data = mb[mblen-1]; - remainingChars = 1; - mblen--; - } else { - // Fail. - qWarning("MultiByteToWideChar: Cannot convert multibyte text"); - break; - } - } - - if (len <= 0) - return QString(); - - if (wc[len-1] == 0) // len - 1: we don't want terminator - --len; - - //save the new state information - if (state) { - state->state_data[0] = (char)state_data; - state->remainingChars = remainingChars; - } - QString s((QChar*)wc.data(), len); - if (prepend) { - return sp+s; - } - return s; -} - -QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const -{ - if (!chars || !length) - return QString(); - - int copyLocation = 0; - int extra = 2; - if (state && state->remainingChars) { - copyLocation = state->remainingChars; - extra += copyLocation; - } - int newLength = length + extra; - char *mbcs = new char[newLength]; - //ensure that we have a NULL terminated string - mbcs[newLength-1] = 0; - mbcs[newLength-2] = 0; - memcpy(&(mbcs[copyLocation]), chars, length); - if (copyLocation) { - //copy the last character from the state - mbcs[0] = (char)state->state_data[0]; - state->remainingChars = 0; - } - const char *mb = mbcs; -#if !defined(Q_OS_WINRT) - const char *next = 0; - QString s; - while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) { - wchar_t wc[2] ={0}; - int charlength = next - mb; - int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2); - if (len>0) { - s.append(QChar(wc[0])); - } else { - int r = GetLastError(); - //check if the character being dropped is the last character - if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) { - state->remainingChars = 1; - state->state_data[0] = (char)*mb; - } - } - mb = next; - } -#else - QString s; - size_t size = mbstowcs(NULL, mb, length); - if (size == size_t(-1)) { - Q_ASSERT("Error in CE TextCodec"); - return QString(); - } - wchar_t* ws = new wchar_t[size + 2]; - ws[size +1] = 0; - ws[size] = 0; - size = mbstowcs(ws, mb, length); - for (size_t i = 0; i < size; i++) - s.append(QChar(ws[i])); - delete [] ws; -#endif - delete [] mbcs; - return s; + return QLocal8Bit::convertToUnicode(chars, length, state); } -QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *) const +QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *state) const { - if (!ch) - return QByteArray(); - if (uclen == 0) - return QByteArray(""); - BOOL used_def; - QByteArray mb(4096, 0); - int len; - while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen, - mb.data(), mb.size()-1, 0, &used_def))) - { - int r = GetLastError(); - if (r == ERROR_INSUFFICIENT_BUFFER) { - mb.resize(1+WideCharToMultiByte(CP_ACP, 0, - (const wchar_t*)ch, uclen, - 0, 0, 0, &used_def)); - // and try again... - } else { - // Fail. Probably can't happen in fact (dwFlags is 0). -#ifndef QT_NO_DEBUG - // Can't use qWarning(), as it'll recurse to handle %ls - fprintf(stderr, - "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n", - r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16())); -#endif - break; - } - } - mb.resize(len); - return mb; + return QLocal8Bit::convertFromUnicode(ch, uclen, state); } - QByteArray QWindowsLocalCodec::name() const { return "System"; diff --git a/src/corelib/codecs/qwindowscodec_p.h b/src/corelib/codecs/qwindowscodec_p.h index 5bcab0ce66..8c34dac1c7 100644 --- a/src/corelib/codecs/qwindowscodec_p.h +++ b/src/corelib/codecs/qwindowscodec_p.h @@ -65,7 +65,6 @@ public: QString convertToUnicode(const char *, int, ConverterState *) const override; QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; - QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const; QByteArray name() const override; int mibEnum() const override; diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 36567f5106..92cb327577 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -45,6 +45,10 @@ #include "private/qsimd_p.h" #include "private/qstringiterator_p.h" +#ifdef Q_OS_WIN +#include <qt_windows.h> +#endif + QT_BEGIN_NAMESPACE enum { Endian = 0, Data = 1 }; @@ -987,6 +991,190 @@ QString qFromUtfEncoded(const QByteArray &ba) return QUtf8::convertToUnicode(ba.constData(), ba.length()); } +#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED) +static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, QStringConverter::State *state) +{ + if (!chars || !length) + return QString(); + + int copyLocation = 0; + int extra = 2; + if (state && state->remainingChars) { + copyLocation = state->remainingChars; + extra += copyLocation; + } + int newLength = length + extra; + char *mbcs = new char[newLength]; + //ensure that we have a NULL terminated string + mbcs[newLength-1] = 0; + mbcs[newLength-2] = 0; + memcpy(&(mbcs[copyLocation]), chars, length); + if (copyLocation) { + //copy the last character from the state + mbcs[0] = (char)state->state_data[0]; + state->remainingChars = 0; + } + const char *mb = mbcs; +#if !defined(Q_OS_WINRT) + const char *next = 0; + QString s; + while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) { + wchar_t wc[2] ={0}; + int charlength = next - mb; + int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2); + if (len>0) { + s.append(QChar(wc[0])); + } else { + int r = GetLastError(); + //check if the character being dropped is the last character + if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) { + state->remainingChars = 1; + state->state_data[0] = (char)*mb; + } + } + mb = next; + } +#else + QString s; + size_t size = mbstowcs(NULL, mb, length); + if (size == size_t(-1)) { + Q_ASSERT("Error in CE TextCodec"); + return QString(); + } + wchar_t* ws = new wchar_t[size + 2]; + ws[size +1] = 0; + ws[size] = 0; + size = mbstowcs(ws, mb, length); + for (size_t i = 0; i < size; i++) + s.append(QChar(ws[i])); + delete [] ws; +#endif + delete [] mbcs; + return s; +} + + +QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStringConverter::State *state) +{ + Q_ASSERT(length < INT_MAX); // ### FIXME + const char *mb = chars; + int mblen = length; + + if (!mb || !mblen) + return QString(); + + QVarLengthArray<wchar_t, 4096> wc(4096); + int len; + QString sp; + bool prepend = false; + char state_data = 0; + int remainingChars = 0; + + //save the current state information + if (state) { + state_data = (char)state->state_data[0]; + remainingChars = state->remainingChars; + } + + //convert the pending character (if available) + if (state && remainingChars) { + char prev[3] = {0}; + prev[0] = state_data; + prev[1] = mb[0]; + remainingChars = 0; + len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, + prev, 2, wc.data(), wc.length()); + if (len) { + sp.append(QChar(wc[0])); + if (mblen == 1) { + state->remainingChars = 0; + return sp; + } + prepend = true; + mb++; + mblen--; + wc[0] = 0; + } + } + + while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, + mb, mblen, wc.data(), wc.length()))) { + int r = GetLastError(); + if (r == ERROR_INSUFFICIENT_BUFFER) { + const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, + mb, mblen, 0, 0); + wc.resize(wclen); + } else if (r == ERROR_NO_UNICODE_TRANSLATION) { + //find the last non NULL character + while (mblen > 1 && !(mb[mblen-1])) + mblen--; + //check whether, we hit an invalid character in the middle + if ((mblen <= 1) || (remainingChars && state_data)) + return convertToUnicodeCharByChar(chars, length, state); + //Remove the last character and try again... + state_data = mb[mblen-1]; + remainingChars = 1; + mblen--; + } else { + // Fail. + qWarning("MultiByteToWideChar: Cannot convert multibyte text"); + break; + } + } + + if (len <= 0) + return QString(); + + if (wc[len-1] == 0) // len - 1: we don't want terminator + --len; + + //save the new state information + if (state) { + state->state_data[0] = (char)state_data; + state->remainingChars = remainingChars; + } + QString s((QChar*)wc.data(), len); + if (prepend) { + return sp+s; + } + return s; +} + +QByteArray QLocal8Bit::convertFromUnicode(const QChar *ch, qsizetype uclen, QStringConverter::State *state) +{ + Q_ASSERT(uclen < INT_MAX); // ### FIXME + if (!ch) + return QByteArray(); + if (uclen == 0) + return QByteArray(""); + BOOL used_def; + QByteArray mb(4096, 0); + int len; + while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen, + mb.data(), mb.size()-1, 0, &used_def))) + { + int r = GetLastError(); + if (r == ERROR_INSUFFICIENT_BUFFER) { + mb.resize(1+WideCharToMultiByte(CP_ACP, 0, + (const wchar_t*)ch, uclen, + 0, 0, 0, &used_def)); + // and try again... + } else { + // Fail. Probably can't happen in fact (dwFlags is 0). +#ifndef QT_NO_DEBUG + // Can't use qWarning(), as it'll recurse to handle %ls + fprintf(stderr, + "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n", + r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16())); +#endif + break; + } + } + mb.resize(len); + return mb; +} +#endif + /*! \enum QStringConverter::Flag @@ -1108,6 +1296,21 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state return out + s.length(); } +static QChar *fromLocal8Bit(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QLocal8Bit::convertToUnicode(in, length, state); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QLocal8Bit::convertFromUnicode(in.data(), in.length(), state); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + + static qsizetype fromUtf8Len(qsizetype l) { return l + 1; } static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); } @@ -1125,7 +1328,8 @@ const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringCo { fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len }, { fromUtf32, fromUtf32Len, toUtf32, toUtf32Len }, { fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len }, - { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len } + { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len }, + { fromLocal8Bit, fromUtf8Len, toLocal8Bit, toUtf8Len } }; QT_END_NAMESPACE diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h index e91975b70f..d3c0e9a502 100644 --- a/src/corelib/text/qstringconverter.h +++ b/src/corelib/text/qstringconverter.h @@ -96,7 +96,8 @@ public: Utf32, Utf32LE, Utf32BE, - LastEncoding = Utf32BE + Locale, + LastEncoding = Locale }; protected: diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h index 5764979542..763e3761d5 100644 --- a/src/corelib/text/qstringconverter_p.h +++ b/src/corelib/text/qstringconverter_p.h @@ -312,6 +312,19 @@ struct QUtf32 static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); }; +struct QLocal8Bit +{ +#if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED) + static QString convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state) + { return QUtf8::convertToUnicode(chars, len, state); } + static QByteArray convertFromUnicode(const QChar *chars, qsizetype len, QStringConverter::State *state) + { return QUtf8::convertFromUnicode(chars, len, state); } +#else + static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); + static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); +#endif +}; + /* Converts from different utf encodings looking at a possible byte order mark at the beginning of the string. If no BOM exists, utf-8 is assumed. |