diff options
author | Lars Knoll <lars.knoll@qt.io> | 2020-04-17 14:29:46 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-05-14 07:46:45 +0200 |
commit | 94e210faeaf7ec6b8a41d7f707405d99be25e3f0 (patch) | |
tree | 8a461181c6e56a27b516e6c888e5b8ebf8b14228 /src/corelib/text/qstringconverter.cpp | |
parent | ea0a08c898fed9cfd8d8eb16613e352740d3eb02 (diff) |
Move local8bit conversion over to qutfsupport
Local8Bit is always UTF-8 except for Windows platforms.
Also add a Locale encoding to QStringConverter.
Change-Id: I8d729931fd4c1d7fc6857696b6442a44def3fd9d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 206 |
1 file changed, 205 insertions, 1 deletion
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 36567f5106..92cb327577 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -45,6 +45,10 @@ #include "private/qsimd_p.h" #include "private/qstringiterator_p.h" +#ifdef Q_OS_WIN +#include <qt_windows.h> +#endif + QT_BEGIN_NAMESPACE enum { Endian = 0, Data = 1 }; @@ -987,6 +991,190 @@ QString qFromUtfEncoded(const QByteArray &ba) return QUtf8::convertToUnicode(ba.constData(), ba.length()); } +#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED) +static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, QStringConverter::State *state) +{ + if (!chars || !length) + return QString(); + + int copyLocation = 0; + int extra = 2; + if (state && state->remainingChars) { + copyLocation = state->remainingChars; + extra += copyLocation; + } + int newLength = length + extra; + char *mbcs = new char[newLength]; + //ensure that we have a NULL terminated string + mbcs[newLength-1] = 0; + mbcs[newLength-2] = 0; + memcpy(&(mbcs[copyLocation]), chars, length); + if (copyLocation) { + //copy the last character from the state + mbcs[0] = (char)state->state_data[0]; + state->remainingChars = 0; + } + const char *mb = mbcs; +#if !defined(Q_OS_WINRT) + const char *next = 0; + QString s; + while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) { + wchar_t wc[2] ={0}; + int charlength = next - mb; + int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2); + if (len>0) { + s.append(QChar(wc[0])); + } else { + int r = GetLastError(); + //check if the character being dropped is the last character + if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) { + state->remainingChars = 1; + state->state_data[0] = (char)*mb; + } + } + mb = next; + } +#else + QString s; + size_t size = mbstowcs(NULL, mb, length); + if (size == size_t(-1)) { + Q_ASSERT("Error in CE TextCodec"); + return 
QString(); + } + wchar_t* ws = new wchar_t[size + 2]; + ws[size +1] = 0; + ws[size] = 0; + size = mbstowcs(ws, mb, length); + for (size_t i = 0; i < size; i++) + s.append(QChar(ws[i])); + delete [] ws; +#endif + delete [] mbcs; + return s; +} + + +QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStringConverter::State *state) +{ + Q_ASSERT(length < INT_MAX); // ### FIXME + const char *mb = chars; + int mblen = length; + + if (!mb || !mblen) + return QString(); + + QVarLengthArray<wchar_t, 4096> wc(4096); + int len; + QString sp; + bool prepend = false; + char state_data = 0; + int remainingChars = 0; + + //save the current state information + if (state) { + state_data = (char)state->state_data[0]; + remainingChars = state->remainingChars; + } + + //convert the pending character (if available) + if (state && remainingChars) { + char prev[3] = {0}; + prev[0] = state_data; + prev[1] = mb[0]; + remainingChars = 0; + len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, + prev, 2, wc.data(), wc.length()); + if (len) { + sp.append(QChar(wc[0])); + if (mblen == 1) { + state->remainingChars = 0; + return sp; + } + prepend = true; + mb++; + mblen--; + wc[0] = 0; + } + } + + while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, + mb, mblen, wc.data(), wc.length()))) { + int r = GetLastError(); + if (r == ERROR_INSUFFICIENT_BUFFER) { + const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, + mb, mblen, 0, 0); + wc.resize(wclen); + } else if (r == ERROR_NO_UNICODE_TRANSLATION) { + //find the last non NULL character + while (mblen > 1 && !(mb[mblen-1])) + mblen--; + //check whether, we hit an invalid character in the middle + if ((mblen <= 1) || (remainingChars && state_data)) + return convertToUnicodeCharByChar(chars, length, state); + //Remove the last character and try again... + state_data = mb[mblen-1]; + remainingChars = 1; + mblen--; + } else { + // Fail. 
+ qWarning("MultiByteToWideChar: Cannot convert multibyte text"); + break; + } + } + + if (len <= 0) + return QString(); + + if (wc[len-1] == 0) // len - 1: we don't want terminator + --len; + + //save the new state information + if (state) { + state->state_data[0] = (char)state_data; + state->remainingChars = remainingChars; + } + QString s((QChar*)wc.data(), len); + if (prepend) { + return sp+s; + } + return s; +} + +QByteArray QLocal8Bit::convertFromUnicode(const QChar *ch, qsizetype uclen, QStringConverter::State *state) +{ + Q_ASSERT(uclen < INT_MAX); // ### FIXME + if (!ch) + return QByteArray(); + if (uclen == 0) + return QByteArray(""); + BOOL used_def; + QByteArray mb(4096, 0); + int len; + while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen, + mb.data(), mb.size()-1, 0, &used_def))) + { + int r = GetLastError(); + if (r == ERROR_INSUFFICIENT_BUFFER) { + mb.resize(1+WideCharToMultiByte(CP_ACP, 0, + (const wchar_t*)ch, uclen, + 0, 0, 0, &used_def)); + // and try again... + } else { + // Fail. Probably can't happen in fact (dwFlags is 0). +#ifndef QT_NO_DEBUG + // Can't use qWarning(), as it'll recurse to handle %ls + fprintf(stderr, + "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n", + r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16())); +#endif + break; + } + } + mb.resize(len); + return mb; +} +#endif + /*! 
\enum QStringConverter::Flag @@ -1108,6 +1296,21 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state return out + s.length(); } +static QChar *fromLocal8Bit(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +{ + QString s = QLocal8Bit::convertToUnicode(in, length, state); + memcpy(out, s.constData(), s.length()*sizeof(QChar)); + return out + s.length(); +} + +static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state) +{ + QByteArray s = QLocal8Bit::convertFromUnicode(in.data(), in.length(), state); + memcpy(out, s.constData(), s.length()); + return out + s.length(); +} + + static qsizetype fromUtf8Len(qsizetype l) { return l + 1; } static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); } @@ -1125,7 +1328,8 @@ const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringCo { fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len }, { fromUtf32, fromUtf32Len, toUtf32, toUtf32Len }, { fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len }, - { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len } + { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len }, + { fromLocal8Bit, fromUtf8Len, toLocal8Bit, toUtf8Len } }; QT_END_NAMESPACE |