summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/text/qstringconverter.cpp
diff options
context:
space:
mode:
author: Lars Knoll <lars.knoll@qt.io> 2020-04-17 14:29:46 +0200
committer: Lars Knoll <lars.knoll@qt.io> 2020-05-14 07:46:45 +0200
commit: 94e210faeaf7ec6b8a41d7f707405d99be25e3f0 (patch)
tree: 8a461181c6e56a27b516e6c888e5b8ebf8b14228 /src/corelib/text/qstringconverter.cpp
parent: ea0a08c898fed9cfd8d8eb16613e352740d3eb02 (diff)
Move local8bit conversion over to qutfsupport
Local8Bit is always UTF-8 except for Windows platforms. Also add a
Locale encoding to QStringConverter.

Change-Id: I8d729931fd4c1d7fc6857696b6442a44def3fd9d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r-- src/corelib/text/qstringconverter.cpp 206
1 files changed, 205 insertions, 1 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 36567f5106..92cb327577 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -45,6 +45,10 @@
#include "private/qsimd_p.h"
#include "private/qstringiterator_p.h"
+#ifdef Q_OS_WIN
+#include <qt_windows.h>
+#endif
+
QT_BEGIN_NAMESPACE
enum { Endian = 0, Data = 1 };
@@ -987,6 +991,190 @@ QString qFromUtfEncoded(const QByteArray &ba)
return QUtf8::convertToUnicode(ba.constData(), ba.length());
}
+#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
+/*
+    Decodes a byte sequence in the Windows ANSI code page (CP_ACP) one
+    character at a time. Characters that MultiByteToWideChar rejects are
+    skipped instead of aborting the whole conversion.
+
+    chars, length: the input bytes; a null pointer or a zero length yields an
+                   empty QString.
+    state:         optional. When state->remainingChars is set, the byte saved
+                   in state->state_data[0] is placed in front of the input. If
+                   the very last character fails with
+                   ERROR_NO_UNICODE_TRANSLATION, its first byte is stored back
+                   into the state so that a later call can complete it.
+*/
+static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, QStringConverter::State *state)
+{
+    if (!chars || !length)
+        return QString();
+
+    int copyLocation = 0;
+    int extra = 2;
+    if (state && state->remainingChars) {
+        copyLocation = state->remainingChars;
+        extra += copyLocation;
+    }
+    const int newLength = length + extra;
+    // The working copy is owned by a QVarLengthArray so that every return
+    // path, including the early error return below, releases it.
+    QVarLengthArray<char> buffer(newLength);
+    char *mbcs = buffer.data();
+    //ensure that we have a NULL terminated string
+    mbcs[newLength-1] = 0;
+    mbcs[newLength-2] = 0;
+    memcpy(&(mbcs[copyLocation]), chars, length);
+    if (copyLocation) {
+        //copy the last character from the state
+        mbcs[0] = static_cast<char>(state->state_data[0]);
+        state->remainingChars = 0;
+    }
+    const char *mb = mbcs;
+    QString s;
+#if !defined(Q_OS_WINRT)
+    const char *next = nullptr;
+    // CharNextExA returns its argument unchanged at the terminating NUL,
+    // which ends the loop.
+    while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
+        wchar_t wc[2] = {0};
+        const int charlength = next - mb;
+        const int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
+        if (len > 0) {
+            // Up to two UTF-16 units were requested; keep all of them rather
+            // than silently dropping the second one.
+            for (int i = 0; i < len; ++i)
+                s.append(QChar(wc[i]));
+        } else {
+            const int r = GetLastError();
+            //check if the character being dropped is the last character
+            if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
+                state->remainingChars = 1;
+                state->state_data[0] = static_cast<char>(*mb);
+            }
+        }
+        mb = next;
+    }
+#else
+    size_t size = mbstowcs(NULL, mb, length);
+    if (size == size_t(-1)) {
+        // NOTE(review): a string literal is always true, so this assertion
+        // never fires; the function simply returns an empty string here.
+        Q_ASSERT("Error in CE TextCodec");
+        return QString();
+    }
+    QVarLengthArray<wchar_t> wide(int(size) + 2);
+    wchar_t *ws = wide.data();
+    ws[size +1] = 0;
+    ws[size] = 0;
+    size = mbstowcs(ws, mb, length);
+    for (size_t i = 0; i < size; i++)
+        s.append(QChar(ws[i]));
+#endif
+    return s;
+}
+
+
+// Converts `length` bytes at `chars` from the Windows ANSI code page (CP_ACP)
+// to a QString using MultiByteToWideChar.
+//
+// Returns an empty QString when `chars` is null, `length` is 0, or the
+// conversion fails. When `state` is given, state->state_data[0] and
+// state->remainingChars carry one pending byte between calls: it is read at
+// the start and written back after a successful conversion.
+QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStringConverter::State *state)
+{
+ Q_ASSERT(length < INT_MAX); // ### FIXME
+ const char *mb = chars;
+ int mblen = length;
+
+ if (!mb || !mblen)
+ return QString();
+
+ // Output buffer; starts with 4096 elements and is resized below when
+ // MultiByteToWideChar reports ERROR_INSUFFICIENT_BUFFER.
+ QVarLengthArray<wchar_t, 4096> wc(4096);
+ int len;
+ QString sp;
+ bool prepend = false;
+ char state_data = 0;
+ int remainingChars = 0;
+
+ //save the current state information
+ if (state) {
+ state_data = (char)state->state_data[0];
+ remainingChars = state->remainingChars;
+ }
+
+ //convert the pending character (if available)
+ if (state && remainingChars) {
+ // Pair the saved byte with the first new byte and convert the two
+ // together as a single sequence.
+ char prev[3] = {0};
+ prev[0] = state_data;
+ prev[1] = mb[0];
+ remainingChars = 0;
+ len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+ prev, 2, wc.data(), wc.length());
+ if (len) {
+ sp.append(QChar(wc[0]));
+ // The only input byte was consumed by the pending character.
+ if (mblen == 1) {
+ state->remainingChars = 0;
+ return sp;
+ }
+ // Skip the consumed byte; `sp` is put in front of the result at the end.
+ prepend = true;
+ mb++;
+ mblen--;
+ wc[0] = 0;
+ }
+ }
+
+ // Retry until the conversion returns a non-zero length or an error that
+ // is not handled below is reported.
+ while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
+ mb, mblen, wc.data(), wc.length()))) {
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ // Query the required size (no output buffer) and grow to it.
+ const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+ mb, mblen, 0, 0);
+ wc.resize(wclen);
+ } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
+ //find the last non NULL character
+ while (mblen > 1 && !(mb[mblen-1]))
+ mblen--;
+ //check whether, we hit an invalid character in the middle
+ // (a byte was already trimmed on an earlier pass, or nothing is left
+ // to trim): decode the original input character by character instead.
+ if ((mblen <= 1) || (remainingChars && state_data))
+ return convertToUnicodeCharByChar(chars, length, state);
+ //Remove the last character and try again...
+ state_data = mb[mblen-1];
+ remainingChars = 1;
+ mblen--;
+ } else {
+ // Fail.
+ qWarning("MultiByteToWideChar: Cannot convert multibyte text");
+ break;
+ }
+ }
+
+ // NOTE(review): this returns an empty string without updating `state`,
+ // even when `sp` already holds a converted pending character; confirm
+ // that dropping it is intended.
+ if (len <= 0)
+ return QString();
+
+ if (wc[len-1] == 0) // len - 1: we don't want terminator
+ --len;
+
+ //save the new state information
+ if (state) {
+ state->state_data[0] = (char)state_data;
+ state->remainingChars = remainingChars;
+ }
+ QString s((QChar*)wc.data(), len);
+ if (prepend) {
+ return sp+s;
+ }
+ return s;
+}
+
+/*
+    Encodes `uclen` UTF-16 code units starting at `ch` into the Windows ANSI
+    code page (CP_ACP) with WideCharToMultiByte.
+
+    Returns QByteArray() when `ch` is null and QByteArray("") when `uclen` is
+    0. If the conversion fails for a reason other than a too-small buffer,
+    the result is empty. `state` is not used by this function.
+*/
+QByteArray QLocal8Bit::convertFromUnicode(const QChar *ch, qsizetype uclen, QStringConverter::State *state)
+{
+    Q_ASSERT(uclen < INT_MAX); // ### FIXME
+    if (!ch)
+        return QByteArray();
+    if (uclen == 0)
+        return QByteArray("");
+
+    BOOL usedDefault;
+    QByteArray encoded(4096, 0);
+    int written = 0;
+    for (;;) {
+        written = WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
+                                      encoded.data(), encoded.size()-1, 0, &usedDefault);
+        if (written)
+            break;
+
+        const int error = GetLastError();
+        if (error != ERROR_INSUFFICIENT_BUFFER) {
+            // Fail. Probably can't happen in fact (dwFlags is 0).
+#ifndef QT_NO_DEBUG
+            // Can't use qWarning(), as it'll recurse to handle %ls
+            fprintf(stderr,
+                    "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
+                    error, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16()));
+#endif
+            break;
+        }
+
+        // Buffer too small: ask for the required size, grow, and try again.
+        const int required = WideCharToMultiByte(CP_ACP, 0,
+                                                 (const wchar_t*)ch, uclen,
+                                                 0, 0, 0, &usedDefault);
+        encoded.resize(1+required);
+    }
+    encoded.resize(written);
+    return encoded;
+}
+#endif
+
/*!
\enum QStringConverter::Flag
@@ -1108,6 +1296,21 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state
return out + s.length();
}
+// Decodes `length` bytes at `in` via QLocal8Bit::convertToUnicode, copies the
+// resulting QChars to `out`, and returns the position just past the last
+// QChar written.
+static QChar *fromLocal8Bit(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{
+    const QString decoded = QLocal8Bit::convertToUnicode(in, length, state);
+    const auto count = decoded.length();
+    memcpy(out, decoded.constData(), count*sizeof(QChar));
+    return out + count;
+}
+
+// Encodes `in` via QLocal8Bit::convertFromUnicode, copies the resulting bytes
+// to `out`, and returns the position just past the last byte written.
+static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
+{
+    const QByteArray encoded = QLocal8Bit::convertFromUnicode(in.data(), in.length(), state);
+    const auto count = encoded.length();
+    memcpy(out, encoded.constData(), count);
+    return out + count;
+}
+
+
// Length callbacks used in the encodingInterfaces table, including for the
// Local8Bit entry. fromUtf8Len returns l + 1 and toUtf8Len returns 3 * (l + 1).
// NOTE(review): presumably upper bounds on the output size for an input of
// length l; confirm against QStringConverter::Interface.
static qsizetype fromUtf8Len(qsizetype l) { return l + 1; }
static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
@@ -1125,7 +1328,8 @@ const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringCo
{ fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len },
{ fromUtf32, fromUtf32Len, toUtf32, toUtf32Len },
{ fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len },
- { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len }
+ { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len },
+ { fromLocal8Bit, fromUtf8Len, toLocal8Bit, toUtf8Len }
};
QT_END_NAMESPACE