summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Lars Knoll <lars.knoll@qt.io> 2020-04-17 14:29:46 +0200
committer: Lars Knoll <lars.knoll@qt.io> 2020-05-14 07:46:45 +0200
commit: 94e210faeaf7ec6b8a41d7f707405d99be25e3f0 (patch)
tree: 8a461181c6e56a27b516e6c888e5b8ebf8b14228
parent: ea0a08c898fed9cfd8d8eb16613e352740d3eb02 (diff)
Move local8bit conversion over to qutfsupport
Local8Bit is always UTF-8 except for Windows platforms. Also add a Locale encoding to QStringConverter.

Change-Id: I8d729931fd4c1d7fc6857696b6442a44def3fd9d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r-- src/corelib/codecs/qwindowscodec.cpp | 181
-rw-r--r-- src/corelib/codecs/qwindowscodec_p.h | 1
-rw-r--r-- src/corelib/text/qstringconverter.cpp | 206
-rw-r--r-- src/corelib/text/qstringconverter.h | 3
-rw-r--r-- src/corelib/text/qstringconverter_p.h | 13
5 files changed, 224 insertions, 180 deletions
diff --git a/src/corelib/codecs/qwindowscodec.cpp b/src/corelib/codecs/qwindowscodec.cpp
index 710935a65a..d8a0088d6a 100644
--- a/src/corelib/codecs/qwindowscodec.cpp
+++ b/src/corelib/codecs/qwindowscodec.cpp
@@ -38,10 +38,7 @@
****************************************************************************/
#include "qwindowscodec_p.h"
-#include <qvarlengtharray.h>
-#include <qstring.h>
-#include <qbytearray.h>
-#include <qt_windows.h>
+#include "private/qstringconverter_p.h"
QT_BEGIN_NAMESPACE
@@ -55,184 +52,14 @@ QWindowsLocalCodec::~QWindowsLocalCodec()
QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const
{
- const char *mb = chars;
- int mblen = length;
-
- if (!mb || !mblen)
- return QString();
-
- QVarLengthArray<wchar_t, 4096> wc(4096);
- int len;
- QString sp;
- bool prepend = false;
- char state_data = 0;
- int remainingChars = 0;
-
- //save the current state information
- if (state) {
- state_data = (char)state->state_data[0];
- remainingChars = state->remainingChars;
- }
-
- //convert the pending charcter (if available)
- if (state && remainingChars) {
- char prev[3] = {0};
- prev[0] = state_data;
- prev[1] = mb[0];
- remainingChars = 0;
- len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
- prev, 2, wc.data(), wc.length());
- if (len) {
- sp.append(QChar(wc[0]));
- if (mblen == 1) {
- state->remainingChars = 0;
- return sp;
- }
- prepend = true;
- mb++;
- mblen--;
- wc[0] = 0;
- }
- }
-
- while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
- mb, mblen, wc.data(), wc.length()))) {
- int r = GetLastError();
- if (r == ERROR_INSUFFICIENT_BUFFER) {
- const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
- mb, mblen, 0, 0);
- wc.resize(wclen);
- } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
- //find the last non NULL character
- while (mblen > 1 && !(mb[mblen-1]))
- mblen--;
- //check whether, we hit an invalid character in the middle
- if ((mblen <= 1) || (remainingChars && state_data))
- return convertToUnicodeCharByChar(chars, length, state);
- //Remove the last character and try again...
- state_data = mb[mblen-1];
- remainingChars = 1;
- mblen--;
- } else {
- // Fail.
- qWarning("MultiByteToWideChar: Cannot convert multibyte text");
- break;
- }
- }
-
- if (len <= 0)
- return QString();
-
- if (wc[len-1] == 0) // len - 1: we don't want terminator
- --len;
-
- //save the new state information
- if (state) {
- state->state_data[0] = (char)state_data;
- state->remainingChars = remainingChars;
- }
- QString s((QChar*)wc.data(), len);
- if (prepend) {
- return sp+s;
- }
- return s;
-}
-
-QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const
-{
- if (!chars || !length)
- return QString();
-
- int copyLocation = 0;
- int extra = 2;
- if (state && state->remainingChars) {
- copyLocation = state->remainingChars;
- extra += copyLocation;
- }
- int newLength = length + extra;
- char *mbcs = new char[newLength];
- //ensure that we have a NULL terminated string
- mbcs[newLength-1] = 0;
- mbcs[newLength-2] = 0;
- memcpy(&(mbcs[copyLocation]), chars, length);
- if (copyLocation) {
- //copy the last character from the state
- mbcs[0] = (char)state->state_data[0];
- state->remainingChars = 0;
- }
- const char *mb = mbcs;
-#if !defined(Q_OS_WINRT)
- const char *next = 0;
- QString s;
- while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
- wchar_t wc[2] ={0};
- int charlength = next - mb;
- int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
- if (len>0) {
- s.append(QChar(wc[0]));
- } else {
- int r = GetLastError();
- //check if the character being dropped is the last character
- if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
- state->remainingChars = 1;
- state->state_data[0] = (char)*mb;
- }
- }
- mb = next;
- }
-#else
- QString s;
- size_t size = mbstowcs(NULL, mb, length);
- if (size == size_t(-1)) {
- Q_ASSERT("Error in CE TextCodec");
- return QString();
- }
- wchar_t* ws = new wchar_t[size + 2];
- ws[size +1] = 0;
- ws[size] = 0;
- size = mbstowcs(ws, mb, length);
- for (size_t i = 0; i < size; i++)
- s.append(QChar(ws[i]));
- delete [] ws;
-#endif
- delete [] mbcs;
- return s;
+ return QLocal8Bit::convertToUnicode(chars, length, state);
}
-QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *) const
+QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *state) const
{
- if (!ch)
- return QByteArray();
- if (uclen == 0)
- return QByteArray("");
- BOOL used_def;
- QByteArray mb(4096, 0);
- int len;
- while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
- mb.data(), mb.size()-1, 0, &used_def)))
- {
- int r = GetLastError();
- if (r == ERROR_INSUFFICIENT_BUFFER) {
- mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
- (const wchar_t*)ch, uclen,
- 0, 0, 0, &used_def));
- // and try again...
- } else {
- // Fail. Probably can't happen in fact (dwFlags is 0).
-#ifndef QT_NO_DEBUG
- // Can't use qWarning(), as it'll recurse to handle %ls
- fprintf(stderr,
- "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
- r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16()));
-#endif
- break;
- }
- }
- mb.resize(len);
- return mb;
+ return QLocal8Bit::convertFromUnicode(ch, uclen, state);
}
-
QByteArray QWindowsLocalCodec::name() const
{
return "System";
diff --git a/src/corelib/codecs/qwindowscodec_p.h b/src/corelib/codecs/qwindowscodec_p.h
index 5bcab0ce66..8c34dac1c7 100644
--- a/src/corelib/codecs/qwindowscodec_p.h
+++ b/src/corelib/codecs/qwindowscodec_p.h
@@ -65,7 +65,6 @@ public:
QString convertToUnicode(const char *, int, ConverterState *) const override;
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
- QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
QByteArray name() const override;
int mibEnum() const override;
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 36567f5106..92cb327577 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -45,6 +45,10 @@
#include "private/qsimd_p.h"
#include "private/qstringiterator_p.h"
+#ifdef Q_OS_WIN
+#include <qt_windows.h>
+#endif
+
QT_BEGIN_NAMESPACE
enum { Endian = 0, Data = 1 };
@@ -987,6 +991,190 @@ QString qFromUtfEncoded(const QByteArray &ba)
return QUtf8::convertToUnicode(ba.constData(), ba.length());
}
+#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
+static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, QStringConverter::State *state)
+{ // Slow-path decoder: walks the CP_ACP input one multibyte character at a time; characters that fail to convert are not appended.
+ if (!chars || !length)
+ return QString(); // null pointer or zero length: nothing to decode
+
+ int copyLocation = 0; // offset in mbcs at which the caller's bytes are copied
+ int extra = 2; // room for the two trailing NUL bytes written below
+ if (state && state->remainingChars) {
+ copyLocation = state->remainingChars; // leave space in front for data carried over in the state
+ extra += copyLocation;
+ }
+ int newLength = length + extra; // NOTE(review): qsizetype is narrowed to int here
+ char *mbcs = new char[newLength];
+ // ensure that the working copy ends with two NUL bytes
+ mbcs[newLength-1] = 0;
+ mbcs[newLength-2] = 0;
+ memcpy(&(mbcs[copyLocation]), chars, length);
+ if (copyLocation) {
+ // restore the pending byte saved in the state; NOTE(review): only index 0 is filled, even if remainingChars > 1
+ mbcs[0] = (char)state->state_data[0];
+ state->remainingChars = 0;
+ }
+ const char *mb = mbcs;
+#if !defined(Q_OS_WINRT)
+ const char *next = 0;
+ QString s;
+ while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) { // loop ends when CharNextExA no longer advances
+ wchar_t wc[2] ={0};
+ int charlength = next - mb; // byte length of the current multibyte character
+ int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
+ if (len>0) {
+ s.append(QChar(wc[0])); // only the first converted code unit is kept
+ } else {
+ int r = GetLastError();
+ // newLength-3 is the index of the last data byte (the final two bytes are the NUL padding): stash it in the state instead of losing it
+ if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
+ state->remainingChars = 1;
+ state->state_data[0] = (char)*mb;
+ }
+ }
+ mb = next;
+ }
+#else
+ QString s;
+ size_t size = mbstowcs(NULL, mb, length); // presumably a sizing call (destination is NULL) - TODO confirm
+ if (size == size_t(-1)) {
+ Q_ASSERT("Error in CE TextCodec"); // NOTE(review): a string literal is never null, so this assertion can never fire
+ return QString(); // NOTE(review): returns without delete[] mbcs, so the buffer leaks on this path
+ }
+ wchar_t* ws = new wchar_t[size + 2]; // two extra slots, zeroed below
+ ws[size +1] = 0;
+ ws[size] = 0;
+ size = mbstowcs(ws, mb, length);
+ for (size_t i = 0; i < size; i++)
+ s.append(QChar(ws[i]));
+ delete [] ws;
+#endif
+ delete [] mbcs;
+ return s;
+}
+
+
+QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStringConverter::State *state)
+{ // Decodes CP_ACP bytes to a QString; a trailing byte that cannot be converted yet is parked in *state (when given) for the next call.
+ Q_ASSERT(length < INT_MAX); // ### FIXME: length is stored in an int just below
+ const char *mb = chars;
+ int mblen = length; // narrowed to int; guarded only by the assert above
+
+ if (!mb || !mblen)
+ return QString(); // null pointer or zero length: nothing to decode
+
+ QVarLengthArray<wchar_t, 4096> wc(4096); // output buffer; resized on ERROR_INSUFFICIENT_BUFFER below
+ int len;
+ QString sp; // holds the character decoded from the pending byte, if any
+ bool prepend = false; // true when sp must be put in front of the main result
+ char state_data = 0;
+ int remainingChars = 0;
+
+ // take a local copy of the incoming state
+ if (state) {
+ state_data = (char)state->state_data[0];
+ remainingChars = state->remainingChars;
+ }
+
+ // convert the pending byte together with the first new byte (if a byte is pending)
+ if (state && remainingChars) {
+ char prev[3] = {0};
+ prev[0] = state_data;
+ prev[1] = mb[0];
+ remainingChars = 0; // the local pending marker is cleared whether or not the conversion succeeds
+ len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+ prev, 2, wc.data(), wc.length());
+ if (len) {
+ sp.append(QChar(wc[0]));
+ if (mblen == 1) { // the single input byte was used up completing the pending character
+ state->remainingChars = 0;
+ return sp;
+ }
+ prepend = true;
+ mb++; // skip the input byte that completed the pending character
+ mblen--;
+ wc[0] = 0;
+ }
+ }
+
+ while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
+ mb, mblen, wc.data(), wc.length()))) { // a zero return means failure; inspect GetLastError() and retry or give up
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+ mb, mblen, 0, 0); // zero-sized output buffer: ask for the required length
+ wc.resize(wclen);
+ } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
+ // trim trailing NUL bytes to find the last non-NUL byte
+ while (mblen > 1 && !(mb[mblen-1]))
+ mblen--;
+ // nothing left to trim, or one trailing byte was already set aside: fall back to the char-by-char path
+ if ((mblen <= 1) || (remainingChars && state_data))
+ return convertToUnicodeCharByChar(chars, length, state);
+ // set the last byte aside as pending and try again without it
+ state_data = mb[mblen-1];
+ remainingChars = 1;
+ mblen--;
+ } else {
+ // Any other error: warn and leave the loop with len == 0.
+ qWarning("MultiByteToWideChar: Cannot convert multibyte text");
+ break;
+ }
+ }
+
+ if (len <= 0)
+ return QString(); // nothing converted; state is left untouched on this path
+
+ if (wc[len-1] == 0) // len - 1: we don't want terminator
+ --len;
+
+ // write the (possibly updated) pending byte back to the caller's state
+ if (state) {
+ state->state_data[0] = (char)state_data;
+ state->remainingChars = remainingChars;
+ }
+ QString s((QChar*)wc.data(), len); // reinterprets the wchar_t buffer as QChar
+ if (prepend) {
+ return sp+s;
+ }
+ return s;
+}
+
+QByteArray QLocal8Bit::convertFromUnicode(const QChar *ch, qsizetype uclen, QStringConverter::State *state)
+{ // Encodes uclen QChars to CP_ACP bytes via WideCharToMultiByte. NOTE(review): the state parameter is not used in this body.
+ Q_ASSERT(uclen < INT_MAX); // ### FIXME: only this assert guards the size
+ if (!ch)
+ return QByteArray(); // null input: default-constructed result
+ if (uclen == 0)
+ return QByteArray(""); // zero-length input: result built from "" rather than default-constructed
+ BOOL used_def; // receives the "default character used" flag; never read afterwards
+ QByteArray mb(4096, 0); // initial output buffer of 4096 zero bytes
+ int len;
+ while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
+ mb.data(), mb.size()-1, 0, &used_def))) // a zero return means failure
+ {
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
+ (const wchar_t*)ch, uclen,
+ 0, 0, 0, &used_def)); // zero-sized output buffer: ask for the required length, plus one byte
+ // and try again with the larger buffer...
+ } else {
+ // Fail. Probably can't happen in fact (dwFlags is 0).
+#ifndef QT_NO_DEBUG
+ // Can't use qWarning(), as it'll recurse to handle %ls
+ fprintf(stderr,
+ "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
+ r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16()));
+#endif
+ break; // leaves the loop with len == 0
+ }
+ }
+ mb.resize(len); // shrink to the number of bytes actually written (0 after a failure)
+ return mb;
+}
+#endif
+
/*!
\enum QStringConverter::Flag
@@ -1108,6 +1296,21 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state
return out + s.length();
}
+static QChar *fromLocal8Bit(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
+{ // Decodes `in` through QLocal8Bit::convertToUnicode and copies the resulting QChars to `out`.
+ QString s = QLocal8Bit::convertToUnicode(in, length, state); // conversion goes through a temporary QString
+ memcpy(out, s.constData(), s.length()*sizeof(QChar)); // no bounds check here: assumes the caller sized `out` sufficiently - TODO confirm
+ return out + s.length(); // pointer just past the last QChar written
+}
+
+static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
+{ // Encodes `in` through QLocal8Bit::convertFromUnicode and copies the resulting bytes to `out`.
+ QByteArray s = QLocal8Bit::convertFromUnicode(in.data(), in.length(), state); // conversion goes through a temporary QByteArray
+ memcpy(out, s.constData(), s.length()); // no bounds check here: assumes the caller sized `out` sufficiently - TODO confirm
+ return out + s.length(); // pointer just past the last byte written
+}
+
+
static qsizetype fromUtf8Len(qsizetype l) { return l + 1; }
static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
@@ -1125,7 +1328,8 @@ const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringCo
{ fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len },
{ fromUtf32, fromUtf32Len, toUtf32, toUtf32Len },
{ fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len },
- { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len }
+ { fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len },
+ { fromLocal8Bit, fromUtf8Len, toLocal8Bit, toUtf8Len }
};
QT_END_NAMESPACE
diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h
index e91975b70f..d3c0e9a502 100644
--- a/src/corelib/text/qstringconverter.h
+++ b/src/corelib/text/qstringconverter.h
@@ -96,7 +96,8 @@ public:
Utf32,
Utf32LE,
Utf32BE,
- LastEncoding = Utf32BE
+ Locale,
+ LastEncoding = Locale
};
protected:
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
index 5764979542..763e3761d5 100644
--- a/src/corelib/text/qstringconverter_p.h
+++ b/src/corelib/text/qstringconverter_p.h
@@ -312,6 +312,19 @@ struct QUtf32
static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
};
+struct QLocal8Bit // static helpers for converting between the local 8-bit encoding and UTF-16
+{
+#if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED) // non-Windows or bootstrapped builds: forward straight to QUtf8
+ static QString convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state)
+ { return QUtf8::convertToUnicode(chars, len, state); }
+ static QByteArray convertFromUnicode(const QChar *chars, qsizetype len, QStringConverter::State *state)
+ { return QUtf8::convertFromUnicode(chars, len, state); }
+#else // non-bootstrapped Windows builds: declarations only, defined out of line
+ static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *);
+ static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *);
+#endif
+};
+
/*
Converts from different utf encodings looking at a possible byte order mark at the
beginning of the string. If no BOM exists, utf-8 is assumed.