summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qstringconverter.cpp
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2020-04-30 10:30:40 +0200
committerLars Knoll <lars.knoll@qt.io>2020-05-14 07:48:55 +0200
commit13af1312f7416dd23baf512dcb9e51dce3d936fc (patch)
tree64ec3db34cc4df585724ec7ca9d2c91f7915ed60 /src/corelib/text/qstringconverter.cpp
parenta639bcda1e42f48fa32885ede77f9fd320ce731c (diff)
Add QStringConverter::encodingForData()
Add method that tries to determine the encoding of the data from an initial byte order mark. Change-Id: I348c51a3d4db9b434af53359b739a7e17acfc760 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r--src/corelib/text/qstringconverter.cpp40
1 files changed, 40 insertions, 0 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 4972bcde4f..9a3d92dbaa 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -1491,6 +1491,46 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(cons
return std::nullopt;
}
+// Guesses the encoding of the arraySize bytes at buf by inspecting at most
+// their first four bytes. A byte order mark is looked for first; when
+// expectedFirstCharacter is non-zero, data whose first UTF-32 or UTF-16 code
+// unit equals that character (in either byte order) is recognized as well.
+// Returns std::nullopt when none of the probes match.
+std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(const char *buf, qsizetype arraySize, char16_t expectedFirstCharacter)
+{
+    // Every probe below needs at least two bytes of input.
+    if (arraySize < 2)
+        return std::nullopt;
+
+    // The 32-bit probe has to run before the 16-bit one: a little-endian
+    // 32-bit byte order mark starts with the same two bytes as the 16-bit one.
+    if (arraySize >= 4) {
+        const uint first4 = qFromUnaligned<uint>(buf);
+        if (first4 == qToBigEndian(uint(QChar::ByteOrderMark)))
+            return QStringConverter::Utf32BE;
+        if (first4 == qToLittleEndian(uint(QChar::ByteOrderMark)))
+            return QStringConverter::Utf32LE;
+        // No mark: fall back to the character the caller expects first,
+        // trying little endian before big endian.
+        if (expectedFirstCharacter && qToLittleEndian(first4) == expectedFirstCharacter)
+            return QStringConverter::Utf32LE;
+        if (expectedFirstCharacter && qToBigEndian(first4) == expectedFirstCharacter)
+            return QStringConverter::Utf32BE;
+    }
+
+    if (arraySize >= 3) {
+        static const char bomUtf8[] = "\xef\xbb\xbf";
+        // sizeof counts the terminating NUL, which is not part of the mark.
+        const bool startsWithUtf8Bom = memcmp(buf, bomUtf8, sizeof(bomUtf8) - 1) == 0;
+        if (startsWithUtf8Bom)
+            return QStringConverter::Utf8;
+    }
+
+    // At least two bytes are available here (see the guard at the top).
+    const ushort first2 = qFromUnaligned<ushort>(buf);
+    if (first2 == qToBigEndian(ushort(QChar::ByteOrderMark)))
+        return QStringConverter::Utf16BE;
+    if (first2 == qToLittleEndian(ushort(QChar::ByteOrderMark)))
+        return QStringConverter::Utf16LE;
+    // Same fallback as for UTF-32, on the first 16-bit code unit.
+    if (expectedFirstCharacter && qToLittleEndian(first2) == expectedFirstCharacter)
+        return QStringConverter::Utf16LE;
+    if (expectedFirstCharacter && qToBigEndian(first2) == expectedFirstCharacter)
+        return QStringConverter::Utf16BE;
+
+    return std::nullopt;
+}
+
const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
{
return encodingInterfaces[int(e)].name;