diff options
author | Lars Knoll <lars.knoll@qt.io> | 2020-04-30 10:30:40 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-05-14 07:48:55 +0200 |
commit | 13af1312f7416dd23baf512dcb9e51dce3d936fc (patch) | |
tree | 64ec3db34cc4df585724ec7ca9d2c91f7915ed60 /src/corelib/text/qstringconverter.cpp | |
parent | a639bcda1e42f48fa32885ede77f9fd320ce731c (diff) |
Add QStringConverter::encodingForData()
Add a method that tries to determine the encoding of the data
from an initial byte order mark.
Change-Id: I348c51a3d4db9b434af53359b739a7e17acfc760
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 40 |
1 file changed, 40 insertions, 0 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 4972bcde4f..9a3d92dbaa 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -1491,6 +1491,46 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(cons return std::nullopt; } +std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(const char *buf, qsizetype arraySize, char16_t expectedFirstCharacter) +{ + if (arraySize > 3) { + uint uc = qFromUnaligned<uint>(buf); + if (uc == qToBigEndian(uint(QChar::ByteOrderMark))) + return QStringConverter::Utf32BE; + if (uc == qToLittleEndian(uint(QChar::ByteOrderMark))) + return QStringConverter::Utf32LE; + if (expectedFirstCharacter) { + // catch also anything starting with the expected character + if (qToLittleEndian(uc) == expectedFirstCharacter) + return QStringConverter::Utf32LE; + else if (qToBigEndian(uc) == expectedFirstCharacter) + return QStringConverter::Utf32BE; + } + } + + if (arraySize > 2) { + static const char utf8bom[] = "\xef\xbb\xbf"; + if (memcmp(buf, utf8bom, sizeof(utf8bom) - 1) == 0) + return QStringConverter::Utf8; + } + + if (arraySize > 1) { + ushort uc = qFromUnaligned<ushort>(buf); + if (uc == qToBigEndian(ushort(QChar::ByteOrderMark))) + return QStringConverter::Utf16BE; + if (uc == qToLittleEndian(ushort(QChar::ByteOrderMark))) + return QStringConverter::Utf16LE; + if (expectedFirstCharacter) { + // catch also anything starting with the expected character + if (qToLittleEndian(uc) == expectedFirstCharacter) + return QStringConverter::Utf16LE; + else if (qToBigEndian(uc) == expectedFirstCharacter) + return QStringConverter::Utf16BE; + } + } + return std::nullopt; +} + const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e) { return encodingInterfaces[int(e)].name; |