diff options
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- | src/corelib/tools/qchar.cpp | 65 | ||||
-rw-r--r-- | src/corelib/tools/qunicodetables.cpp | 181 |
2 files changed, 176 insertions, 70 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp index 89017fee58..358653a412 100644 --- a/src/corelib/tools/qchar.cpp +++ b/src/corelib/tools/qchar.cpp @@ -1446,7 +1446,17 @@ inline bool operator<(ushort u1, const UCS2Pair &ligature) inline bool operator<(const UCS2Pair &ligature, ushort u1) { return ligature.u1 < u1; } -static ushort ligatureHelper(ushort u1, ushort u2) +struct UCS2SurrogatePair { + UCS2Pair p1; + UCS2Pair p2; +}; + +inline bool operator<(uint u1, const UCS2SurrogatePair &ligature) +{ return u1 < QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2); } +inline bool operator<(const UCS2SurrogatePair &ligature, uint u1) +{ return QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2) < u1; } + +static uint inline ligatureHelper(uint u1, uint u2) { if (u1 >= Hangul_LBase && u1 <= Hangul_SBase + Hangul_SCount) { // compute Hangul syllable composition as per UAX #15 @@ -1471,9 +1481,14 @@ static ushort ligatureHelper(ushort u1, ushort u2) return 0; const unsigned short *ligatures = uc_ligature_map+index; ushort length = *ligatures++; - { + if (QChar::requiresSurrogates(u1)) { + const UCS2SurrogatePair *data = reinterpret_cast<const UCS2SurrogatePair *>(ligatures); + const UCS2SurrogatePair *r = qBinaryFind(data, data + length, u1); + if (r != data + length) + return QChar::surrogateToUcs4(r->p2.u1, r->p2.u2); + } else { const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures); - const UCS2Pair *r = qBinaryFind(data, data + length, u1); + const UCS2Pair *r = qBinaryFind(data, data + length, ushort(u1)); if (r != data + length) return r->u2; } @@ -1485,14 +1500,17 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, int from) { QString &s = *str; - if (s.length() - from < 2) + if (from < 0 || s.length() - from < 2) return; - // the loop can partly ignore high Unicode as all ligatures are in the BMP - int starter = 0; + int starter = 0; // starter position + uint stcode = 0; // starter code point + int next = -1; int lastCombining = 0; + int pos = from; while (pos < s.length()) { + int i = pos; uint uc = s.at(pos).unicode(); if (QChar(uc).isHighSurrogate() && pos < s.length()-1) { ushort low = s.at(pos+1).unicode(); @@ -1501,26 +1519,43 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, int from) ++pos; } } + const QUnicodeTables::Properties *p = qGetProp(uc); if (p->unicodeVersion > version || p->unicodeVersion == QChar::Unicode_Unassigned) { - starter = -1; // to prevent starter == pos - 1 - lastCombining = 0; + starter = -1; + next = -1; // to prevent i == next + lastCombining = 255; // to prevent combining > lastCombining ++pos; continue; } + int combining = p->combiningClass; - if (starter == pos - 1 || combining > lastCombining) { + if (i == next || combining > lastCombining) { + Q_ASSERT(starter >= from); // allowed to form ligature with S - QChar ligature = ligatureHelper(s.at(starter).unicode(), uc); - if (ligature.unicode()) { - s[starter] = ligature; - s.remove(pos, 1); + uint ligature = ligatureHelper(stcode, uc); + if (ligature) { + stcode = ligature; + QChar *d = s.data(); + // ligatureHelper() never changes planes + if (QChar::requiresSurrogates(ligature)) { + d[starter] = QChar::highSurrogate(ligature); + d[starter + 1] = QChar::lowSurrogate(ligature); + s.remove(i, 2); + } else { + d[starter] = ligature; + s.remove(i, 1); + } continue; } } - if (!combining) - starter = pos; + if (combining == 0) { + starter = i; + stcode = uc; + next = pos + 1; + } lastCombining = combining; + ++pos; } } diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp index ea61d2090d..04031251e4 100644 --- a/src/corelib/tools/qunicodetables.cpp +++ b/src/corelib/tools/qunicodetables.cpp @@ -7616,58 +7616,92 @@ static const unsigned short uc_decomposition_map[] = { static const unsigned short uc_ligature_trie[] = { // 0 - 0x3100 - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 424, 456, 488, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 520, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 552, 392, 392, 392, 584, 616, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 648, 680, 392, 392, 712, 744, 392, - 392, 392, 776, 392, 392, 392, 808, 392, - 392, 840, 872, 392, 392, 392, 904, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - - 392, 936, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 968, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - - 392, 392, 392, 392, 1000, 392, 392, 392, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 663, 695, 727, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 759, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 791, 631, 631, 631, 823, 855, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 887, 919, 631, 631, 951, 983, 631, + 631, 631, 1015, 631, 631, 631, 1047, 631, + 631, 1079, 1111, 631, 631, 631, 1143, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + + 631, 1175, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 1207, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + + 631, 631, 631, 631, 1239, 631, 631, 631, + + // 0x3100 - 0x12000 + + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, @@ -7768,11 +7802,48 @@ static const unsigned short uc_ligature_trie[] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - 0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff + 0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; -#define GET_LIGATURE_INDEX(u2) \ - (u2 < 0x3100 ? uc_ligature_trie[uc_ligature_trie[u2>>5] + (u2 & 0x1f)] : 0xffff); +#define GET_LIGATURE_INDEX(ucs4) \ + (ucs4 < 0x3100 \ + ? (uc_ligature_trie[uc_ligature_trie[ucs4>>5] + (ucs4 & 0x1f)]) \ + : (ucs4 < 0x12000\ + ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x3100)>>8) + 0x188] + (ucs4 & 0xff)]\ + : 0xffff)) static const unsigned short uc_ligature_map[] = { 0x54, 0x41, 0xc0, 0x45, 0xc8, 0x49, 0xcc, 0x4e, |