summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools
diff options
context:
space:
mode:
author Konstantin Ritt <ritt.ks@gmail.com> 2012-04-26 19:29:08 +0300
committer Qt by Nokia <qt-info@nokia.com> 2012-05-04 15:24:52 +0200
commit 46b78113b22428e6f8540193fcf0e00591dbd724 (patch)
tree f7c0259844635c6a086e978d793deae16628a7bb /src/corelib/tools
parent 41b1fb918b7428aaebb2e237caa2f62a1667f0b1 (diff)
add support for non-BMP ligatures
> http://www.unicode.org/versions/Unicode5.2.0/
> D. Character Additions:
> There are three new characters in the newly-encoded Kaithi script that will require changes in implementations which make hard-coded assumptions about composition during normalization. Most new characters added to the standard with decompositions cannot be generated by the operations toNFC() or toNFKC(), but these three can. Implementers should check their code carefully to ensure that it handles these three characters correctly.
>     U+1109A KAITHI LETTER DDDHA
>     U+1109C KAITHI LETTER RHA
>     U+110AB KAITHI LETTER VA

UCD 6.1 adds two more of them:
    U+1112E CHAKMA VOWEL SIGN O
    U+1112F CHAKMA VOWEL SIGN AU

Change-Id: I781a26848078d8b83a182b0fd4e681be2a6d9a27
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- src/corelib/tools/qchar.cpp 65
-rw-r--r-- src/corelib/tools/qunicodetables.cpp 181
2 files changed, 176 insertions, 70 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp
index 89017fee58..358653a412 100644
--- a/src/corelib/tools/qchar.cpp
+++ b/src/corelib/tools/qchar.cpp
@@ -1446,7 +1446,17 @@ inline bool operator<(ushort u1, const UCS2Pair &ligature)
inline bool operator<(const UCS2Pair &ligature, ushort u1)
{ return ligature.u1 < u1; }
-static ushort ligatureHelper(ushort u1, ushort u2)
+struct UCS2SurrogatePair {
+ UCS2Pair p1;
+ UCS2Pair p2;
+};
+
+inline bool operator<(uint u1, const UCS2SurrogatePair &ligature)
+{ return u1 < QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2); }
+inline bool operator<(const UCS2SurrogatePair &ligature, uint u1)
+{ return QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2) < u1; }
+
+static uint inline ligatureHelper(uint u1, uint u2)
{
if (u1 >= Hangul_LBase && u1 <= Hangul_SBase + Hangul_SCount) {
// compute Hangul syllable composition as per UAX #15
@@ -1471,9 +1481,14 @@ static ushort ligatureHelper(ushort u1, ushort u2)
return 0;
const unsigned short *ligatures = uc_ligature_map+index;
ushort length = *ligatures++;
- {
+ if (QChar::requiresSurrogates(u1)) {
+ const UCS2SurrogatePair *data = reinterpret_cast<const UCS2SurrogatePair *>(ligatures);
+ const UCS2SurrogatePair *r = qBinaryFind(data, data + length, u1);
+ if (r != data + length)
+ return QChar::surrogateToUcs4(r->p2.u1, r->p2.u2);
+ } else {
const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
- const UCS2Pair *r = qBinaryFind(data, data + length, u1);
+ const UCS2Pair *r = qBinaryFind(data, data + length, ushort(u1));
if (r != data + length)
return r->u2;
}
@@ -1485,14 +1500,17 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
{
QString &s = *str;
- if (s.length() - from < 2)
+ if (from < 0 || s.length() - from < 2)
return;
- // the loop can partly ignore high Unicode as all ligatures are in the BMP
- int starter = 0;
+ int starter = 0; // starter position
+ uint stcode = 0; // starter code point
+ int next = -1;
int lastCombining = 0;
+
int pos = from;
while (pos < s.length()) {
+ int i = pos;
uint uc = s.at(pos).unicode();
if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
ushort low = s.at(pos+1).unicode();
@@ -1501,26 +1519,43 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
++pos;
}
}
+
const QUnicodeTables::Properties *p = qGetProp(uc);
if (p->unicodeVersion > version || p->unicodeVersion == QChar::Unicode_Unassigned) {
- starter = -1; // to prevent starter == pos - 1
- lastCombining = 0;
+ starter = -1;
+ next = -1; // to prevent i == next
+ lastCombining = 255; // to prevent combining > lastCombining
++pos;
continue;
}
+
int combining = p->combiningClass;
- if (starter == pos - 1 || combining > lastCombining) {
+ if (i == next || combining > lastCombining) {
+ Q_ASSERT(starter >= from);
// allowed to form ligature with S
- QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
- if (ligature.unicode()) {
- s[starter] = ligature;
- s.remove(pos, 1);
+ uint ligature = ligatureHelper(stcode, uc);
+ if (ligature) {
+ stcode = ligature;
+ QChar *d = s.data();
+ // ligatureHelper() never changes planes
+ if (QChar::requiresSurrogates(ligature)) {
+ d[starter] = QChar::highSurrogate(ligature);
+ d[starter + 1] = QChar::lowSurrogate(ligature);
+ s.remove(i, 2);
+ } else {
+ d[starter] = ligature;
+ s.remove(i, 1);
+ }
continue;
}
}
- if (!combining)
- starter = pos;
+ if (combining == 0) {
+ starter = i;
+ stcode = uc;
+ next = pos + 1;
+ }
lastCombining = combining;
+
++pos;
}
}
diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp
index ea61d2090d..04031251e4 100644
--- a/src/corelib/tools/qunicodetables.cpp
+++ b/src/corelib/tools/qunicodetables.cpp
@@ -7616,58 +7616,92 @@ static const unsigned short uc_decomposition_map[] = {
static const unsigned short uc_ligature_trie[] = {
// 0 - 0x3100
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 424, 456, 488, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 520, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 552, 392, 392, 392, 584, 616, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 648, 680, 392, 392, 712, 744, 392,
- 392, 392, 776, 392, 392, 392, 808, 392,
- 392, 840, 872, 392, 392, 392, 904, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
-
- 392, 936, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 968, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
-
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
- 392, 392, 392, 392, 392, 392, 392, 392,
-
- 392, 392, 392, 392, 1000, 392, 392, 392,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 663, 695, 727, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 759, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 791, 631, 631, 631, 823, 855, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 887, 919, 631, 631, 951, 983, 631,
+ 631, 631, 1015, 631, 631, 631, 1047, 631,
+ 631, 1079, 1111, 631, 631, 631, 1143, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+
+ 631, 1175, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 1207, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631,
+
+ 631, 631, 631, 631, 1239, 631, 631, 631,
+
+ // 0x3100 - 0x12000
+
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271,
+ 1271, 1271, 1271, 1271, 1271, 1271, 1271,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
@@ -7768,11 +7802,48 @@ static const unsigned short uc_ligature_trie[] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
- 0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
+ 0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
};
-#define GET_LIGATURE_INDEX(u2) \
- (u2 < 0x3100 ? uc_ligature_trie[uc_ligature_trie[u2>>5] + (u2 & 0x1f)] : 0xffff);
+#define GET_LIGATURE_INDEX(ucs4) \
+ (ucs4 < 0x3100 \
+ ? (uc_ligature_trie[uc_ligature_trie[ucs4>>5] + (ucs4 & 0x1f)]) \
+ : (ucs4 < 0x12000\
+ ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x3100)>>8) + 0x188] + (ucs4 & 0xff)]\
+ : 0xffff))
static const unsigned short uc_ligature_map[] = {
0x54, 0x41, 0xc0, 0x45, 0xc8, 0x49, 0xcc, 0x4e,