From 46b78113b22428e6f8540193fcf0e00591dbd724 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Thu, 26 Apr 2012 19:29:08 +0300 Subject: add support for non-BMP ligatures > http://www.unicode.org/versions/Unicode5.2.0/ D. Character Additions: There are three new characters in the newly-encoded Kaithi script that will require changes in implementations which make hard-coded assumptions about composition during normalization. Most new characters added to the standard with decompositions cannot be generated by the operations toNFC() or toNFKC), but these three can. Implementers should check their code carefully to ensure that it handles these three characters correctly. U+1109A KAITHI LETTER DDDHA U+1109C KAITHI LETTER RHA U+110AB KAITHI LETTER VA UCD 6.1 adds two more of them: U+1112E CHAKMA VOWEL SIGN O U+1112F CHAKMA VOWEL SIGN AU Change-Id: I781a26848078d8b83a182b0fd4e681be2a6d9a27 Reviewed-by: Lars Knoll --- src/corelib/tools/qunicodetables.cpp | 181 ++++++++++++++++++++++++----------- 1 file changed, 126 insertions(+), 55 deletions(-) (limited to 'src/corelib/tools/qunicodetables.cpp') diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp index ea61d2090d..04031251e4 100644 --- a/src/corelib/tools/qunicodetables.cpp +++ b/src/corelib/tools/qunicodetables.cpp @@ -7616,58 +7616,92 @@ static const unsigned short uc_decomposition_map[] = { static const unsigned short uc_ligature_trie[] = { // 0 - 0x3100 - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 424, 456, 488, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 520, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 552, 392, 392, 392, 584, 616, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 648, 680, 392, 392, 712, 744, 392, - 392, 392, 776, 392, 392, 392, 808, 392, - 392, 840, 872, 392, 392, 392, 904, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - - 392, 936, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 968, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - 392, 392, 392, 392, 392, 392, 392, 392, - - 392, 392, 392, 392, 1000, 392, 392, 392, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 663, 695, 727, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 759, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 791, 631, 631, 631, 823, 855, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 887, 919, 631, 631, 951, 983, 631, + 631, 631, 1015, 631, 631, 631, 1047, 631, + 631, 1079, 1111, 631, 631, 631, 1143, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + + 631, 1175, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 1207, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, + + 631, 631, 631, 631, 1239, 631, 631, 631, + + // 0x3100 - 0x12000 + + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 1271, + 1271, 1271, 1271, 1271, 1271, 1271, 1271, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, @@ -7768,11 +7802,48 @@ static const unsigned short uc_ligature_trie[] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - 0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff + 0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; -#define GET_LIGATURE_INDEX(u2) \ - (u2 < 0x3100 ? uc_ligature_trie[uc_ligature_trie[u2>>5] + (u2 & 0x1f)] : 0xffff); +#define GET_LIGATURE_INDEX(ucs4) \ + (ucs4 < 0x3100 \ + ? (uc_ligature_trie[uc_ligature_trie[ucs4>>5] + (ucs4 & 0x1f)]) \ + : (ucs4 < 0x12000\ + ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x3100)>>8) + 0x188] + (ucs4 & 0xff)]\ + : 0xffff)) static const unsigned short uc_ligature_map[] = { 0x54, 0x41, 0xc0, 0x45, 0xc8, 0x49, 0xcc, 0x4e, -- cgit v1.2.3