From 3b778df102e82a7cf4b1db7fca0bcfeadfe98857 Mon Sep 17 00:00:00 2001
From: Konstantin Ritt <ritt.ks@gmail.com>
Date: Thu, 5 Apr 2012 02:28:37 +0300
Subject: minor improvement for NormalizationCorrections

let's not hardcode the latest affected version value and simply use
the one parsed from NormalizationCorrections.txt

Change-Id: I37021e8238d77deada4c5ba7a2d160c87186b9dd
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
---
 util/unicode/main.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'util')
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 4a11ec667b..2b4a76363f 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -1078,7 +1078,7 @@ static void readDerivedNormalizationProps()
 struct NormalizationCorrection {
     uint codepoint;
     uint mapped;
-    uint version;
+    int version;
 };
 
 static QByteArray createNormalizationCorrections()
@@ -1099,6 +1099,7 @@ static QByteArray createNormalizationCorrections()
 
            "static const NormalizationCorrection uc_normalization_corrections[] = {\n";
 
+    int maxVersion = 0;
     int numCorrections = 0;
     while (!f.atEnd()) {
         QByteArray line;
@@ -1135,11 +1136,13 @@ static QByteArray createNormalizationCorrections()
         out += "    { 0x" + QByteArray::number(c.codepoint, 16) + ", 0x" + QByteArray::number(c.mapped, 16)
              + ", " + QString::number(c.version) + " },\n";
         ++numCorrections;
+        maxVersion = qMax(c.version, maxVersion);
     }
 
     out += "};\n\n"
 
-           "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n\n";
+           "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n"
+           "enum { NormalizationCorrectionsVersionMax = " + QByteArray::number(maxVersion) + " };\n\n";
 
     return out;
 }
-- 
cgit v1.2.3


From 50fefebc8403c0f293210c6dc5a98adb19776b76 Mon Sep 17 00:00:00 2001
From: Konstantin Ritt <ritt.ks@gmail.com>
Date: Sun, 8 Apr 2012 10:18:45 +0300
Subject: replace hardcoded values with surrogate handling methods

Change-Id: Iba079953c46a29404232d2dacbe0c90170097d51
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@nokia.com>
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
---
 util/unicode/main.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'util')

diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 2b4a76363f..d769176c6f 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -465,7 +465,7 @@ static int appendToSpecialCaseMap(const QList<int> &map)
     QList<int> utf16map;
     for (int i = 0; i < map.size(); ++i) {
         int val = map.at(i);
-        if (val >= 0x10000) {
+        if (QChar::requiresSurrogates(val)) {
             utf16map << QChar::highSurrogate(val);
             utf16map << QChar::lowSurrogate(val);
         } else {
@@ -789,7 +789,7 @@ static void readUnicodeData()
                 qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")";
             data.p.upperCaseDiff = diff;
             maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
-            if (codepoint >= 0x10000 || upperCase >= 0x10000) {
+            if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(upperCase)) {
                 // if the conditions below doesn't hold anymore we need to modify our upper casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
@@ -803,7 +803,7 @@ static void readUnicodeData()
                 qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")";
             data.p.lowerCaseDiff = diff;
             maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
-            if (codepoint >= 0x10000 || lowerCase >= 0x10000) {
+            if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(lowerCase)) {
                 // if the conditions below doesn't hold anymore we need to modify our lower casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
@@ -820,7 +820,7 @@ static void readUnicodeData()
                 qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")";
             data.p.titleCaseDiff = diff;
             maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
-            if (codepoint >= 0x10000 || titleCase >= 0x10000) {
+            if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(titleCase)) {
                 // if the conditions below doesn't hold anymore we need to modify our title casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
@@ -1253,7 +1253,7 @@ static void readSpecialCasing()
 
         // if the condition below doesn't hold anymore we need to modify our
         // lower/upper/title casing code and case folding code
-        Q_ASSERT(codepoint < 0x10000);
+        Q_ASSERT(!QChar::requiresSurrogates(codepoint));
 
 //         qDebug() << "codepoint" << hex << codepoint;
 //         qDebug() << line;
@@ -1359,7 +1359,7 @@ static void readCaseFolding()
                 qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")";
             ud.p.caseFoldDiff = diff;
             maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
-            if (codepoint >= 0x10000 || caseFolded >= 0x10000) {
+            if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(caseFolded)) {
                 // if the conditions below doesn't hold anymore we need to modify our case folding code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
@@ -2317,11 +2317,11 @@ static QByteArray createCompositionInfo()
             if (!d.decomposition.isEmpty()) {
                 int utf16Chars = 0;
                 for (int j = 0; j < d.decomposition.size(); ++j)
-                    utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1;
+                    utf16Chars += QChar::requiresSurrogates(d.decomposition.at(j)) ? 2 : 1;
                 decompositions.append(d.decompositionType + (utf16Chars<<8));
                 for (int j = 0; j < d.decomposition.size(); ++j) {
                     int code = d.decomposition.at(j);
-                    if (code >= 0x10000) {
+                    if (QChar::requiresSurrogates(code)) {
                         // save as surrogate pair
                         ushort high = QChar::highSurrogate(code);
                         ushort low = QChar::lowSurrogate(code);
@@ -2358,11 +2358,11 @@ static QByteArray createCompositionInfo()
             if (!d.decomposition.isEmpty()) {
                 int utf16Chars = 0;
                 for (int j = 0; j < d.decomposition.size(); ++j)
-                    utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1;
+                    utf16Chars += QChar::requiresSurrogates(d.decomposition.at(j)) ? 2 : 1;
                 decompositions.append(d.decompositionType + (utf16Chars<<8));
                 for (int j = 0; j < d.decomposition.size(); ++j) {
                     int code = d.decomposition.at(j);
-                    if (code >= 0x10000) {
+                    if (QChar::requiresSurrogates(code)) {
                         // save as surrogate pair
                         ushort high = QChar::highSurrogate(code);
                         ushort low = QChar::lowSurrogate(code);
-- 
cgit v1.2.3


From 73b24486edd6b64165821bfb587cec9c9b078796 Mon Sep 17 00:00:00 2001
From: Konstantin Ritt <ritt.ks@gmail.com>
Date: Tue, 10 Apr 2012 23:39:40 +0300
Subject: UCD-5.0: apply Corrigendum #6

http://unicode.org/versions/corrigendum6.html:
> in Unicode 5.0, the list of characters with the Bidi_Mirrored property
> was made consistent for brackets and quotation marks, in preparation for
> new constraints on bidi mirroring. However, after publication of
> Unicode 5.0.0 it was discovered that this change adversely affected
> several quotation mark characters in deployed data.

Task-number: QTBUG-25169
Change-Id: Id49caf401af2d5a1e6dbcc32b2f350aa20b7f901
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
---
 util/unicode/data/BidiMirroring.txt | 17 +++--------------
 util/unicode/data/UnicodeData.txt   | 22 +++++++++++-----------
 2 files changed, 14 insertions(+), 25 deletions(-)

(limited to 'util')

diff --git a/util/unicode/data/BidiMirroring.txt b/util/unicode/data/BidiMirroring.txt
index f9a09c1345..9a1b25fa4d 100644
--- a/util/unicode/data/BidiMirroring.txt
+++ b/util/unicode/data/BidiMirroring.txt
@@ -1,12 +1,12 @@
 # BidiMirroring-5.0.0.txt
-# Date: 2006-02-16, 16:11:00 PST [KW]
+# Date: 2007-08-22, 14:40:00 PDT [KW]
 #
-# Bidi_Mirroring_Glyph Property
+# Bidi_Mirroring_Glyph Property (Corrigendum #6 applied)
 # 
 # This file is an informative contributory data file in the
 # Unicode Character Database.
 #
-# Copyright (c) 1991-2006 Unicode, Inc.
+# Copyright (c) 1991-2007 Unicode, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
 # This data file lists characters that have the mirrored property
@@ -52,10 +52,6 @@
 0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS
 169B; 169C # OGHAM FEATHER MARK
 169C; 169B # OGHAM REVERSED FEATHER MARK
-2018; 2019 # [BEST FIT] LEFT SINGLE QUOTATION MARK
-2019; 2018 # [BEST FIT] RIGHT SINGLE QUOTATION MARK
-201C; 201D # [BEST FIT] LEFT DOUBLE QUOTATION MARK
-201D; 201C # [BEST FIT] RIGHT DOUBLE QUOTATION MARK
 2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
@@ -366,8 +362,6 @@
 3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
 301A; 301B # LEFT WHITE SQUARE BRACKET
 301B; 301A # RIGHT WHITE SQUARE BRACKET
-301D; 301E # REVERSED DOUBLE PRIME QUOTATION MARK
-301E; 301D # DOUBLE PRIME QUOTATION MARK
 FE59; FE5A # SMALL LEFT PARENTHESIS
 FE5A; FE59 # SMALL RIGHT PARENTHESIS
 FE5B; FE5C # SMALL LEFT CURLY BRACKET
@@ -393,10 +387,6 @@ FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
 # For these characters it is up to the rendering system
 #   to provide mirrored glyphs.
 
-# 201A; SINGLE LOW-9 QUOTATION MARK
-# 201B; SINGLE HIGH-REVERSED-9 QUOTATION MARK
-# 201E; DOUBLE LOW-9 QUOTATION MARK
-# 201F; DOUBLE HIGH-REVERSED-9 QUOTATION MARK
 # 2140; DOUBLE-STRUCK N-ARY SUMMATION
 # 2201; COMPLEMENT
 # 2202; PARTIAL DIFFERENTIAL
@@ -572,7 +562,6 @@ FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
 # 2AF3; PARALLEL WITH TILDE OPERATOR
 # 2AFB; TRIPLE SOLIDUS BINARY RELATION
 # 2AFD; DOUBLE SOLIDUS OPERATOR
-# 301F; LOW DOUBLE PRIME QUOTATION MARK
 # 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
 # 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
 # 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
diff --git a/util/unicode/data/UnicodeData.txt b/util/unicode/data/UnicodeData.txt
index 299f0e57d1..1588852004 100644
--- a/util/unicode/data/UnicodeData.txt
+++ b/util/unicode/data/UnicodeData.txt
@@ -6118,14 +6118,14 @@
 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
 2016;DOUBLE VERTICAL LINE;Po;0;ON;;;;;N;DOUBLE VERTICAL BAR;;;;
 2017;DOUBLE LOW LINE;Po;0;ON;<compat> 0020 0333;;;;N;SPACING DOUBLE UNDERSCORE;;;;
-2018;LEFT SINGLE QUOTATION MARK;Pi;0;ON;;;;;Y;SINGLE TURNED COMMA QUOTATION MARK;;;;
-2019;RIGHT SINGLE QUOTATION MARK;Pf;0;ON;;;;;Y;SINGLE COMMA QUOTATION MARK;;;;
-201A;SINGLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;Y;LOW SINGLE COMMA QUOTATION MARK;;;;
-201B;SINGLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;Y;SINGLE REVERSED COMMA QUOTATION MARK;;;;
-201C;LEFT DOUBLE QUOTATION MARK;Pi;0;ON;;;;;Y;DOUBLE TURNED COMMA QUOTATION MARK;;;;
-201D;RIGHT DOUBLE QUOTATION MARK;Pf;0;ON;;;;;Y;DOUBLE COMMA QUOTATION MARK;;;;
-201E;DOUBLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;Y;LOW DOUBLE COMMA QUOTATION MARK;;;;
-201F;DOUBLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;Y;DOUBLE REVERSED COMMA QUOTATION MARK;;;;
+2018;LEFT SINGLE QUOTATION MARK;Pi;0;ON;;;;;N;SINGLE TURNED COMMA QUOTATION MARK;;;;
+2019;RIGHT SINGLE QUOTATION MARK;Pf;0;ON;;;;;N;SINGLE COMMA QUOTATION MARK;;;;
+201A;SINGLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;N;LOW SINGLE COMMA QUOTATION MARK;;;;
+201B;SINGLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;N;SINGLE REVERSED COMMA QUOTATION MARK;;;;
+201C;LEFT DOUBLE QUOTATION MARK;Pi;0;ON;;;;;N;DOUBLE TURNED COMMA QUOTATION MARK;;;;
+201D;RIGHT DOUBLE QUOTATION MARK;Pf;0;ON;;;;;N;DOUBLE COMMA QUOTATION MARK;;;;
+201E;DOUBLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;N;LOW DOUBLE COMMA QUOTATION MARK;;;;
+201F;DOUBLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;N;DOUBLE REVERSED COMMA QUOTATION MARK;;;;
 2020;DAGGER;Po;0;ON;;;;;N;;;;;
 2021;DOUBLE DAGGER;Po;0;ON;;;;;N;;;;;
 2022;BULLET;Po;0;ON;;;;;N;;;;;
@@ -9480,9 +9480,9 @@
 301A;LEFT WHITE SQUARE BRACKET;Ps;0;ON;;;;;Y;OPENING WHITE SQUARE BRACKET;;;;
 301B;RIGHT WHITE SQUARE BRACKET;Pe;0;ON;;;;;Y;CLOSING WHITE SQUARE BRACKET;;;;
 301C;WAVE DASH;Pd;0;ON;;;;;N;;;;;
-301D;REVERSED DOUBLE PRIME QUOTATION MARK;Ps;0;ON;;;;;Y;;;;;
-301E;DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;Y;;;;;
-301F;LOW DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;Y;;;;;
+301D;REVERSED DOUBLE PRIME QUOTATION MARK;Ps;0;ON;;;;;N;;;;;
+301E;DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;N;;;;;
+301F;LOW DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;N;;;;;
 3020;POSTAL MARK FACE;So;0;ON;;;;;N;;;;;
 3021;HANGZHOU NUMERAL ONE;Nl;0;L;;;;1;N;;;;;
 3022;HANGZHOU NUMERAL TWO;Nl;0;L;;;;2;N;;;;;
-- 
cgit v1.2.3