From 3b778df102e82a7cf4b1db7fca0bcfeadfe98857 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Thu, 5 Apr 2012 02:28:37 +0300 Subject: minor improvement for NormalizationCorrections don't hardcode the latest affected version value; simply use the one parsed from NormalizationCorrections.txt Change-Id: I37021e8238d77deada4c5ba7a2d160c87186b9dd Reviewed-by: Lars Knoll --- util/unicode/main.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'util') diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 4a11ec667b..2b4a76363f 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -1078,7 +1078,7 @@ static void readDerivedNormalizationProps() struct NormalizationCorrection { uint codepoint; uint mapped; - uint version; + int version; }; static QByteArray createNormalizationCorrections() @@ -1099,6 +1099,7 @@ static QByteArray createNormalizationCorrections() "static const NormalizationCorrection uc_normalization_corrections[] = {\n"; + int maxVersion = 0; int numCorrections = 0; while (!f.atEnd()) { QByteArray line; @@ -1135,11 +1136,13 @@ static QByteArray createNormalizationCorrections() out += " { 0x" + QByteArray::number(c.codepoint, 16) + ", 0x" + QByteArray::number(c.mapped, 16) + ", " + QString::number(c.version) + " },\n"; ++numCorrections; + maxVersion = qMax(c.version, maxVersion); } out += "};\n\n" - "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n\n"; + "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n" + "enum { NormalizationCorrectionsVersionMax = " + QByteArray::number(maxVersion) + " };\n\n"; return out; } -- cgit v1.2.3 From 50fefebc8403c0f293210c6dc5a98adb19776b76 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Sun, 8 Apr 2012 10:18:45 +0300 Subject: replace hardcoded values with surrogate handling methods Change-Id: Iba079953c46a29404232d2dacbe0c90170097d51 Reviewed-by: Oswald Buddenhagen Reviewed-by: Lars Knoll 
--- util/unicode/main.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'util') diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 2b4a76363f..d769176c6f 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -465,7 +465,7 @@ static int appendToSpecialCaseMap(const QList &map) QList utf16map; for (int i = 0; i < map.size(); ++i) { int val = map.at(i); - if (val >= 0x10000) { + if (QChar::requiresSurrogates(val)) { utf16map << QChar::highSurrogate(val); utf16map << QChar::lowSurrogate(val); } else { @@ -789,7 +789,7 @@ static void readUnicodeData() qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")"; data.p.upperCaseDiff = diff; maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff)); - if (codepoint >= 0x10000 || upperCase >= 0x10000) { + if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(upperCase)) { // if the conditions below doesn't hold anymore we need to modify our upper casing code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase)); Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase)); @@ -803,7 +803,7 @@ static void readUnicodeData() qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")"; data.p.lowerCaseDiff = diff; maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff)); - if (codepoint >= 0x10000 || lowerCase >= 0x10000) { + if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(lowerCase)) { // if the conditions below doesn't hold anymore we need to modify our lower casing code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase)); Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase)); @@ -820,7 +820,7 @@ static void readUnicodeData() qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")"; data.p.titleCaseDiff = diff; maxTitleCaseDiff = 
qMax(maxTitleCaseDiff, qAbs(diff)); - if (codepoint >= 0x10000 || titleCase >= 0x10000) { + if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(titleCase)) { // if the conditions below doesn't hold anymore we need to modify our title casing code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase)); Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase)); @@ -1253,7 +1253,7 @@ static void readSpecialCasing() // if the condition below doesn't hold anymore we need to modify our // lower/upper/title casing code and case folding code - Q_ASSERT(codepoint < 0x10000); + Q_ASSERT(!QChar::requiresSurrogates(codepoint)); // qDebug() << "codepoint" << hex << codepoint; // qDebug() << line; @@ -1359,7 +1359,7 @@ static void readCaseFolding() qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")"; ud.p.caseFoldDiff = diff; maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff)); - if (codepoint >= 0x10000 || caseFolded >= 0x10000) { + if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(caseFolded)) { // if the conditions below doesn't hold anymore we need to modify our case folding code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded)); Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded)); @@ -2317,11 +2317,11 @@ static QByteArray createCompositionInfo() if (!d.decomposition.isEmpty()) { int utf16Chars = 0; for (int j = 0; j < d.decomposition.size(); ++j) - utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1; + utf16Chars += QChar::requiresSurrogates(d.decomposition.at(j)) ? 
2 : 1; decompositions.append(d.decompositionType + (utf16Chars<<8)); for (int j = 0; j < d.decomposition.size(); ++j) { int code = d.decomposition.at(j); - if (code >= 0x10000) { + if (QChar::requiresSurrogates(code)) { // save as surrogate pair ushort high = QChar::highSurrogate(code); ushort low = QChar::lowSurrogate(code); @@ -2358,11 +2358,11 @@ static QByteArray createCompositionInfo() if (!d.decomposition.isEmpty()) { int utf16Chars = 0; for (int j = 0; j < d.decomposition.size(); ++j) - utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1; + utf16Chars += QChar::requiresSurrogates(d.decomposition.at(j)) ? 2 : 1; decompositions.append(d.decompositionType + (utf16Chars<<8)); for (int j = 0; j < d.decomposition.size(); ++j) { int code = d.decomposition.at(j); - if (code >= 0x10000) { + if (QChar::requiresSurrogates(code)) { // save as surrogate pair ushort high = QChar::highSurrogate(code); ushort low = QChar::lowSurrogate(code); -- cgit v1.2.3 From 73b24486edd6b64165821bfb587cec9c9b078796 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Tue, 10 Apr 2012 23:39:40 +0300 Subject: UCD-5.0: apply Corrigendum #6 http://unicode.org/versions/corrigendum6.html: > in Unicode 5.0, the list of characters with the Bidi_Mirrored property > was made consistent for brackets and quotation marks, in preparation for > new constraints on bidi mirroring. However, after publication of > Unicode 5.0.0 it was discovered that this change adversely affected > several quotation mark characters in deployed data. 
Task-number: QTBUG-25169 Change-Id: Id49caf401af2d5a1e6dbcc32b2f350aa20b7f901 Reviewed-by: Lars Knoll --- util/unicode/data/BidiMirroring.txt | 17 +++-------------- util/unicode/data/UnicodeData.txt | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 25 deletions(-) (limited to 'util') diff --git a/util/unicode/data/BidiMirroring.txt b/util/unicode/data/BidiMirroring.txt index f9a09c1345..9a1b25fa4d 100644 --- a/util/unicode/data/BidiMirroring.txt +++ b/util/unicode/data/BidiMirroring.txt @@ -1,12 +1,12 @@ # BidiMirroring-5.0.0.txt -# Date: 2006-02-16, 16:11:00 PST [KW] +# Date: 2007-08-22, 14:40:00 PDT [KW] # -# Bidi_Mirroring_Glyph Property +# Bidi_Mirroring_Glyph Property (Corrigendum #6 applied) # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2006 Unicode, Inc. +# Copyright (c) 1991-2007 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # This data file lists characters that have the mirrored property @@ -52,10 +52,6 @@ 0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS 169B; 169C # OGHAM FEATHER MARK 169C; 169B # OGHAM REVERSED FEATHER MARK -2018; 2019 # [BEST FIT] LEFT SINGLE QUOTATION MARK -2019; 2018 # [BEST FIT] RIGHT SINGLE QUOTATION MARK -201C; 201D # [BEST FIT] LEFT DOUBLE QUOTATION MARK -201D; 201C # [BEST FIT] RIGHT DOUBLE QUOTATION MARK 2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 2045; 2046 # LEFT SQUARE BRACKET WITH QUILL @@ -366,8 +362,6 @@ 3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET 301A; 301B # LEFT WHITE SQUARE BRACKET 301B; 301A # RIGHT WHITE SQUARE BRACKET -301D; 301E # REVERSED DOUBLE PRIME QUOTATION MARK -301E; 301D # DOUBLE PRIME QUOTATION MARK FE59; FE5A # SMALL LEFT PARENTHESIS FE5A; FE59 # SMALL RIGHT PARENTHESIS FE5B; FE5C # SMALL LEFT CURLY BRACKET @@ -393,10 +387,6 @@ FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET # For these characters it is up to the 
rendering system # to provide mirrored glyphs. -# 201A; SINGLE LOW-9 QUOTATION MARK -# 201B; SINGLE HIGH-REVERSED-9 QUOTATION MARK -# 201E; DOUBLE LOW-9 QUOTATION MARK -# 201F; DOUBLE HIGH-REVERSED-9 QUOTATION MARK # 2140; DOUBLE-STRUCK N-ARY SUMMATION # 2201; COMPLEMENT # 2202; PARTIAL DIFFERENTIAL @@ -572,7 +562,6 @@ FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET # 2AF3; PARALLEL WITH TILDE OPERATOR # 2AFB; TRIPLE SOLIDUS BINARY RELATION # 2AFD; DOUBLE SOLIDUS OPERATOR -# 301F; LOW DOUBLE PRIME QUOTATION MARK # 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL # 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL # 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL diff --git a/util/unicode/data/UnicodeData.txt b/util/unicode/data/UnicodeData.txt index 299f0e57d1..1588852004 100644 --- a/util/unicode/data/UnicodeData.txt +++ b/util/unicode/data/UnicodeData.txt @@ -6118,14 +6118,14 @@ 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;; 2016;DOUBLE VERTICAL LINE;Po;0;ON;;;;;N;DOUBLE VERTICAL BAR;;;; 2017;DOUBLE LOW LINE;Po;0;ON; 0020 0333;;;;N;SPACING DOUBLE UNDERSCORE;;;; -2018;LEFT SINGLE QUOTATION MARK;Pi;0;ON;;;;;Y;SINGLE TURNED COMMA QUOTATION MARK;;;; -2019;RIGHT SINGLE QUOTATION MARK;Pf;0;ON;;;;;Y;SINGLE COMMA QUOTATION MARK;;;; -201A;SINGLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;Y;LOW SINGLE COMMA QUOTATION MARK;;;; -201B;SINGLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;Y;SINGLE REVERSED COMMA QUOTATION MARK;;;; -201C;LEFT DOUBLE QUOTATION MARK;Pi;0;ON;;;;;Y;DOUBLE TURNED COMMA QUOTATION MARK;;;; -201D;RIGHT DOUBLE QUOTATION MARK;Pf;0;ON;;;;;Y;DOUBLE COMMA QUOTATION MARK;;;; -201E;DOUBLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;Y;LOW DOUBLE COMMA QUOTATION MARK;;;; -201F;DOUBLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;Y;DOUBLE REVERSED COMMA QUOTATION MARK;;;; +2018;LEFT SINGLE QUOTATION MARK;Pi;0;ON;;;;;N;SINGLE TURNED COMMA QUOTATION MARK;;;; +2019;RIGHT SINGLE QUOTATION MARK;Pf;0;ON;;;;;N;SINGLE COMMA QUOTATION MARK;;;; +201A;SINGLE LOW-9 QUOTATION 
MARK;Ps;0;ON;;;;;N;LOW SINGLE COMMA QUOTATION MARK;;;; +201B;SINGLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;N;SINGLE REVERSED COMMA QUOTATION MARK;;;; +201C;LEFT DOUBLE QUOTATION MARK;Pi;0;ON;;;;;N;DOUBLE TURNED COMMA QUOTATION MARK;;;; +201D;RIGHT DOUBLE QUOTATION MARK;Pf;0;ON;;;;;N;DOUBLE COMMA QUOTATION MARK;;;; +201E;DOUBLE LOW-9 QUOTATION MARK;Ps;0;ON;;;;;N;LOW DOUBLE COMMA QUOTATION MARK;;;; +201F;DOUBLE HIGH-REVERSED-9 QUOTATION MARK;Pi;0;ON;;;;;N;DOUBLE REVERSED COMMA QUOTATION MARK;;;; 2020;DAGGER;Po;0;ON;;;;;N;;;;; 2021;DOUBLE DAGGER;Po;0;ON;;;;;N;;;;; 2022;BULLET;Po;0;ON;;;;;N;;;;; @@ -9480,9 +9480,9 @@ 301A;LEFT WHITE SQUARE BRACKET;Ps;0;ON;;;;;Y;OPENING WHITE SQUARE BRACKET;;;; 301B;RIGHT WHITE SQUARE BRACKET;Pe;0;ON;;;;;Y;CLOSING WHITE SQUARE BRACKET;;;; 301C;WAVE DASH;Pd;0;ON;;;;;N;;;;; -301D;REVERSED DOUBLE PRIME QUOTATION MARK;Ps;0;ON;;;;;Y;;;;; -301E;DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;Y;;;;; -301F;LOW DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;Y;;;;; +301D;REVERSED DOUBLE PRIME QUOTATION MARK;Ps;0;ON;;;;;N;;;;; +301E;DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;N;;;;; +301F;LOW DOUBLE PRIME QUOTATION MARK;Pe;0;ON;;;;;N;;;;; 3020;POSTAL MARK FACE;So;0;ON;;;;;N;;;;; 3021;HANGZHOU NUMERAL ONE;Nl;0;L;;;;1;N;;;;; 3022;HANGZHOU NUMERAL TWO;Nl;0;L;;;;2;N;;;;; -- cgit v1.2.3