summary refs log tree commit diff stats
path: root/util/unicode/main.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r-- util/unicode/main.cpp 154
1 files changed, 97 insertions, 57 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 2f6e28bd9f..d1713795dd 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -43,8 +43,8 @@
#include <private/qunicodetables_p.h>
#endif
-#define DATA_VERSION_S "7.0"
-#define DATA_VERSION_STR "QChar::Unicode_7_0"
+#define DATA_VERSION_S "8.0"
+#define DATA_VERSION_STR "QChar::Unicode_8_0"
static QHash<QByteArray, QChar::UnicodeVersion> age_map;
@@ -71,6 +71,7 @@ static void initAgeMap()
{ QChar::Unicode_6_2, "6.2" },
{ QChar::Unicode_6_3, "6.3" },
{ QChar::Unicode_7_0, "7.0" },
+ { QChar::Unicode_8_0, "8.0" },
{ QChar::Unicode_Unassigned, 0 }
};
AgeMap *d = ageMap;
@@ -719,6 +720,13 @@ static void initScriptMap()
{ QChar::Script_Khudawadi, "Khudawadi" },
{ QChar::Script_Tirhuta, "Tirhuta" },
{ QChar::Script_WarangCiti, "WarangCiti" },
+ // 8.0
+ { QChar::Script_Ahom, "Ahom" },
+ { QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" },
+ { QChar::Script_Hatran, "Hatran" },
+ { QChar::Script_Multani, "Multani" },
+ { QChar::Script_OldHungarian, "OldHungarian" },
+ { QChar::Script_SignWriting, "SignWriting" },
// unhandled
{ QChar::Script_Unknown, 0 }
};
@@ -757,6 +765,38 @@ static const char *property_string =
"};\n\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) Q_DECL_NOTHROW;\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) Q_DECL_NOTHROW;\n"
+ "\n"
+ "struct LowercaseTraits\n"
+ "{\n"
+ " static inline signed short caseDiff(const Properties *prop)\n"
+ " { return prop->lowerCaseDiff; }\n"
+ " static inline bool caseSpecial(const Properties *prop)\n"
+ " { return prop->lowerCaseSpecial; }\n"
+ "};\n"
+ "\n"
+ "struct UppercaseTraits\n"
+ "{\n"
+ " static inline signed short caseDiff(const Properties *prop)\n"
+ " { return prop->upperCaseDiff; }\n"
+ " static inline bool caseSpecial(const Properties *prop)\n"
+ " { return prop->upperCaseSpecial; }\n"
+ "};\n"
+ "\n"
+ "struct TitlecaseTraits\n"
+ "{\n"
+ " static inline signed short caseDiff(const Properties *prop)\n"
+ " { return prop->titleCaseDiff; }\n"
+ " static inline bool caseSpecial(const Properties *prop)\n"
+ " { return prop->titleCaseSpecial; }\n"
+ "};\n"
+ "\n"
+ "struct CasefoldTraits\n"
+ "{\n"
+ " static inline signed short caseDiff(const Properties *prop)\n"
+ " { return prop->caseFoldDiff; }\n"
+ " static inline bool caseSpecial(const Properties *prop)\n"
+ " { return prop->caseFoldSpecial; }\n"
+ "};\n"
"\n";
static const char *methods =
@@ -840,12 +880,14 @@ static int appendToSpecialCaseMap(const QList<int> &map)
{
QList<int> utf16map;
for (int i = 0; i < map.size(); ++i) {
- int val = map.at(i);
- if (QChar::requiresSurrogates(val)) {
- utf16map << QChar::highSurrogate(val);
- utf16map << QChar::lowSurrogate(val);
+ uint codepoint = map.at(i);
+ // if the condition below doesn't hold anymore we need to modify our special case mapping code
+ Q_ASSERT(!QChar::requiresSurrogates(codepoint));
+ if (QChar::requiresSurrogates(codepoint)) {
+ utf16map << QChar::highSurrogate(codepoint);
+ utf16map << QChar::lowSurrogate(codepoint);
} else {
- utf16map << val;
+ utf16map << codepoint;
}
}
int length = utf16map.size();
@@ -946,13 +988,16 @@ struct UnicodeData {
p.lineBreakClass = LineBreak_AL; // XX -> AL
// LineBreak.txt
// The unassigned code points that default to "ID" include ranges in the following blocks:
- // [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2F800..U+2FA1F, U+20000..U+2FFFD, U+30000..U+3FFFD]
+ // [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2B820..U+2CEAF, U+2F800..U+2FA1F]
+ // and any other reserved code points on planes 2 and 3:
+ // [U+20000..U+2FFFD, U+30000..U+3FFFD]
if ((codepoint >= 0x3400 && codepoint <= 0x4DBF)
|| (codepoint >= 0x4E00 && codepoint <= 0x9FFF)
|| (codepoint >= 0xF900 && codepoint <= 0xFAFF)
|| (codepoint >= 0x20000 && codepoint <= 0x2A6DF)
|| (codepoint >= 0x2A700 && codepoint <= 0x2B73F)
|| (codepoint >= 0x2B740 && codepoint <= 0x2B81F)
+ || (codepoint >= 0x2B820 && codepoint <= 0x2CEAF)
|| (codepoint >= 0x2F800 && codepoint <= 0x2FA1F)
|| (codepoint >= 0x20000 && codepoint <= 0x2FFFD)
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
@@ -1122,41 +1167,39 @@ static void readUnicodeData()
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
Q_ASSERT(ok);
int diff = upperCase - codepoint;
+ // if the conditions below don't hold anymore we need to modify our upper casing code
+ Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(upperCase));
+ if (QChar::requiresSurrogates(codepoint)) {
+ Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
+ }
if (qAbs(diff) >= (1<<13)) {
qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << "); map it for special case";
- // if the condition below doesn't hold anymore we need to modify our special upper casing code in qchar.cpp
- Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(upperCase));
data.p.upperCaseSpecial = true;
data.p.upperCaseDiff = appendToSpecialCaseMap(QList<int>() << upperCase);
} else {
data.p.upperCaseDiff = diff;
maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
}
- if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(upperCase)) {
- // if the conditions below doesn't hold anymore we need to modify our upper casing code
- Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
- Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
- }
}
if (!properties[UD_LowerCase].isEmpty()) {
int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
Q_ASSERT(ok);
int diff = lowerCase - codepoint;
+ // if the conditions below don't hold anymore we need to modify our lower casing code
+ Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(lowerCase));
+ if (QChar::requiresSurrogates(codepoint)) {
+ Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
+ }
if (qAbs(diff) >= (1<<13)) {
qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << "); map it for special case";
- // if the condition below doesn't hold anymore we need to modify our special lower casing code in qchar.cpp
- Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(lowerCase));
data.p.lowerCaseSpecial = true;
data.p.lowerCaseDiff = appendToSpecialCaseMap(QList<int>() << lowerCase);
} else {
data.p.lowerCaseDiff = diff;
maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
}
- if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(lowerCase)) {
- // if the conditions below doesn't hold anymore we need to modify our lower casing code
- Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
- Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
- }
}
// we want toTitleCase to map to ToUpper in case we don't have any titlecase.
if (properties[UD_TitleCase].isEmpty())
@@ -1165,21 +1208,20 @@ static void readUnicodeData()
int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
Q_ASSERT(ok);
int diff = titleCase - codepoint;
+ // if the conditions below don't hold anymore we need to modify our title casing code
+ Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(titleCase));
+ if (QChar::requiresSurrogates(codepoint)) {
+ Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
+ }
if (qAbs(diff) >= (1<<13)) {
qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << "); map it for special case";
- // if the condition below doesn't hold anymore we need to modify our special title casing code in qchar.cpp
- Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(titleCase));
data.p.titleCaseSpecial = true;
data.p.titleCaseDiff = appendToSpecialCaseMap(QList<int>() << titleCase);
} else {
data.p.titleCaseDiff = diff;
maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
}
- if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(titleCase)) {
- // if the conditions below doesn't hold anymore we need to modify our title casing code
- Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
- Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
- }
}
if (!properties[UD_DigitValue].isEmpty())
@@ -1535,8 +1577,10 @@ static QByteArray createNormalizationCorrections()
++numCorrections;
maxVersion = qMax(c.version, maxVersion);
}
+ if (out.endsWith(",\n"))
+ out.chop(2);
- out += "};\n\n"
+ out += "\n};\n\n"
"enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n"
"enum { NormalizationCorrectionsVersionMax = " + QByteArray::number(maxVersion) + " };\n\n";
@@ -1731,23 +1775,20 @@ static void readCaseFolding()
if (foldMap.size() == 1) {
int caseFolded = foldMap.at(0);
int diff = caseFolded - codepoint;
+ // if the conditions below don't hold anymore we need to modify our case folding code
+ Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(caseFolded));
+ if (QChar::requiresSurrogates(codepoint)) {
+ Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
+ }
if (qAbs(diff) >= (1<<13)) {
qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << "); map it for special case";
- // if the condition below doesn't hold anymore we need to modify our special case folding code in qchar.cpp
- Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(caseFolded));
ud.p.caseFoldSpecial = true;
ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
} else {
ud.p.caseFoldDiff = diff;
maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
}
- if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(caseFolded)) {
- // if the conditions below doesn't hold anymore we need to modify our case folding code
- Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
- Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
- }
-// if (caseFolded != codepoint + ud.p.lowerCaseDiff)
-// qDebug() << hex << codepoint;
} else {
qFatal("we currently don't support full case foldings");
// qDebug() << "special" << hex << foldMap;
@@ -2290,7 +2331,7 @@ static QByteArray createPropertyInfo()
out += "static const unsigned short uc_property_trie[] = {\n";
// first write the map
- out += " // 0 - 0x" + QByteArray::number(BMP_END, 16);
+ out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")";
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
if (!(i % 8)) {
if (out.endsWith(' '))
@@ -2304,7 +2345,7 @@ static QByteArray createPropertyInfo()
}
if (out.endsWith(' '))
out.chop(1);
- out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
+ out += "\n\n // [0x" + QByteArray::number(BMP_END, 16) + "..0x" + QByteArray::number(SMP_END, 16) + ")\n";
for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
if (!(i % 8)) {
if (out.endsWith(' '))
@@ -2335,8 +2376,8 @@ static QByteArray createPropertyInfo()
out += ", ";
}
}
- if (out.endsWith(' '))
- out.chop(1);
+ if (out.endsWith(", "))
+ out.chop(2);
out += "\n};\n\n";
out += "#define GET_PROP_INDEX(ucs4) \\\n"
@@ -2419,20 +2460,19 @@ static QByteArray createPropertyInfo()
out += QByteArray::number( p.script );
out += " },";
}
- out.chop(1);
+ if (out.endsWith(','))
+ out.chop(1);
out += "\n};\n\n";
out += "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(uint ucs4) Q_DECL_NOTHROW\n"
"{\n"
- " const int index = GET_PROP_INDEX(ucs4);\n"
- " return uc_properties + index;\n"
+ " return uc_properties + GET_PROP_INDEX(ucs4);\n"
"}\n"
"\n"
"Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(ushort ucs2) Q_DECL_NOTHROW\n"
"{\n"
- " const int index = GET_PROP_INDEX_UCS2(ucs2);\n"
- " return uc_properties + index;\n"
+ " return uc_properties + GET_PROP_INDEX_UCS2(ucs2);\n"
"}\n"
"\n"
"Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) Q_DECL_NOTHROW\n"
@@ -2447,22 +2487,22 @@ static QByteArray createPropertyInfo()
out += "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(uint ucs4) Q_DECL_NOTHROW\n"
"{\n"
- " return (GraphemeBreakClass)qGetProp(ucs4)->graphemeBreakClass;\n"
+ " return static_cast<GraphemeBreakClass>(qGetProp(ucs4)->graphemeBreakClass);\n"
"}\n"
"\n"
"Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(uint ucs4) Q_DECL_NOTHROW\n"
"{\n"
- " return (WordBreakClass)qGetProp(ucs4)->wordBreakClass;\n"
+ " return static_cast<WordBreakClass>(qGetProp(ucs4)->wordBreakClass);\n"
"}\n"
"\n"
"Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(uint ucs4) Q_DECL_NOTHROW\n"
"{\n"
- " return (SentenceBreakClass)qGetProp(ucs4)->sentenceBreakClass;\n"
+ " return static_cast<SentenceBreakClass>(qGetProp(ucs4)->sentenceBreakClass);\n"
"}\n"
"\n"
"Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4) Q_DECL_NOTHROW\n"
"{\n"
- " return (LineBreakClass)qGetProp(ucs4)->lineBreakClass;\n"
+ " return static_cast<LineBreakClass>(qGetProp(ucs4)->lineBreakClass);\n"
"}\n"
"\n";
@@ -2475,7 +2515,7 @@ static QByteArray createSpecialCaseMap()
QByteArray out;
- out += "static const ushort specialCaseMap[] = {\n"
+ out += "static const unsigned short specialCaseMap[] = {\n"
" 0x0, // placeholder";
int i = 1;
while (i < specialCaseMap.size()) {
@@ -2675,10 +2715,10 @@ static QByteArray createCompositionInfo()
" (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
" ? (uc_decomposition_trie[uc_decomposition_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) +
"] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
- " : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + "\\\n"
+ " : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + " \\\n"
" ? uc_decomposition_trie[uc_decomposition_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) +
")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
- " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]\\\n"
+ " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
" : 0xffff))\n\n";
out += "static const unsigned short uc_decomposition_map[] = {";
@@ -2874,10 +2914,10 @@ static QByteArray createLigatureInfo()
" (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
" ? (uc_ligature_trie[uc_ligature_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) +
"] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
- " : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + "\\\n"
+ " : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + " \\\n"
" ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) +
")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
- " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]\\\n"
+ " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
" : 0xffff))\n\n";
out += "static const unsigned short uc_ligature_map[] = {";