summaryrefslogtreecommitdiffstats
path: root/util/unicode/main.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r--util/unicode/main.cpp211
1 files changed, 121 insertions, 90 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index f2ebe7c886..cfe5956ace 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -403,7 +403,7 @@ struct PropertyFlags {
// from DerivedAge.txt
QChar::UnicodeVersion age : 4;
int digitValue;
- uint line_break_class : 6;
+ LineBreakClass line_break_class;
int mirrorDiff : 16;
@@ -429,7 +429,7 @@ static int appendToSpecialCaseMap(const QList<int> &map)
QList<int> utf16map;
for (int i = 0; i < map.size(); ++i) {
int val = map.at(i);
- if (val > 0xffff) {
+ if (val >= 0x10000) {
utf16map << QChar::highSurrogate(val);
utf16map << QChar::lowSurrogate(val);
} else {
@@ -505,7 +505,7 @@ struct UnicodeData {
// from BidiMirroring.txt
int mirroredChar;
- // CompositionExclusions.txt
+ // DerivedNormalizationProps.txt
bool excludedComposition;
// computed position of unicode property set
@@ -726,8 +726,8 @@ static void readUnicodeData()
data.p.category = categoryMap.value(properties[UD_Category], QChar::NoCategory);
if (data.p.category == QChar::NoCategory)
qFatal("unassigned char category: %s", properties[UD_Category].constData());
- data.p.combiningClass = properties[UD_CombiningClass].toInt();
+ data.p.combiningClass = properties[UD_CombiningClass].toInt();
if (!combiningClassUsage.contains(data.p.combiningClass))
combiningClassUsage[data.p.combiningClass] = 1;
else
@@ -738,27 +738,29 @@ static void readUnicodeData()
if (!properties[UD_UpperCase].isEmpty()) {
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
Q_ASSERT(ok);
- if (qAbs(upperCase - codepoint) >= (1<<14))
+ int diff = upperCase - codepoint;
+ if (qAbs(diff) >= (1<<14))
qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")";
- data.p.upperCaseDiff = upperCase - codepoint;
- maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+ data.p.upperCaseDiff = diff;
+ maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || upperCase >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our upper casing code
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
}
}
if (!properties[UD_LowerCase].isEmpty()) {
int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
Q_ASSERT(ok);
- if (qAbs(lowerCase - codepoint) >= (1<<14))
+ int diff = lowerCase - codepoint;
+ if (qAbs(diff) >= (1<<14))
qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")";
- data.p.lowerCaseDiff = lowerCase - codepoint;
- maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+ data.p.lowerCaseDiff = diff;
+ maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || lowerCase >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our lower casing code
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
}
}
// we want toTitleCase to map to ToUpper in case we don't have any titlecase.
@@ -767,14 +769,15 @@ static void readUnicodeData()
if (!properties[UD_TitleCase].isEmpty()) {
int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
Q_ASSERT(ok);
- if (qAbs(titleCase - codepoint) >= (1<<14))
+ int diff = titleCase - codepoint;
+ if (qAbs(diff) >= (1<<14))
qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")";
- data.p.titleCaseDiff = titleCase - codepoint;
- maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+ data.p.titleCaseDiff = diff;
+ maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || titleCase >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our title casing code
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
}
}
@@ -1003,16 +1006,16 @@ static void readDerivedNormalizationProps()
&& d.decomposition.size() > 1) {
Q_ASSERT(d.decomposition.size() == 2);
- uint part1 = d.decomposition.at(0);
- uint part2 = d.decomposition.at(1);
+ int part1 = d.decomposition.at(0);
+ int part2 = d.decomposition.at(1);
// all non-starters are listed in DerivedNormalizationProps.txt
// and already excluded from composition
Q_ASSERT(unicodeData.value(part1, UnicodeData(part1)).p.combiningClass == 0);
++numLigatures;
- highestLigature = qMax(highestLigature, (int)part1);
- Ligature l = {(ushort)part1, (ushort)part2, codepoint};
+ highestLigature = qMax(highestLigature, part1);
+ Ligature l = {(ushort)part1, (ushort)part2, (ushort)codepoint};
ligatureHashes[part2].append(l);
}
}
@@ -1109,6 +1112,7 @@ static void computeUniqueProperties()
static void readLineBreak()
{
+ qDebug() << "Reading LineBreak.txt";
QFile f("data/LineBreak.txt");
if (!f.exists())
qFatal("Couldn't find LineBreak.txt");
@@ -1145,7 +1149,7 @@ static void readLineBreak()
Q_ASSERT(ok);
}
- LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned);
+ LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned);
if (lb == LineBreak_Unassigned)
qFatal("unassigned line break class: %s", l[1].constData());
@@ -1190,7 +1194,10 @@ static void readSpecialCasing()
bool ok;
int codepoint = l[0].trimmed().toInt(&ok, 16);
Q_ASSERT(ok);
- Q_ASSERT(codepoint <= 0xffff);
+
+ // if the condition below doesn't hold anymore we need to modify our
+ // lower/upper/title casing code and case folding code
+ Q_ASSERT(codepoint < 0x10000);
// qDebug() << "codepoint" << hex << codepoint;
// qDebug() << line;
@@ -1290,16 +1297,18 @@ static void readCaseFolding()
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
if (foldMap.size() == 1) {
- if (qAbs(foldMap.at(0) - codepoint) >= (1<<14))
- qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << foldMap.at(0) << ")";
- ud.p.caseFoldDiff = foldMap.at(0) - codepoint;
- maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(ud.p.caseFoldDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
- Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0)));
+ int caseFolded = foldMap.at(0);
+ int diff = caseFolded - codepoint;
+ if (qAbs(diff) >= (1<<14))
+ qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")";
+ ud.p.caseFoldDiff = diff;
+ maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || caseFolded >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our case folding code
+ Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
}
- if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff)
+ if (caseFolded != codepoint + ud.p.lowerCaseDiff)
qDebug() << hex << codepoint;
} else {
qFatal("we currently don't support full case foldings");
@@ -1329,13 +1338,15 @@ static void readGraphemeBreak()
int comment = line.indexOf('#');
if (comment >= 0)
line = line.left(comment);
+ line.replace(" ", "");
if (line.isEmpty())
continue;
QList<QByteArray> l = line.split(';');
+ Q_ASSERT(l.size() == 2);
- QByteArray codes = l[0].trimmed();
+ QByteArray codes = l[0];
codes.replace("..", ".");
QList<QByteArray> cl = codes.split('.');
@@ -1348,7 +1359,7 @@ static void readGraphemeBreak()
Q_ASSERT(ok);
}
- GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned);
+ GraphemeBreak brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned);
if (brk == GraphemeBreak_Unassigned)
qFatal("unassigned grapheme break class: %s", l[1].constData());
@@ -1378,13 +1389,15 @@ static void readWordBreak()
int comment = line.indexOf('#');
if (comment >= 0)
line = line.left(comment);
+ line.replace(" ", "");
if (line.isEmpty())
continue;
QList<QByteArray> l = line.split(';');
+ Q_ASSERT(l.size() == 2);
- QByteArray codes = l[0].trimmed();
+ QByteArray codes = l[0];
codes.replace("..", ".");
QList<QByteArray> cl = codes.split('.');
@@ -1397,7 +1410,7 @@ static void readWordBreak()
Q_ASSERT(ok);
}
- WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned);
+ WordBreak brk = word_break_map.value(l[1], WordBreak_Unassigned);
if (brk == WordBreak_Unassigned)
qFatal("unassigned word break class: %s", l[1].constData());
@@ -1427,13 +1440,15 @@ static void readSentenceBreak()
int comment = line.indexOf('#');
if (comment >= 0)
line = line.left(comment);
+ line.replace(" ", "");
if (line.isEmpty())
continue;
QList<QByteArray> l = line.split(';');
+ Q_ASSERT(l.size() == 2);
- QByteArray codes = l[0].trimmed();
+ QByteArray codes = l[0];
codes.replace("..", ".");
QList<QByteArray> cl = codes.split('.');
@@ -1446,7 +1461,7 @@ static void readSentenceBreak()
Q_ASSERT(ok);
}
- SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned);
+ SentenceBreak brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned);
if (brk == SentenceBreak_Unassigned)
qFatal("unassigned sentence break class: %s", l[1].constData());
@@ -1624,17 +1639,22 @@ static void readBlocks()
QByteArray blockName = line.mid(semicolon + 1);
int blockIndex = blockNames.indexOf(blockName);
- if (blockIndex < 0) {
+ if (blockIndex == -1) {
+ blockIndex = blockNames.size();
blockNames.append(blockName);
- blockIndex = blockNames.indexOf(blockName);
- Q_ASSERT(blockIndex >= 0);
}
- int dotdot = codePoints.indexOf("..");
- Q_ASSERT(dotdot >= 0);
- bool unused;
- int first = codePoints.left(dotdot).toInt(&unused, 16);
- int last = codePoints.mid(dotdot + 2).toInt(&unused, 16);
+ codePoints.replace("..", ".");
+ QList<QByteArray> cl = codePoints.split('.');
+
+ bool ok;
+ int first = cl[0].toInt(&ok, 16);
+ Q_ASSERT(ok);
+ int last = first;
+ if (cl.size() == 2) {
+ last = cl[1].toInt(&ok, 16);
+ Q_ASSERT(ok);
+ }
BlockInfo blockInfo = { blockIndex, first, last };
blockInfoList.append(blockInfo);
@@ -1670,7 +1690,6 @@ static void readScripts()
if (!f.exists())
qFatal("Couldn't find %s", files[i]);
-
f.open(QFile::ReadOnly);
while (!f.atEnd()) {
@@ -1693,28 +1712,25 @@ static void readScripts()
QByteArray scriptName = line.mid(semicolon + 1);
int scriptIndex = scriptNames.indexOf(scriptName);
- if (scriptIndex < 0) {
+ if (scriptIndex == -1) {
+ scriptIndex = scriptNames.size();
scriptNames.append(scriptName);
- scriptIndex = scriptNames.indexOf(scriptName);
- Q_ASSERT(scriptIndex >= 0);
}
- int dotdot = codePoints.indexOf("..");
- bool unused;
- int first = -1, last = -1;
- if (dotdot >= 0) {
- first = codePoints.left(dotdot).toInt(&unused, 16);
- last = codePoints.mid(dotdot + 2).toInt(&unused, 16);
- } else {
- first = codePoints.toInt(&unused, 16);
- }
+ codePoints.replace("..", ".");
+ QList<QByteArray> cl = codePoints.split('.');
- if (last != -1) {
- for (int i = first; i <= last; ++i)
- scriptAssignment[i] = scriptIndex;
- } else {
- scriptAssignment[first] = scriptIndex;
+ bool ok;
+ int first = cl[0].toInt(&ok, 16);
+ Q_ASSERT(ok);
+ int last = first;
+ if (cl.size() == 2) {
+ last = cl[1].toInt(&ok, 16);
+ Q_ASSERT(ok);
}
+
+ for (int i = first; i <= last; ++i)
+ scriptAssignment[i] = scriptIndex;
}
}
}
@@ -1849,21 +1865,18 @@ QByteArray createScriptTableDeclaration()
declaration += ", /* U+";
declaration += QByteArray::number(block, 16).rightJustified(4, '0');
declaration += '-';
- declaration +=
- QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
+ declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
declaration += " */\n";
} else {
const int value = extraBlockList.size() + scriptSentinel;
- const int offset =
- ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
+ const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
declaration += " ";
declaration += QByteArray::number(value);
declaration += ", /* U+";
declaration += QByteArray::number(block, 16).rightJustified(4, '0');
declaration += '-';
- declaration +=
- QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
+ declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
declaration += " at offset ";
declaration += QByteArray::number(offset);
declaration += " */\n";
@@ -1880,16 +1893,14 @@ QByteArray createScriptTableDeclaration()
for (int i = 0; i < extraBlockList.size(); ++i) {
const int value = i + scriptSentinel;
- const int offset =
- ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
+ const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
const ExtraBlock &extraBlock = extraBlockList.at(i);
const int block = extraBlock.block;
declaration += "\n\n /* U+";
declaration += QByteArray::number(block, 16).rightJustified(4, '0');
declaration += '-';
- declaration +=
- QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
+ declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
declaration += " at offset ";
declaration += QByteArray::number(offset);
declaration += " */\n ";
@@ -1905,9 +1916,24 @@ QByteArray createScriptTableDeclaration()
else
declaration += ' ';
}
+ if (declaration.endsWith(' '))
+ declaration.chop(1);
}
declaration += "\n};\n\n} // namespace QUnicodeTables\n\n";
+ declaration +=
+ "Q_CORE_EXPORT int QT_FASTCALL QUnicodeTables::script(uint ucs4)\n"
+ "{\n"
+ " if (ucs4 > 0xffff)\n"
+ " return Common;\n"
+ " int script = uc_scripts[ucs4 >> 7];\n"
+ " if (script < ScriptSentinel)\n"
+ " return script;\n"
+ " script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);\n"
+ " script = uc_scripts[script + (ucs4 & 0x7f)];\n"
+ " return script;\n"
+ "}\n\n";
+
qDebug("createScriptTableDeclaration: table size is %d bytes",
unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize));
@@ -2168,6 +2194,11 @@ static QByteArray createPropertyInfo()
" return uc_properties + index;\n"
"}\n\n";
+ out += "Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)\n"
+ "{\n"
+ " return (QUnicodeTables::LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
+ "}\n\n";
+
out += "static const ushort specialCaseMap[] = {\n ";
for (int i = 0; i < specialCaseMap.size(); ++i) {
out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i), 16);
@@ -2204,7 +2235,7 @@ static QByteArray createCompositionInfo()
const int SMP_BLOCKSIZE = 256;
const int SMP_SHIFT = 8;
- if(SMP_END <= highestComposedCharacter)
+ if (SMP_END <= highestComposedCharacter)
qFatal("end of table smaller than highest composed character at %x", highestComposedCharacter);
QList<DecompositionBlock> blocks;
@@ -2417,15 +2448,15 @@ static QByteArray createLigatureInfo()
int uc = block*BMP_BLOCKSIZE + i;
QList<Ligature> l = ligatureHashes.value(uc);
if (!l.isEmpty()) {
- b.decompositionPositions.append(tableIndex);
qSort(l);
ligatures.append(l.size());
- for (int i = 0; i < l.size(); ++i) {
- Q_ASSERT(l.at(i).u2 == uc);
- ligatures.append(l.at(i).u1);
- ligatures.append(l.at(i).ligature);
+ for (int j = 0; j < l.size(); ++j) {
+ Q_ASSERT(l.at(j).u2 == uc);
+ ligatures.append(l.at(j).u1);
+ ligatures.append(l.at(j).ligature);
}
+ b.decompositionPositions.append(tableIndex);
tableIndex += 2*l.size() + 1;
} else {
b.decompositionPositions.append(0xffff);
@@ -2450,12 +2481,11 @@ static QByteArray createLigatureInfo()
qDebug(" %d unique blocks in BMP.", blocks.size());
qDebug(" block data uses: %d bytes", bmp_block_data);
qDebug(" trie data uses : %d bytes", bmp_trie);
- qDebug(" ligature data uses : %d bytes", ligatures.size()*2);
- qDebug(" memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
+ qDebug("\n ligature data uses : %d bytes", ligatures.size()*2);
+ qDebug(" memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
QByteArray out;
-
out += "static const unsigned short uc_ligature_trie[] = {\n";
// first write the map
@@ -2531,6 +2561,7 @@ QByteArray createCasingInfo()
return out;
}
+
int main(int, char **)
{
initAgeMap();
@@ -2650,14 +2681,14 @@ int main(int, char **)
f.write("\n");
f.write(scriptEnumDeclaration);
f.write("\n");
- f.write(lineBreakClass);
- f.write("\n");
f.write(grapheme_break_string);
f.write("\n");
f.write(word_break_string);
f.write("\n");
f.write(sentence_break_string);
f.write("\n");
+ f.write(lineBreakClass);
+ f.write("\n");
f.write(methods);
f.write("} // namespace QUnicodeTables\n\n"
"QT_END_NAMESPACE\n\n"