1 files changed, 121 insertions, 90 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index f2ebe7c886..cfe5956ace 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -403,7 +403,7 @@ struct PropertyFlags {
     // from DerivedAge.txt
     QChar::UnicodeVersion age : 4;
     int digitValue;
-    uint line_break_class : 6;
+    LineBreakClass line_break_class;
 
     int mirrorDiff : 16;
 
@@ -429,7 +429,7 @@ static int appendToSpecialCaseMap(const QList<int> &map)
     QList<int> utf16map;
     for (int i = 0; i < map.size(); ++i) {
         int val = map.at(i);
-        if (val > 0xffff) {
+        if (val >= 0x10000) {
             utf16map << QChar::highSurrogate(val);
             utf16map << QChar::lowSurrogate(val);
         } else {
@@ -505,7 +505,7 @@ struct UnicodeData {
     // from BidiMirroring.txt
     int mirroredChar;
 
-    // CompositionExclusions.txt
+    // DerivedNormalizationProps.txt
     bool excludedComposition;
 
     // computed position of unicode property set
@@ -726,8 +726,8 @@ static void readUnicodeData()
         data.p.category = categoryMap.value(properties[UD_Category], QChar::NoCategory);
         if (data.p.category == QChar::NoCategory)
             qFatal("unassigned char category: %s", properties[UD_Category].constData());
-        data.p.combiningClass = properties[UD_CombiningClass].toInt();
 
+        data.p.combiningClass = properties[UD_CombiningClass].toInt();
         if (!combiningClassUsage.contains(data.p.combiningClass))
             combiningClassUsage[data.p.combiningClass] = 1;
         else
@@ -738,27 +738,29 @@ static void readUnicodeData()
         if (!properties[UD_UpperCase].isEmpty()) {
             int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
             Q_ASSERT(ok);
-            if (qAbs(upperCase - codepoint) >= (1<<14))
+            int diff = upperCase - codepoint;
+            if (qAbs(diff) >= (1<<14))
                 qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")";
-            data.p.upperCaseDiff = upperCase - codepoint;
-            maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff));
-            if (codepoint > 0xffff) {
-                // if the condition below doesn't hold anymore we need to modify our case folding code
-                //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+            data.p.upperCaseDiff = diff;
+            maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
+            if (codepoint >= 0x10000 || upperCase >= 0x10000) {
+                // if the conditions below doesn't hold anymore we need to modify our upper casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
+                Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
             }
         }
         if (!properties[UD_LowerCase].isEmpty()) {
             int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
             Q_ASSERT(ok);
-            if (qAbs(lowerCase - codepoint) >= (1<<14))
+            int diff = lowerCase - codepoint;
+            if (qAbs(diff) >= (1<<14))
                 qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")";
-            data.p.lowerCaseDiff = lowerCase - codepoint;
-            maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff));
-            if (codepoint > 0xffff) {
-                // if the condition below doesn't hold anymore we need to modify our case folding code
-                //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+            data.p.lowerCaseDiff = diff;
+            maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
+            if (codepoint >= 0x10000 || lowerCase >= 0x10000) {
+                // if the conditions below doesn't hold anymore we need to modify our lower casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
+                Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
             }
         }
         // we want toTitleCase to map to ToUpper in case we don't have any titlecase.
@@ -767,14 +769,15 @@ static void readUnicodeData()
         if (!properties[UD_TitleCase].isEmpty()) {
             int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
             Q_ASSERT(ok);
-            if (qAbs(titleCase - codepoint) >= (1<<14))
+            int diff = titleCase - codepoint;
+            if (qAbs(diff) >= (1<<14))
                 qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")";
-            data.p.titleCaseDiff = titleCase - codepoint;
-            maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff));
-            if (codepoint > 0xffff) {
-                // if the condition below doesn't hold anymore we need to modify our case folding code
-                //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+            data.p.titleCaseDiff = diff;
+            maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
+            if (codepoint >= 0x10000 || titleCase >= 0x10000) {
+                // if the conditions below doesn't hold anymore we need to modify our title casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
+                Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
             }
         }
 
@@ -1003,16 +1006,16 @@ static void readDerivedNormalizationProps()
             && d.decomposition.size() > 1) {
             Q_ASSERT(d.decomposition.size() == 2);
 
-            uint part1 = d.decomposition.at(0);
-            uint part2 = d.decomposition.at(1);
+            int part1 = d.decomposition.at(0);
+            int part2 = d.decomposition.at(1);
 
             // all non-starters are listed in DerivedNormalizationProps.txt
             // and already excluded from composition
             Q_ASSERT(unicodeData.value(part1, UnicodeData(part1)).p.combiningClass == 0);
 
             ++numLigatures;
-            highestLigature = qMax(highestLigature, (int)part1);
-            Ligature l = {(ushort)part1, (ushort)part2, codepoint};
+            highestLigature = qMax(highestLigature, part1);
+            Ligature l = {(ushort)part1, (ushort)part2, (ushort)codepoint};
             ligatureHashes[part2].append(l);
         }
     }
@@ -1109,6 +1112,7 @@ static void computeUniqueProperties()
 
 static void readLineBreak()
 {
+    qDebug() << "Reading LineBreak.txt";
     QFile f("data/LineBreak.txt");
     if (!f.exists())
         qFatal("Couldn't find LineBreak.txt");
@@ -1145,7 +1149,7 @@ static void readLineBreak()
             Q_ASSERT(ok);
         }
 
-        LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned);
+        LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned);
         if (lb == LineBreak_Unassigned)
             qFatal("unassigned line break class: %s", l[1].constData());
 
@@ -1190,7 +1194,10 @@ static void readSpecialCasing()
         bool ok;
         int codepoint = l[0].trimmed().toInt(&ok, 16);
         Q_ASSERT(ok);
-        Q_ASSERT(codepoint <= 0xffff);
+
+        // if the condition below doesn't hold anymore we need to modify our
+        // lower/upper/title casing code and case folding code
+        Q_ASSERT(codepoint < 0x10000);
 
 //         qDebug() << "codepoint" << hex << codepoint;
 //         qDebug() << line;
@@ -1290,16 +1297,18 @@ static void readCaseFolding()
 
         UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
         if (foldMap.size() == 1) {
-            if (qAbs(foldMap.at(0) - codepoint) >= (1<<14))
-                qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << foldMap.at(0) << ")";
-            ud.p.caseFoldDiff = foldMap.at(0) - codepoint;
-            maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(ud.p.caseFoldDiff));
-            if (codepoint > 0xffff) {
-                // if the condition below doesn't hold anymore we need to modify our case folding code
-                //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
-                Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0)));
+            int caseFolded = foldMap.at(0);
+            int diff = caseFolded - codepoint;
+            if (qAbs(diff) >= (1<<14))
+                qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")";
+            ud.p.caseFoldDiff = diff;
+            maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
+            if (codepoint >= 0x10000 || caseFolded >= 0x10000) {
+                // if the conditions below doesn't hold anymore we need to modify our case folding code
+                Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
+                Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
             }
-            if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff)
+            if (caseFolded != codepoint + ud.p.lowerCaseDiff)
                 qDebug() << hex << codepoint;
         } else {
             qFatal("we currently don't support full case foldings");
@@ -1329,13 +1338,15 @@ static void readGraphemeBreak()
         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
+        line.replace(" ", "");
 
         if (line.isEmpty())
             continue;
 
         QList<QByteArray> l = line.split(';');
+        Q_ASSERT(l.size() == 2);
 
-        QByteArray codes = l[0].trimmed();
+        QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');
 
@@ -1348,7 +1359,7 @@ static void readGraphemeBreak()
             Q_ASSERT(ok);
         }
 
-        GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned);
+        GraphemeBreak brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned);
         if (brk == GraphemeBreak_Unassigned)
             qFatal("unassigned grapheme break class: %s", l[1].constData());
 
@@ -1378,13 +1389,15 @@ static void readWordBreak()
         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
+        line.replace(" ", "");
 
         if (line.isEmpty())
             continue;
 
         QList<QByteArray> l = line.split(';');
+        Q_ASSERT(l.size() == 2);
 
-        QByteArray codes = l[0].trimmed();
+        QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');
 
@@ -1397,7 +1410,7 @@ static void readWordBreak()
             Q_ASSERT(ok);
         }
 
-        WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned);
+        WordBreak brk = word_break_map.value(l[1], WordBreak_Unassigned);
         if (brk == WordBreak_Unassigned)
             qFatal("unassigned word break class: %s", l[1].constData());
 
@@ -1427,13 +1440,15 @@ static void readSentenceBreak()
         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
+        line.replace(" ", "");
 
         if (line.isEmpty())
             continue;
 
         QList<QByteArray> l = line.split(';');
+        Q_ASSERT(l.size() == 2);
 
-        QByteArray codes = l[0].trimmed();
+        QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');
 
@@ -1446,7 +1461,7 @@ static void readSentenceBreak()
             Q_ASSERT(ok);
         }
 
-        SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned);
+        SentenceBreak brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned);
         if (brk == SentenceBreak_Unassigned)
             qFatal("unassigned sentence break class: %s", l[1].constData());
 
@@ -1624,17 +1639,22 @@ static void readBlocks()
         QByteArray blockName = line.mid(semicolon + 1);
 
         int blockIndex = blockNames.indexOf(blockName);
-        if (blockIndex < 0) {
+        if (blockIndex == -1) {
+            blockIndex = blockNames.size();
             blockNames.append(blockName);
-            blockIndex = blockNames.indexOf(blockName);
-            Q_ASSERT(blockIndex >= 0);
         }
 
-        int dotdot = codePoints.indexOf("..");
-        Q_ASSERT(dotdot >= 0);
-        bool unused;
-        int first = codePoints.left(dotdot).toInt(&unused, 16);
-        int last = codePoints.mid(dotdot + 2).toInt(&unused, 16);
+        codePoints.replace("..", ".");
+        QList<QByteArray> cl = codePoints.split('.');
+
+        bool ok;
+        int first = cl[0].toInt(&ok, 16);
+        Q_ASSERT(ok);
+        int last = first;
+        if (cl.size() == 2) {
+            last = cl[1].toInt(&ok, 16);
+            Q_ASSERT(ok);
+        }
 
         BlockInfo blockInfo = { blockIndex, first, last };
         blockInfoList.append(blockInfo);
@@ -1670,7 +1690,6 @@ static void readScripts()
         if (!f.exists())
             qFatal("Couldn't find %s", files[i]);
 
-
         f.open(QFile::ReadOnly);
 
         while (!f.atEnd()) {
@@ -1693,28 +1712,25 @@ static void readScripts()
             QByteArray scriptName = line.mid(semicolon + 1);
 
             int scriptIndex = scriptNames.indexOf(scriptName);
-            if (scriptIndex < 0) {
+            if (scriptIndex == -1) {
+                scriptIndex = scriptNames.size();
                 scriptNames.append(scriptName);
-                scriptIndex = scriptNames.indexOf(scriptName);
-                Q_ASSERT(scriptIndex >= 0);
             }
 
-            int dotdot = codePoints.indexOf("..");
-            bool unused;
-            int first = -1, last = -1;
-            if (dotdot >= 0) {
-                first = codePoints.left(dotdot).toInt(&unused, 16);
-                last = codePoints.mid(dotdot + 2).toInt(&unused, 16);
-            } else {
-                first = codePoints.toInt(&unused, 16);
-            }
+            codePoints.replace("..", ".");
+            QList<QByteArray> cl = codePoints.split('.');
 
-            if (last != -1) {
-                for (int i = first; i <= last; ++i)
-                    scriptAssignment[i] = scriptIndex;
-            } else {
-                scriptAssignment[first] = scriptIndex;
+            bool ok;
+            int first = cl[0].toInt(&ok, 16);
+            Q_ASSERT(ok);
+            int last = first;
+            if (cl.size() == 2) {
+                last = cl[1].toInt(&ok, 16);
+                Q_ASSERT(ok);
             }
+
+            for (int i = first; i <= last; ++i)
+                scriptAssignment[i] = scriptIndex;
         }
     }
 }
@@ -1849,21 +1865,18 @@ QByteArray createScriptTableDeclaration()
             declaration += ", /* U+";
             declaration += QByteArray::number(block, 16).rightJustified(4, '0');
             declaration += '-';
-            declaration +=
-                QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
+            declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
             declaration += " */\n";
         } else {
             const int value = extraBlockList.size() + scriptSentinel;
-            const int offset =
-                ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
+            const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
 
             declaration += "    ";
             declaration += QByteArray::number(value);
             declaration += ", /* U+";
             declaration += QByteArray::number(block, 16).rightJustified(4, '0');
             declaration += '-';
-            declaration +=
-                QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
+            declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
             declaration += " at offset ";
             declaration += QByteArray::number(offset);
             declaration += " */\n";
@@ -1880,16 +1893,14 @@ QByteArray createScriptTableDeclaration()
 
     for (int i = 0; i < extraBlockList.size(); ++i) {
         const int value = i + scriptSentinel;
-        const int offset =
-            ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
+        const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
         const ExtraBlock &extraBlock = extraBlockList.at(i);
         const int block = extraBlock.block;
 
         declaration += "\n\n    /* U+";
         declaration += QByteArray::number(block, 16).rightJustified(4, '0');
         declaration += '-';
-        declaration +=
-            QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
+        declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
         declaration += " at offset ";
         declaration += QByteArray::number(offset);
         declaration += " */\n    ";
@@ -1905,9 +1916,24 @@ QByteArray createScriptTableDeclaration()
             else
                 declaration += ' ';
         }
+        if (declaration.endsWith(' '))
+            declaration.chop(1);
     }
     declaration += "\n};\n\n} // namespace QUnicodeTables\n\n";
 
+    declaration += 
+            "Q_CORE_EXPORT int QT_FASTCALL QUnicodeTables::script(uint ucs4)\n"
+            "{\n"
+            "    if (ucs4 > 0xffff)\n"
+            "        return Common;\n"
+            "    int script = uc_scripts[ucs4 >> 7];\n"
+            "    if (script < ScriptSentinel)\n"
+            "        return script;\n"
+            "    script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);\n"
+            "    script = uc_scripts[script + (ucs4 & 0x7f)];\n"
+            "    return script;\n"
+            "}\n\n";
+
     qDebug("createScriptTableDeclaration: table size is %d bytes",
            unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize));
 
@@ -2168,6 +2194,11 @@ static QByteArray createPropertyInfo()
            "    return uc_properties + index;\n"
            "}\n\n";
 
+    out += "Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)\n"
+           "{\n"
+           "    return (QUnicodeTables::LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
+           "}\n\n";
+
     out += "static const ushort specialCaseMap[] = {\n   ";
     for (int i = 0; i < specialCaseMap.size(); ++i) {
         out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i), 16);
@@ -2204,7 +2235,7 @@ static QByteArray createCompositionInfo()
     const int SMP_BLOCKSIZE = 256;
     const int SMP_SHIFT = 8;
 
-    if(SMP_END <= highestComposedCharacter)
+    if (SMP_END <= highestComposedCharacter)
         qFatal("end of table smaller than highest composed character at %x", highestComposedCharacter);
 
     QList<DecompositionBlock> blocks;
@@ -2417,15 +2448,15 @@ static QByteArray createLigatureInfo()
             int uc = block*BMP_BLOCKSIZE + i;
             QList<Ligature> l = ligatureHashes.value(uc);
             if (!l.isEmpty()) {
-                b.decompositionPositions.append(tableIndex);
                 qSort(l);
 
                 ligatures.append(l.size());
-                for (int i = 0; i < l.size(); ++i) {
-                    Q_ASSERT(l.at(i).u2 == uc);
-                    ligatures.append(l.at(i).u1);
-                    ligatures.append(l.at(i).ligature);
+                for (int j = 0; j < l.size(); ++j) {
+                    Q_ASSERT(l.at(j).u2 == uc);
+                    ligatures.append(l.at(j).u1);
+                    ligatures.append(l.at(j).ligature);
                 }
+                b.decompositionPositions.append(tableIndex);
                 tableIndex += 2*l.size() + 1;
             } else {
                 b.decompositionPositions.append(0xffff);
@@ -2450,12 +2481,11 @@ static QByteArray createLigatureInfo()
     qDebug("    %d unique blocks in BMP.", blocks.size());
     qDebug("        block data uses: %d bytes", bmp_block_data);
     qDebug("        trie data uses : %d bytes", bmp_trie);
-    qDebug("        ligature data uses : %d bytes", ligatures.size()*2);
-    qDebug("        memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
+    qDebug("\n        ligature data uses : %d bytes", ligatures.size()*2);
+    qDebug("    memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
 
     QByteArray out;
 
-
     out += "static const unsigned short uc_ligature_trie[] = {\n";
 
     // first write the map
@@ -2531,6 +2561,7 @@ QByteArray createCasingInfo()
     return out;
 }
 
+
 int main(int, char **)
 {
     initAgeMap();
@@ -2650,14 +2681,14 @@ int main(int, char **)
     f.write("\n");
     f.write(scriptEnumDeclaration);
     f.write("\n");
-    f.write(lineBreakClass);
-    f.write("\n");
     f.write(grapheme_break_string);
     f.write("\n");
     f.write(word_break_string);
     f.write("\n");
     f.write(sentence_break_string);
     f.write("\n");
+    f.write(lineBreakClass);
+    f.write("\n");
     f.write(methods);
     f.write("} // namespace QUnicodeTables\n\n"
             "QT_END_NAMESPACE\n\n"