3 files changed, 1495 insertions, 40 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index 76072f8282..819d8a9c3b 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2020 The Qt Company Ltd.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
@@ -41,8 +41,7 @@
 
 #include "qunicodetables_p.h"
 #include "qvarlengtharray.h"
-
-#include "qharfbuzz_p.h"
+#include "qlibrary.h"
 
 #define FLAG(x) (1 << (x))
 
@@ -724,6 +723,1493 @@ static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *a
     }
 }
 
+namespace Tailored {
+
+using CharAttributeFunction = void (*)(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes);
+
+
+enum Form {
+    Invalid = 0x0,
+    UnknownForm = Invalid,
+    Consonant,
+    Nukta,
+    Halant,
+    Matra,
+    VowelMark,
+    StressMark,
+    IndependentVowel,
+    LengthMark,
+    Control,
+    Other
+};
+
+static const unsigned char indicForms[0xe00-0x900] = {
+    // Devangari
+    Invalid, VowelMark, VowelMark, VowelMark,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Nukta, Other, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Matra, Matra, Matra, Matra,
+    Matra, Matra, Matra, Matra,
+    Matra, Halant, UnknownForm, UnknownForm,
+
+    Other, StressMark, StressMark, StressMark,
+    StressMark, UnknownForm, UnknownForm, UnknownForm,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Consonant,
+    Consonant, Consonant /* ??? */, Consonant, Consonant,
+
+    // Bengali
+    Invalid, VowelMark, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, Invalid, Invalid, IndependentVowel,
+
+    IndependentVowel, Invalid, Invalid, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Invalid, Consonant, Invalid,
+    Invalid, Invalid, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Nukta, Other, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Matra, Invalid, Invalid, Matra,
+    Matra, Invalid, Invalid, Matra,
+    Matra, Halant, Consonant, UnknownForm,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, VowelMark,
+    Invalid, Invalid, Invalid, Invalid,
+    Consonant, Consonant, Invalid, Consonant,
+
+    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Consonant, Consonant, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Gurmukhi
+    Invalid, VowelMark, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
+    Invalid, Invalid, Invalid, IndependentVowel,
+
+    IndependentVowel, Invalid, Invalid, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Invalid, Consonant, Consonant,
+    Invalid, Consonant, Consonant, Invalid,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Nukta, Other, Matra, Matra,
+
+    Matra, Matra, Matra, Invalid,
+    Invalid, Invalid, Invalid, Matra,
+    Matra, Invalid, Invalid, Matra,
+    Matra, Halant, UnknownForm, UnknownForm,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, UnknownForm, UnknownForm, UnknownForm,
+    Invalid, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Invalid,
+
+    Other, Other, Invalid, Invalid,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    StressMark, StressMark, Consonant, Consonant,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Gujarati
+    Invalid, VowelMark, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
+
+    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Invalid, Consonant, Consonant,
+    Invalid, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Nukta, Other, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Matra, Matra, Invalid, Matra,
+    Matra, Matra, Invalid, Matra,
+    Matra, Halant, UnknownForm, UnknownForm,
+
+    Other, UnknownForm, UnknownForm, UnknownForm,
+    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
+    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
+    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
+
+    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Oriya
+    Invalid, VowelMark, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, Invalid, Invalid, IndependentVowel,
+
+    IndependentVowel, Invalid, Invalid, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Invalid, Consonant, Consonant,
+    Invalid, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Nukta, Other, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Invalid, Invalid, Invalid, Matra,
+    Matra, Invalid, Invalid, Matra,
+    Matra, Halant, UnknownForm, UnknownForm,
+
+    Other, Invalid, Invalid, Invalid,
+    Invalid, UnknownForm, LengthMark, LengthMark,
+    Invalid, Invalid, Invalid, Invalid,
+    Consonant, Consonant, Invalid, Consonant,
+
+    IndependentVowel, IndependentVowel, Invalid, Invalid,
+    Invalid, Invalid, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Consonant, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    //Tamil
+    Invalid, Invalid, VowelMark, Other,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
+    Invalid, Invalid, IndependentVowel, IndependentVowel,
+
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+    IndependentVowel, Consonant, Invalid, Invalid,
+    Invalid, Consonant, Consonant, Invalid,
+    Consonant, Invalid, Consonant, Consonant,
+
+    Invalid, Invalid, Invalid, Consonant,
+    Consonant, Invalid, Invalid, Invalid,
+    Consonant, Consonant, Consonant, Invalid,
+    Invalid, Invalid, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Invalid, Invalid, Matra, Matra,
+
+    Matra, Matra, Matra, Invalid,
+    Invalid, Invalid, Matra, Matra,
+    Matra, Invalid, Matra, Matra,
+    Matra, Halant, Invalid, Invalid,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, LengthMark,
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Telugu
+    Invalid, VowelMark, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Invalid, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Invalid, Invalid, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Matra, Invalid, Matra, Matra,
+    Matra, Invalid, Matra, Matra,
+    Matra, Halant, Invalid, Invalid,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, LengthMark, Matra, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+
+    IndependentVowel, IndependentVowel, Invalid, Invalid,
+    Invalid, Invalid, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Kannada
+    Invalid, Invalid, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Invalid, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Nukta, Other, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Matra, Invalid, Matra, Matra,
+    Matra, Invalid, Matra, Matra,
+    Matra, Halant, Invalid, Invalid,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, LengthMark, LengthMark, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Consonant, Invalid,
+
+    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+    Invalid, Invalid, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Malayalam
+    Invalid, Invalid, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+
+    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+    IndependentVowel, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, UnknownForm, UnknownForm,
+    Invalid, Invalid, Matra, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Invalid, Invalid, Matra, Matra,
+    Matra, Invalid, Matra, Matra,
+    Matra, Halant, Invalid, Invalid,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Matra,
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+
+    IndependentVowel, IndependentVowel, Invalid, Invalid,
+    Invalid, Invalid, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+
+    // Sinhala
+    Invalid, Invalid, VowelMark, VowelMark,
+    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+
+    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
+    Invalid, Invalid, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+
+    Consonant, Consonant, Invalid, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Consonant,
+    Invalid, Consonant, Invalid, Invalid,
+
+    Consonant, Consonant, Consonant, Consonant,
+    Consonant, Consonant, Consonant, Invalid,
+    Invalid, Invalid, Halant, Invalid,
+    Invalid, Invalid, Invalid, Matra,
+
+    Matra, Matra, Matra, Matra,
+    Matra, Invalid, Matra, Invalid,
+    Matra, Matra, Matra, Matra,
+    Matra, Matra, Matra, Matra,
+
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+    Invalid, Invalid, Invalid, Invalid,
+
+    Invalid, Invalid, Matra, Matra,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+    Other, Other, Other, Other,
+};
+
+static inline Form form(unsigned short uc) {
+    if (uc < 0x900 || uc > 0xdff) {
+        if (uc == 0x25cc)
+            return Consonant;
+        if (uc == 0x200c || uc == 0x200d)
+            return Control;
+        return Other;
+    }
+    return (Form)indicForms[uc-0x900];
+}
+
+// #define INDIC_DEBUG
+#ifdef INDIC_DEBUG
+#define IDEBUG qDebug
+#else
+#define IDEBUG if constexpr (1) ; else qDebug
+#endif
+
+/* syllables are of the form:
+
+   (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
+   (Consonant Nukta? Halant)* Consonant Halant
+   IndependentVowel VowelMark? StressMark?
+
+   We return syllable boundaries on invalid combinations aswell
+*/
+static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int start, int end, bool *invalid)
+{
+    *invalid = false;
+    IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
+    const ushort *uc = s+start;
+
+    int pos = 0;
+    Form state = form(uc[pos]);
+    IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
+    pos++;
+
+    if (state != Consonant && state != IndependentVowel) {
+        if (state != Other)
+            *invalid = true;
+        goto finish;
+    }
+
+    while (pos < end - start) {
+        Form newState = form(uc[pos]);
+        IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
+        switch (newState) {
+        case Control:
+            newState = state;
+        if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
+        break;
+            // the control character should be the last char in the item
+        if (state == Consonant && script == QChar::Script_Bengali && uc[pos-1] == 0x09B0 && uc[pos] == 0x200d /* ZWJ */)
+        break;
+        if (state == Consonant && script == QChar::Script_Kannada && uc[pos-1] == 0x0CB0 && uc[pos] == 0x200d /* ZWJ */)
+        break;
+            // Bengali and Kannada has a special exception for rendering yaphala with ra (to avoid reph) see http://www.unicode.org/faq/indic.html#15
+            ++pos;
+            goto finish;
+        case Consonant:
+        if (state == Halant && (script != QChar::Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
+                break;
+            goto finish;
+        case Halant:
+            if (state == Nukta || state == Consonant)
+                break;
+            // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
+            if (script == QChar::Script_Bengali && pos == 1 &&
+                 (uc[0] == 0x0985 || uc[0] == 0x098f))
+                break;
+            // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
+            if (script == QChar::Script_Sinhala && state == Matra) {
+                ++pos;
+                continue;
+            }
+            if (script == QChar::Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
+                ++pos;
+                continue;
+            }
+            goto finish;
+        case Nukta:
+            if (state == Consonant)
+                break;
+            goto finish;
+        case StressMark:
+            if (state == VowelMark)
+                break;
+            // fall through
+        case VowelMark:
+            if (state == Matra || state == LengthMark || state == IndependentVowel)
+                break;
+            // fall through
+        case Matra:
+            if (state == Consonant || state == Nukta)
+                break;
+            if (state == Matra) {
+                // ### needs proper testing for correct two/three part matras
+                break;
+            }
+            // ### not sure if this is correct. If it is, does it apply only to Bengali or should
+            // it work for all Indic languages?
+            // the combination Independent_A + Vowel Sign AA is allowed.
+            if (script == QChar::Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
+                break;
+            if (script == QChar::Script_Tamil && state == Matra) {
+                if (uc[pos-1] == 0x0bc6 &&
+                     (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
+                    break;
+                if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
+                    break;
+            }
+            goto finish;
+
+        case LengthMark:
+            if (state == Matra) {
+                // ### needs proper testing for correct two/three part matras
+                break;
+            }
+        case IndependentVowel:
+        case Invalid:
+        case Other:
+            goto finish;
+        }
+        state = newState;
+        pos++;
+    }
+ finish:
+    return pos+start;
+}
+
+static void indicAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    int end = from + len;
+    const ushort *uc = text + from;
+    attributes += from;
+    uint i = 0;
+    while (i < len) {
+        bool invalid;
+        uint boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
+         attributes[i].graphemeBoundary = true;
+
+        if (boundary > len-1) boundary = len;
+        i++;
+        while (i < boundary) {
+            attributes[i].graphemeBoundary = false;
+            ++uc;
+            ++i;
+        }
+        assert(i == boundary);
+    }
+
+
+}
+
+#define LIBTHAI_MAJOR   0
+
+/*
+ * if libthai changed please update these codes too.
+ */
+struct thcell_t {
+    unsigned char base;      /**< base character */
+    unsigned char hilo;      /**< upper/lower vowel/diacritic */
+    unsigned char top;       /**< top-level mark */
+};
+typedef int (*th_brk_def) (const unsigned char*, int*, size_t);
+typedef size_t (*th_next_cell_def) (const unsigned char *, size_t, struct thcell_t *, int);
+
+/* libthai related function handles */
+static th_brk_def th_brk = 0;
+static th_next_cell_def th_next_cell = 0;
+
+static int init_libthai() {
+    static bool initialized = false;
+    if (!initialized && (!th_brk || !th_next_cell)) {
+        th_brk = (th_brk_def) QLibrary::resolve(QLatin1String("thai"), (int)LIBTHAI_MAJOR, "th_brk");
+        th_next_cell = (th_next_cell_def)QLibrary::resolve(QLatin1String("thai"), LIBTHAI_MAJOR, "th_next_cell");
+        initialized = true;
+    }
+    if (th_brk && th_next_cell)
+        return 1;
+    else
+        return 0;
+}
+
+static void to_tis620(const ushort *string, uint len, char *cstr)
+{
+    uint i;
+    unsigned char *result = (unsigned char *)cstr;
+
+    for (i = 0; i < len; ++i) {
+        if (string[i] <= 0xa0)
+            result[i] = (unsigned char)string[i];
+        else if (string[i] >= 0xe01 && string[i] <= 0xe5b)
+            result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
+        else
+            result[i] = (unsigned char)~0; // Same encoding as libthai uses for invalid chars
+    }
+
+    result[len] = 0;
+}
+
+/*
+ * Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
+ */
+static void thaiAssignAttributes(const ushort *string, uint len, QCharAttributes *attributes)
+{
+    char s[128];
+    char *cstr = s;
+    int *break_positions = 0;
+    int brp[128];
+    int brp_size = 0;
+    uint numbreaks, i, j, cell_length;
+    struct thcell_t tis_cell;
+
+    if (!init_libthai())
+        return ;
+
+    if (len >= 128)
+        cstr = (char *)malloc(len*sizeof(char) + 1);
+
+    to_tis620(string, len, cstr);
+
+    for (i = 0; i < len; ++i) {
+        attributes[i].wordBreak = false;
+        attributes[i].wordStart = false;
+        attributes[i].wordEnd = false;
+        attributes[i].lineBreak = false;
+    }
+
+    if (len > 128) {
+        break_positions = (int*) malloc (sizeof(int) * len);
+        memset (break_positions, 0, sizeof(int) * len);
+        brp_size = len;
+    }
+    else {
+        break_positions = brp;
+        brp_size = 128;
+    }
+
+    if (break_positions) {
+        attributes[0].wordBreak = true;
+        attributes[0].wordStart = true;
+        attributes[0].wordEnd = false;
+        numbreaks = th_brk((const unsigned char *)cstr, break_positions, brp_size);
+        for (i = 0; i < numbreaks; ++i) {
+            attributes[break_positions[i]].wordBreak = true;
+            attributes[break_positions[i]].wordStart = true;
+            attributes[break_positions[i]].wordEnd = true;
+            attributes[break_positions[i]].lineBreak = true;
+        }
+        if (numbreaks > 0)
+            attributes[break_positions[numbreaks - 1]].wordStart = false;
+
+        if (break_positions != brp)
+            free(break_positions);
+    }
+
+    /* manage grapheme boundaries */
+    i = 0;
+    while (i < len) {
+        cell_length = (uint)(th_next_cell((const unsigned char *)cstr + i, len - i, &tis_cell, true));
+
+        attributes[i].graphemeBoundary = true;
+        for (j = 1; j < cell_length; j++)
+            attributes[i + j].graphemeBoundary = false;
+
+        /* Set graphemeBoundary for SARA AM */
+        if (cstr[i + cell_length - 1] == (char)0xd3)
+            attributes[i + cell_length - 1].graphemeBoundary = true;
+
+        i += cell_length;
+    }
+
+    if (len >= 128)
+        free(cstr);
+}
+
+static void thaiAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    assert(script == QChar::Script_Thai);
+    const ushort *uc = text + from;
+    attributes += from;
+    Q_UNUSED(script);
+    thaiAssignAttributes(uc, len, attributes);
+}
+
+/*
+ tibetan syllables are of the form:
+    head position consonant
+    first sub-joined consonant
+    ....intermediate sub-joined consonants (if any)
+    last sub-joined consonant
+    sub-joined vowel (a-chung U+0F71)
+    standard or compound vowel sign (or 'virama' for devanagari transliteration)
+*/
+
+typedef enum {
+    TibetanOther,
+    TibetanHeadConsonant,
+    TibetanSubjoinedConsonant,
+    TibetanSubjoinedVowel,
+    TibetanVowel
+} TibetanForm;
+
+/* this table starts at U+0f40 */
+static const unsigned char tibetanForm[0x80] = {
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+    TibetanOther, TibetanOther, TibetanOther, TibetanOther,
+
+    TibetanOther, TibetanVowel, TibetanVowel, TibetanVowel,
+    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+
+    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+    TibetanOther, TibetanOther, TibetanOther, TibetanOther,
+    TibetanOther, TibetanOther, TibetanOther, TibetanOther,
+
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+    TibetanSubjoinedConsonant, TibetanOther, TibetanOther, TibetanOther
+};
+
+#define tibetan_form(c) \
+    ((c) >= 0x0f40 && (c) < 0x0fc0 ? (TibetanForm)tibetanForm[(c) - 0x0f40] : TibetanOther)
+
+static int tibetan_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
+{
+    const ushort *uc = s + start;
+
+    int pos = 0;
+    TibetanForm state = tibetan_form(*uc);
+
+/*     qDebug("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);*/
+    pos++;
+
+    if (state != TibetanHeadConsonant) {
+        if (state != TibetanOther)
+            *invalid = true;
+        goto finish;
+    }
+
+    while (pos < end - start) {
+        TibetanForm newState = tibetan_form(uc[pos]);
+        switch (newState) {
+        case TibetanSubjoinedConsonant:
+        case TibetanSubjoinedVowel:
+            if (state != TibetanHeadConsonant &&
+                 state != TibetanSubjoinedConsonant)
+                goto finish;
+            state = newState;
+            break;
+        case TibetanVowel:
+            if (state != TibetanHeadConsonant &&
+                 state != TibetanSubjoinedConsonant &&
+                 state != TibetanSubjoinedVowel)
+                goto finish;
+            break;
+        case TibetanOther:
+        case TibetanHeadConsonant:
+            goto finish;
+        }
+        pos++;
+    }
+
+finish:
+    *invalid = false;
+    return start+pos;
+}
+
+static void tibetanAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    int end = from + len;
+    const ushort *uc = text + from;
+    uint i = 0;
+    Q_UNUSED(script);
+    attributes += from;
+    while (i < len) {
+        bool invalid;
+        uint boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
+
+        attributes[i].graphemeBoundary = true;
+
+        if (boundary > len-1) boundary = len;
+        i++;
+        while (i < boundary) {
+            attributes[i].graphemeBoundary = false;
+            ++uc;
+            ++i;
+        }
+        assert(i == boundary);
+    }
+}
+
+enum MymrCharClassValues {
+    Mymr_CC_RESERVED = 0,
+    Mymr_CC_CONSONANT = 1, /* Consonant of type 1, that has subscript form */
+    Mymr_CC_CONSONANT2 = 2, /* Consonant of type 2, that has no subscript form */
+    Mymr_CC_NGA = 3, /* Consonant NGA */
+    Mymr_CC_YA = 4, /* Consonant YA */
+    Mymr_CC_RA = 5, /* Consonant RA */
+    Mymr_CC_WA = 6, /* Consonant WA */
+    Mymr_CC_HA = 7, /* Consonant HA */
+    Mymr_CC_IND_VOWEL = 8, /* Independent vowel */
+    Mymr_CC_ZERO_WIDTH_NJ_MARK = 9, /* Zero Width non joiner character (0x200C) */
+    Mymr_CC_VIRAMA = 10, /* Subscript consonant combining character */
+    Mymr_CC_PRE_VOWEL = 11, /* Dependent vowel, prebase (Vowel e) */
+    Mymr_CC_BELOW_VOWEL = 12, /* Dependent vowel, prebase (Vowel u, uu) */
+    Mymr_CC_ABOVE_VOWEL = 13, /* Dependent vowel, prebase (Vowel i, ii, ai) */
+    Mymr_CC_POST_VOWEL = 14, /* Dependent vowel, prebase (Vowel aa) */
+    Mymr_CC_SIGN_ABOVE = 15,
+    Mymr_CC_SIGN_BELOW = 16,
+    Mymr_CC_SIGN_AFTER = 17,
+    Mymr_CC_ZERO_WIDTH_J_MARK = 18, /* Zero width joiner character */
+    Mymr_CC_COUNT = 19 /* This is the number of character classes */
+};
+
+enum MymrCharClassFlags {
+    Mymr_CF_CLASS_MASK = 0x0000FFFF,
+
+    Mymr_CF_CONSONANT = 0x01000000, /* flag to speed up comparing */
+    Mymr_CF_MEDIAL = 0x02000000, /* flag to speed up comparing */
+    Mymr_CF_IND_VOWEL = 0x04000000, /* flag to speed up comparing */
+    Mymr_CF_DEP_VOWEL = 0x08000000, /* flag to speed up comparing */
+    Mymr_CF_DOTTED_CIRCLE = 0x10000000, /* add a dotted circle if a character with this flag is the
+                                           first in a syllable */
+    Mymr_CF_VIRAMA = 0x20000000, /* flag to speed up comparing */
+
+    /* position flags */
+    Mymr_CF_POS_BEFORE = 0x00080000,
+    Mymr_CF_POS_BELOW = 0x00040000,
+    Mymr_CF_POS_ABOVE = 0x00020000,
+    Mymr_CF_POS_AFTER = 0x00010000,
+    Mymr_CF_POS_MASK = 0x000f0000,
+
+    Mymr_CF_AFTER_KINZI = 0x00100000
+};
+
+/* Characters that get refrered to by name */
+enum MymrChar
+{
+    Mymr_C_SIGN_ZWNJ     = 0x200C,
+    Mymr_C_SIGN_ZWJ      = 0x200D,
+    Mymr_C_DOTTED_CIRCLE = 0x25CC,
+    Mymr_C_RA            = 0x101B,
+    Mymr_C_YA            = 0x101A,
+    Mymr_C_NGA           = 0x1004,
+    Mymr_C_VOWEL_E       = 0x1031,
+    Mymr_C_VIRAMA        = 0x1039
+};
+
+enum
+{
+    Mymr_xx = Mymr_CC_RESERVED,
+    Mymr_c1 = Mymr_CC_CONSONANT | Mymr_CF_CONSONANT | Mymr_CF_POS_BELOW,
+    Mymr_c2 = Mymr_CC_CONSONANT2 | Mymr_CF_CONSONANT,
+    Mymr_ng = Mymr_CC_NGA | Mymr_CF_CONSONANT | Mymr_CF_POS_ABOVE,
+    Mymr_ya = Mymr_CC_YA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_AFTER | Mymr_CF_AFTER_KINZI,
+    Mymr_ra = Mymr_CC_RA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BEFORE,
+    Mymr_wa = Mymr_CC_WA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW,
+    Mymr_ha = Mymr_CC_HA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW,
+    Mymr_id = Mymr_CC_IND_VOWEL | Mymr_CF_IND_VOWEL,
+    Mymr_vi = Mymr_CC_VIRAMA | Mymr_CF_VIRAMA | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE,
+    Mymr_dl = Mymr_CC_PRE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BEFORE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+    Mymr_db = Mymr_CC_BELOW_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+    Mymr_da = Mymr_CC_ABOVE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+    Mymr_dr = Mymr_CC_POST_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+    Mymr_sa = Mymr_CC_SIGN_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_ABOVE | Mymr_CF_AFTER_KINZI,
+    Mymr_sb = Mymr_CC_SIGN_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_BELOW | Mymr_CF_AFTER_KINZI,
+    Mymr_sp = Mymr_CC_SIGN_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI
+};
+
+
+typedef int MymrCharClass;
+
+
+static const MymrCharClass mymrCharClasses[] =
+{
+    Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_ng, Mymr_c1, Mymr_c1, Mymr_c1,
+    Mymr_c1, Mymr_c1, Mymr_c2, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, /* 1000 - 100F */
+    Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1,
+    Mymr_c1, Mymr_c1, Mymr_ya, Mymr_ra, Mymr_c1, Mymr_wa, Mymr_c1, Mymr_ha, /* 1010 - 101F */
+    Mymr_c2, Mymr_c2, Mymr_xx, Mymr_id, Mymr_id, Mymr_id, Mymr_id, Mymr_id,
+    Mymr_xx, Mymr_id, Mymr_id, Mymr_xx, Mymr_dr, Mymr_da, Mymr_da, Mymr_db, /* 1020 - 102F */
+    Mymr_db, Mymr_dl, Mymr_da, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_sa, Mymr_sb,
+    Mymr_sp, Mymr_vi, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1030 - 103F */
+    Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx,
+    Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1040 - 104F */
+    Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx,
+    Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1050 - 105F */
+};
+
+static MymrCharClass
+getMyanmarCharClass (ushort ch)
+{
+    if (ch == Mymr_C_SIGN_ZWJ)
+        return Mymr_CC_ZERO_WIDTH_J_MARK;
+
+    if (ch == Mymr_C_SIGN_ZWNJ)
+        return Mymr_CC_ZERO_WIDTH_NJ_MARK;
+
+    if (ch < 0x1000 || ch > 0x105f)
+        return Mymr_CC_RESERVED;
+
+    return mymrCharClasses[ch - 0x1000];
+}
+
+static const signed char mymrStateTable[][Mymr_CC_COUNT] =
+{
+/*   xx  c1, c2  ng  ya  ra  wa  ha  id zwnj vi  dl  db  da  dr  sa  sb  sp zwj */
+    { 1,  4,  4,  2,  4,  4,  4,  4, 24,  1, 27, 17, 18, 19, 20, 21,  1,  1,  4}, /*  0 - ground state */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sp to the right of the syllable) */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3, 17, 18, 19, 20, 21, -1, -1,  4}, /*  2 - NGA */
+    {-1,  4,  4,  4,  4,  4,  4,  4, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  3 - Virama after NGA */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  5, 17, 18, 19, 20, 21,  1,  1, -1}, /*  4 - Base consonant */
+    {-2,  6, -2, -2,  7,  8,  9, 10, -2, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /*  5 - First virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 17, 18, 19, 20, 21, -1, -1, -1}, /*  6 - c1 after virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, /*  7 - ya after virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, /*  8 - ra after virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, /*  9 - wa after virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, /* 10 - ha after virama */
+    {-1, -1, -1, -1,  7,  8,  9, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 11 - Virama after NGA+zwj */
+    {-2, -2, -2, -2, -2, -2, 13, 14, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 12 - Second virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 18, 19, 20, 21, -1, -1, -1}, /* 13 - wa after virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, /* 14 - ha after virama */
+    {-2, -2, -2, -2, -2, -2, -2, 16, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 15 - Third virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, /* 16 - ha after virama */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 20, 21,  1,  1, -1}, /* 17 - dl, Dependent vowel e */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, -1, 21,  1,  1, -1}, /* 18 - db, Dependent vowel u,uu */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1,  1, -1}, /* 19 - da, Dependent vowel i,ii,ai */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1,  1,  1, -1}, /* 20 - dr, Dependent vowel aa */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1, -1}, /* 21 - sa, Sign anusvara */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 22 - atha */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1, -1}, /* 23 - zwnj for atha */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 24 - Independent vowel */
+    {-2, -2, -2, -2, 26, 26, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 25 - Virama after subscript consonant */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1,  1, -1}, /* 26 - ra/ya after subscript consonant + virama */
+    {-1,  6, -1, -1,  7,  8,  9, 10, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 27 - Virama after ground state */
+/* exit state -2 is for invalid order of medials and combination of invalids
+   with virama where virama should treat as start of next syllable
+ */
+};
+
+/*#define MYANMAR_DEBUG */
+#ifdef MYANMAR_DEBUG
+#define MMDEBUG qDebug
+#else
+#    define MMDEBUG                                                                                \
+        if (0)                                                                                     \
+        printf
+#endif
+
+/*
+//  Given an input string of characters and a location in which to start looking
+//  calculate, using the state table, which one is the last character of the syllable
+//  that starts in the starting position.
+*/
+static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
+{
+    const ushort *uc = s + start;
+    int state = 0;
+    int pos = start;
+    *invalid = false;
+
+    while (pos < end) {
+        MymrCharClass charClass = getMyanmarCharClass(*uc);
+        state = mymrStateTable[state][charClass & Mymr_CF_CLASS_MASK];
+        if (pos == start)
+            *invalid = (bool)(charClass & Mymr_CF_DOTTED_CIRCLE);
+
+        MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", pos - start, state, charClass, *uc);
+
+        if (state < 0) {
+            if (state < -1)
+                --pos;
+            break;
+        }
+        ++uc;
+        ++pos;
+    }
+    return pos;
+}
+
+static void myanmarAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    int end = from + len;
+    const ushort *uc = text + from;
+    uint i = 0;
+    Q_UNUSED(script);
+    attributes += from;
+    while (i < len) {
+    bool invalid;
+    uint boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
+
+    attributes[i].graphemeBoundary = true;
+    attributes[i].lineBreak = true;
+
+    if (boundary > len-1)
+            boundary = len;
+    i++;
+    while (i < boundary) {
+        attributes[i].graphemeBoundary = false;
+        ++uc;
+        ++i;
+    }
+    assert(i == boundary);
+    }
+}
+
+/*
+//  Vocabulary
+//      Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
+//                      center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
+//                      split vowels, signs... but there is only one base in a syllable, it has to be coded as
+//                      the first character of the syllable.
+//      split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
+//                      Khmer language has five of them. Khmer split vowels either have one part before the
+//                      base and one after the base or they have a part before the base and a part above the base.
+//                      The first part of all Khmer split vowels is the same character, identical to
+//                      the glyph of Khmer dependent vowel SRA EI
+//      coeng -->  modifier used in Khmer to construct coeng (subscript) consonants
+//                 Differently than indian languages, the coeng modifies the consonant that follows it,
+//                 not the one preceding it  Each consonant has two forms, the base form and the subscript form
+//                 the base form is the normal one (using the consonants code-point), the subscript form is
+//                 displayed when the combination coeng + consonant is encountered.
+//      Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant
+//      Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
+//      Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
+//      Consonant shifter -> Khmer has to series of consonants. The same dependent vowel has different sounds
+//                           if it is attached to a consonant of the first series or a consonant of the second series
+//                           Most consonants have an equivalent in the other series, but some of theme exist only in
+//                           one series (for example SA). If we want to use the consonant SA with a vowel sound that
+//                           can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
+//                           of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
+//                           x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and
+//                           MUSIKATOAN a second series consonant to have a first series vowel sound.
+//                           Consonant shifter are both normally supercript marks, but, when they are followed by a
+//                           superscript, they change shape and take the form of subscript dependent vowel SRA U.
+//                           If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
+//                           should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
+//                           be placed after the coeng consonant.
+//      Dependent vowel ->   In khmer dependent vowels can be placed above, below, before or after the base
+//                           Each vowel has its own position. Only one vowel per syllable is allowed.
+//      Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
+//                           Allowed in a syllable.
+//
+//
+//   order is important here! This order must be the same that is found in each horizontal
+//   line in the statetable for Khmer (see khmerStateTable) .
+*/
+enum KhmerCharClassValues {
+    CC_RESERVED             =  0,
+    CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
+    CC_CONSONANT2           =  2, /* Consonant of type 2 */
+    CC_CONSONANT3           =  3, /* Consonant of type 3 */
+    CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero Width non joiner character (0x200C) */
+    CC_CONSONANT_SHIFTER    =  5,
+    CC_ROBAT                =  6, /* Khmer special diacritic accent -treated differently in state table */
+    CC_COENG                =  7, /* Subscript consonant combining character */
+    CC_DEPENDENT_VOWEL      =  8,
+    CC_SIGN_ABOVE           =  9,
+    CC_SIGN_AFTER           = 10,
+    CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
+    CC_COUNT                = 12  /* This is the number of character classes */
+};
+
+
+enum KhmerCharClassFlags {
+    CF_CLASS_MASK    = 0x0000FFFF,
+
+    CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
+    CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
+    CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
+    CF_COENG         = 0x08000000,  /* flag to speed up comparing */
+    CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
+    CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */
+
+    /* position flags */
+    CF_POS_BEFORE    = 0x00080000,
+    CF_POS_BELOW     = 0x00040000,
+    CF_POS_ABOVE     = 0x00020000,
+    CF_POS_AFTER     = 0x00010000,
+    CF_POS_MASK      = 0x000f0000
+};
+
+
+/* Characters that get referred to by name */
+enum KhmerChar {
+    C_SIGN_ZWNJ     = 0x200C,
+    C_SIGN_ZWJ      = 0x200D,
+    C_RO            = 0x179A,
+    C_VOWEL_AA      = 0x17B6,
+    C_SIGN_NIKAHIT  = 0x17C6,
+    C_VOWEL_E       = 0x17C1,
+    C_COENG         = 0x17D2
+};
+
+
+/*
+//  simple classes, they are used in the statetable (in this file) to control the length of a syllable
+//  they are also used to know where a character should be placed (location in reference to the base character)
+//  and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
+//  indicate error in syllable construction
+*/
+enum {
+    _xx = CC_RESERVED,
+    _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+    _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
+    _c1 = CC_CONSONANT | CF_CONSONANT,
+    _c2 = CC_CONSONANT2 | CF_CONSONANT,
+    _c3 = CC_CONSONANT3 | CF_CONSONANT,
+    _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
+    _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
+    _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
+    _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
+    _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
+    _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
+    _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
+
+    /* split vowel */
+    _va = _da | CF_SPLIT_VOWEL,
+    _vr = _dr | CF_SPLIT_VOWEL
+};
+
+
+/*
+//   Character class: a character class value
+//   ORed with character class flags.
+*/
+typedef unsigned long KhmerCharClass;
+
+
+/*
+//  Character class tables
+//  _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs...
+//  _sa Sign placed above the base
+//  _sp Sign placed after the base
+//  _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
+//  _c2 Consonant of type 2 (only RO)
+//  _c3 Consonant of type 3
+//  _rb Khmer sign robat u17CC. combining mark for subscript consonants
+//  _cd Consonant-shifter
+//  _dl Dependent vowel placed before the base (left of the base)
+//  _db Dependent vowel placed below the base
+//  _da Dependent vowel placed above the base
+//  _dr Dependent vowel placed behind the base (right of the base)
+//  _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
+//      it to create a subscript consonant or independent vowel
+//  _va Khmer split vowel in which the first part is before the base and the second one above the base
+//  _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
+*/
+static const KhmerCharClass khmerCharClasses[] = {
+    _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
+    _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
+    _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
+    _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
+    _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
+    _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx  /* 17D0 - 17DF */
+};
+
+/* this enum must reflect the range of khmerCharClasses */
+enum KhmerCharClassesRange {
+    KhmerFirstChar = 0x1780,
+    KhmerLastChar  = 0x17df
+};
+
+/*
+//  Below we define how a character in the input string is either in the khmerCharClasses table
+//  (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
+//  within the syllable, but are not in the table) we also get their type back, or an unknown object
+//  in which case we get _xx (CC_RESERVED) back
+*/
+static KhmerCharClass getKhmerCharClass(ushort uc)
+{
+    if (uc == C_SIGN_ZWJ) {
+        return CC_ZERO_WIDTH_J_MARK;
+    }
+
+    if (uc == C_SIGN_ZWNJ) {
+        return CC_ZERO_WIDTH_NJ_MARK;
+    }
+
+    if (uc < KhmerFirstChar || uc > KhmerLastChar) {
+        return CC_RESERVED;
+    }
+
+    return khmerCharClasses[uc - KhmerFirstChar];
+}
+
+
+/*
+//  The stateTable is used to calculate the end (the length) of a well
+//  formed Khmer Syllable.
+//
+//  Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
+//  CharClassValues. This coincidence of values allows the follow up of the table.
+//
+//  Each line corresponds to a state, which does not necessarily need to be a type
+//  of component... for example, state 2 is a base, with is always a first character
+//  in the syllable, but the state could be produced a consonant of any type when
+//  it is the first character that is analysed (in ground state).
+//
+//  Differentiating 3 types of consonants is necessary in order to
+//  forbid the use of certain combinations, such as having a second
+//  coeng after a coeng RO,
+//  The inexistent possibility of having a type 3 after another type 3 is permitted,
+//  eliminating it would very much complicate the table, and it does not create typing
+//  problems, as the case above.
+//
+//  The table is quite complex, in order to limit the number of coeng consonants
+//  to 2 (by means of the table).
+//
+//  There a peculiarity, as far as Unicode is concerned:
+//  - The consonant-shifter is considered in two possible different
+//    locations, the one considered in Unicode 3.0 and the one considered in
+//    Unicode 4.0. (there is a backwards compatibility problem in this standard).
+//
+//
+//  xx    independent character, such as a number, punctuation sign or non-khmer char
+//
+//  c1    Khmer consonant of type 1 or an independent vowel
+//        that is, a letter in which the subscript for is only under the
+//        base, not taking any space to the right or to the left
+//
+//  c2    Khmer consonant of type 2, the coeng form takes space under
+//        and to the left of the base (only RO is of this type)
+//
+//  c3    Khmer consonant of type 3. Its subscript form takes space under
+//        and to the right of the base.
+//
+//  cs    Khmer consonant shifter
+//
+//  rb    Khmer robat
+//
+//  co    coeng character (u17D2)
+//
+//  dv    dependent vowel (including split vowels, they are treated in the same way).
+//        even if dv is not defined above, the component that is really tested for is
+//        KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
+//
+//  zwj   Zero Width joiner
+//
+//  zwnj  Zero width non joiner
+//
+//  sa    above sign
+//
+//  sp    post sign
+//
+//  there are lines with equal content but for an easier understanding
+//  (and maybe change in the future) we did not join them
+*/
+static const signed char khmerStateTable[][CC_COUNT] =
+{
+    /* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj */
+    { 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
+    {-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
+    {-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */
+    {-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
+    {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
+    {-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
+    {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
+    {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
+    {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant or type 3 after ceong */
+    {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
+    {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
+    {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
+    {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
+    {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
+    {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
+    {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
+    {-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
+};
+
+
+/*  #define KHMER_DEBUG */
+#ifdef KHMER_DEBUG
+#define KHDEBUG qDebug
+#else
+#    define KHDEBUG                                                                                \
+        if (0)                                                                                     \
+        printf
+#endif
+
+/*
+//  Given an input string of characters and a location in which to start looking
+//  calculate, using the state table, which one is the last character of the syllable
+//  that starts in the starting position.
+*/
+static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
+{
+    const ushort *uc = s + start;
+    int state = 0;
+    int pos = start;
+    *invalid = false;
+
+    while (pos < end) {
+        KhmerCharClass charClass = getKhmerCharClass(*uc);
+        if (pos == start) {
+            *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);
+        }
+        state = khmerStateTable[state][charClass & CF_CLASS_MASK];
+
+        KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
+                charClass, *uc );
+
+        if (state < 0) {
+            break;
+        }
+        ++uc;
+        ++pos;
+    }
+    return pos;
+}
+
+static void khmerAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    int end = from + len;
+    const ushort *uc = text + from;
+    uint i = 0;
+    Q_UNUSED(script);
+    attributes += from;
+    while ( i < len ) {
+    bool invalid;
+    uint boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
+
+    attributes[i].graphemeBoundary = true;
+
+    if ( boundary > len-1 ) boundary = len;
+    i++;
+    while ( i < boundary ) {
+        attributes[i].graphemeBoundary = false;
+        ++uc;
+        ++i;
+    }
+    assert( i == boundary );
+    }
+}
+
+
+const CharAttributeFunction charAttributeFunction[] = {
+//    Script_Unknown,
+    nullptr,
+//    Script_Inherited,
+    nullptr,
+//    Script_Common,
+    nullptr,
+//    Script_Latin,
+    nullptr,
+//    Script_Greek,
+    nullptr,
+//    Script_Cyrillic,
+    nullptr,
+//    Script_Armenian,
+    nullptr,
+//    Script_Hebrew,
+    nullptr,
+//    Script_Arabic,
+    nullptr,
+//    Script_Syriac,
+    nullptr,
+//    Script_Thaana,
+    nullptr,
+//    Script_Devanagari,
+    indicAttributes,
+//    Script_Bengali,
+    indicAttributes,
+//    Script_Gurmukhi,
+    indicAttributes,
+//    Script_Gujarati,
+    indicAttributes,
+//    Script_Oriya,
+    indicAttributes,
+//    Script_Tamil,
+    indicAttributes,
+//    Script_Telugu,
+    indicAttributes,
+//    Script_Kannada,
+    indicAttributes,
+//    Script_Malayalam,
+    indicAttributes,
+//    Script_Sinhala,
+    indicAttributes,
+//    Script_Thai,
+    thaiAttributes,
+//    Script_Lao,
+    nullptr,
+//    Script_Tibetan,
+    tibetanAttributes,
+//    Script_Myanmar,
+    myanmarAttributes,
+//    Script_Georgian,
+    nullptr,
+//    Script_Hangul,
+    nullptr,
+//    Script_Ethiopic,
+    nullptr,
+//    Script_Cherokee,
+    nullptr,
+//    Script_CanadianAboriginal,
+    nullptr,
+//    Script_Ogham,
+    nullptr,
+//    Script_Runic,
+    nullptr,
+//    Script_Khmer,
+    khmerAttributes
+};
+
+static void getCharAttributes(const ushort *string, uint stringLength,
+                                  const QUnicodeTools::ScriptItem *items, uint numItems,
+                                  QCharAttributes *attributes)
+{
+    if (stringLength == 0)
+        return;
+    for (uint i = 0; i < numItems; ++i) {
+        QChar::Script script = items[i].script;
+        if (script > QChar::Script_Khmer)
+            script = QChar::Script_Common;
+        CharAttributeFunction attributeFunction = charAttributeFunction[script];
+        if (!attributeFunction)
+            continue;
+        int end = i < numItems - 1 ? items[i + 1].position : stringLength;
+        attributeFunction(script, string, items[i].position, end - items[i].position, attributes);
+    }
+}
+
+}
 
 Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
                                       const ScriptItem *items, int numItems,
@@ -750,38 +2236,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
         if (!items || numItems <= 0)
             return;
 
-        QVarLengthArray<HB_ScriptItem, 64> scriptItems;
-        scriptItems.reserve(numItems);
-        int start = 0;
-        HB_Script startScript = script_to_hbscript(items[start].script);
-        if (Q_UNLIKELY(startScript == HB_Script_Inherited))
-            startScript = HB_Script_Common;
-        for (int i = start + 1; i < numItems; ++i) {
-            HB_Script script = script_to_hbscript(items[i].script);
-            if (Q_LIKELY(script == startScript || script == HB_Script_Inherited))
-                continue;
-            Q_ASSERT(items[i].position > items[start].position);
-            HB_ScriptItem item;
-            item.pos = items[start].position;
-            item.length = items[i].position - items[start].position;
-            item.script = startScript;
-            item.bidiLevel = 0; // unused
-            scriptItems.append(item);
-            start = i;
-            startScript = script;
-        }
-        if (items[start].position + 1 < length) {
-            HB_ScriptItem item;
-            item.pos = items[start].position;
-            item.length = length - items[start].position;
-            item.script = startScript;
-            item.bidiLevel = 0; // unused
-            scriptItems.append(item);
-        }
-        Q_STATIC_ASSERT(sizeof(QCharAttributes) == sizeof(HB_CharAttributes));
-        HB_GetTailoredCharAttributes(string, length,
-                                     scriptItems.constData(), scriptItems.size(),
-                                     reinterpret_cast<HB_CharAttributes *>(attributes));
+        Tailored::getCharAttributes(string, length, items, numItems, attributes);
     }
 }
 
@@ -796,7 +2251,7 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray
 {
     int sor = 0;
     int eor = 0;
-    uchar script = QChar::Script_Common;
+    QChar::Script script = QChar::Script_Common;
 
     for (int i = 0; i < length; ++i, eor = i) {
         uint ucs4 = string[i];
@@ -810,7 +2265,7 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray
 
         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
 
-        uchar nscript = prop->script;
+        QChar::Script nscript = QChar::Script(prop->script);
 
         if (Q_LIKELY(nscript == script || nscript <= QChar::Script_Common))
             continue;
diff --git a/src/corelib/text/qunicodetools_p.h b/src/corelib/text/qunicodetools_p.h
index 6294d9ceb4..5715444025 100644
--- a/src/corelib/text/qunicodetools_p.h
+++ b/src/corelib/text/qunicodetools_p.h
@@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2020 The Qt Company Ltd.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
@@ -75,7 +75,7 @@ namespace QUnicodeTools {
 struct ScriptItem
 {
     int position;
-    int script;
+    QChar::Script script;
 };
 
 using ScriptItemArray = QVarLengthArray<ScriptItem, 64>;
diff --git a/src/gui/text/qtextengine.cpp b/src/gui/text/qtextengine.cpp
index 2deae6f4ba..fce3e519d4 100644
--- a/src/gui/text/qtextengine.cpp
+++ b/src/gui/text/qtextengine.cpp
@@ -1985,7 +1985,7 @@ const QCharAttributes *QTextEngine::attributes() const
     for (int i = 0; i < layoutData->items.size(); ++i) {
         const QScriptItem &si = layoutData->items.at(i);
         scriptItems[i].position = si.position;
-        scriptItems[i].script = si.analysis.script;
+        scriptItems[i].script = QChar::Script(si.analysis.script);
     }
 
     QUnicodeTools::initCharAttributes(reinterpret_cast<const ushort *>(layoutData->string.constData()),