summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2020-03-14 15:23:31 +0100
committerLars Knoll <lars.knoll@qt.io>2020-03-17 01:42:58 +0100
commitdd7d8304bbe599320b163b94e9a4ad9a6f35b740 (patch)
tree9a27d9c39dd72667b402cbb01d9b3f71e6c0e9c6
parent76ced3f179f18f5f28416291140f5804c8a905b9 (diff)
Remove harfbuzz dependency from qunicodetools
Copy the relevant harfbuzz code over from Harfbuzz into qunicodetools.cpp. This is basically the attribute functions from the different harfbuzz shapers. Those methods do not require any font support but operate purely on Unicode input data. Adjusted the code to use Qt's own data structures and enums (QChar::Script and friends) instead of the harfbuzz equivalents. The code is 100% copyright The Qt Company, so we can do this without requiring any attribution. Change-Id: I8262ba34eae1837f031f07d1b6d9917c0224e160 Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
-rw-r--r--src/corelib/text/qunicodetools.cpp1529
-rw-r--r--src/corelib/text/qunicodetools_p.h4
-rw-r--r--src/gui/text/qtextengine.cpp2
3 files changed, 1495 insertions, 40 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index 76072f8282..819d8a9c3b 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -1,6 +1,6 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2020 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -41,8 +41,7 @@
#include "qunicodetables_p.h"
#include "qvarlengtharray.h"
-
-#include "qharfbuzz_p.h"
+#include "qlibrary.h"
#define FLAG(x) (1 << (x))
@@ -724,6 +723,1493 @@ static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *a
}
}
+namespace Tailored {
+
+using CharAttributeFunction = void (*)(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes); // signature shared by indicAttributes, thaiAttributes and tibetanAttributes
+
+
+// Character classes used by the Indic syllable scanner. Every value fits in
+// the unsigned char slots of the indicForms lookup table.
+enum Form {
+    Invalid          = 0,
+    UnknownForm      = Invalid,  // alias: same value as Invalid
+    Consonant        = 1,
+    Nukta            = 2,
+    Halant           = 3,
+    Matra            = 4,
+    VowelMark        = 5,
+    StressMark       = 6,
+    IndependentVowel = 7,
+    LengthMark       = 8,
+    Control          = 9,
+    Other            = 10
+};
+
+static const unsigned char indicForms[0xe00-0x900] = { // Form of each code point in U+0900..U+0DFF, indexed by (code point - 0x900); 128 entries per script block
+ // Devanagari (U+0900..U+097F)
+ Invalid, VowelMark, VowelMark, VowelMark,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Nukta, Other, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Matra, Matra, Matra, Matra,
+ Matra, Matra, Matra, Matra,
+ Matra, Halant, UnknownForm, UnknownForm,
+
+ Other, StressMark, StressMark, StressMark,
+ StressMark, UnknownForm, UnknownForm, UnknownForm,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Consonant,
+ Consonant, Consonant /* ??? */, Consonant, Consonant,
+
+ // Bengali (U+0980..U+09FF)
+ Invalid, VowelMark, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, Invalid, Invalid, IndependentVowel,
+
+ IndependentVowel, Invalid, Invalid, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Invalid, Consonant, Invalid,
+ Invalid, Invalid, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Nukta, Other, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Matra, Invalid, Invalid, Matra,
+ Matra, Invalid, Invalid, Matra,
+ Matra, Halant, Consonant, UnknownForm,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, VowelMark,
+ Invalid, Invalid, Invalid, Invalid,
+ Consonant, Consonant, Invalid, Consonant,
+
+ IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Consonant, Consonant, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Gurmukhi (U+0A00..U+0A7F)
+ Invalid, VowelMark, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
+ Invalid, Invalid, Invalid, IndependentVowel,
+
+ IndependentVowel, Invalid, Invalid, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Invalid, Consonant, Consonant,
+ Invalid, Consonant, Consonant, Invalid,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Nukta, Other, Matra, Matra,
+
+ Matra, Matra, Matra, Invalid,
+ Invalid, Invalid, Invalid, Matra,
+ Matra, Invalid, Invalid, Matra,
+ Matra, Halant, UnknownForm, UnknownForm,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, UnknownForm, UnknownForm, UnknownForm,
+ Invalid, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Invalid,
+
+ Other, Other, Invalid, Invalid,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ StressMark, StressMark, Consonant, Consonant,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Gujarati (U+0A80..U+0AFF)
+ Invalid, VowelMark, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
+
+ IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Invalid, Consonant, Consonant,
+ Invalid, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Nukta, Other, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Matra, Matra, Invalid, Matra,
+ Matra, Matra, Invalid, Matra,
+ Matra, Halant, UnknownForm, UnknownForm,
+
+ Other, UnknownForm, UnknownForm, UnknownForm,
+ UnknownForm, UnknownForm, UnknownForm, UnknownForm,
+ UnknownForm, UnknownForm, UnknownForm, UnknownForm,
+ UnknownForm, UnknownForm, UnknownForm, UnknownForm,
+
+ IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Oriya (U+0B00..U+0B7F)
+ Invalid, VowelMark, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, Invalid, Invalid, IndependentVowel,
+
+ IndependentVowel, Invalid, Invalid, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Invalid, Consonant, Consonant,
+ Invalid, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Nukta, Other, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Invalid, Invalid, Invalid, Matra,
+ Matra, Invalid, Invalid, Matra,
+ Matra, Halant, UnknownForm, UnknownForm,
+
+ Other, Invalid, Invalid, Invalid,
+ Invalid, UnknownForm, LengthMark, LengthMark,
+ Invalid, Invalid, Invalid, Invalid,
+ Consonant, Consonant, Invalid, Consonant,
+
+ IndependentVowel, IndependentVowel, Invalid, Invalid,
+ Invalid, Invalid, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Consonant, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Tamil (U+0B80..U+0BFF)
+ Invalid, Invalid, VowelMark, Other,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
+ Invalid, Invalid, IndependentVowel, IndependentVowel,
+
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+ IndependentVowel, Consonant, Invalid, Invalid,
+ Invalid, Consonant, Consonant, Invalid,
+ Consonant, Invalid, Consonant, Consonant,
+
+ Invalid, Invalid, Invalid, Consonant,
+ Consonant, Invalid, Invalid, Invalid,
+ Consonant, Consonant, Consonant, Invalid,
+ Invalid, Invalid, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Invalid, Invalid, Matra, Matra,
+
+ Matra, Matra, Matra, Invalid,
+ Invalid, Invalid, Matra, Matra,
+ Matra, Invalid, Matra, Matra,
+ Matra, Halant, Invalid, Invalid,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, LengthMark,
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Telugu (U+0C00..U+0C7F)
+ Invalid, VowelMark, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Invalid, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Invalid, Invalid, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Matra, Invalid, Matra, Matra,
+ Matra, Invalid, Matra, Matra,
+ Matra, Halant, Invalid, Invalid,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, LengthMark, Matra, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+
+ IndependentVowel, IndependentVowel, Invalid, Invalid,
+ Invalid, Invalid, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Kannada (U+0C80..U+0CFF)
+ Invalid, Invalid, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Invalid, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Nukta, Other, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Matra, Invalid, Matra, Matra,
+ Matra, Invalid, Matra, Matra,
+ Matra, Halant, Invalid, Invalid,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, LengthMark, LengthMark, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Consonant, Invalid,
+
+ IndependentVowel, IndependentVowel, VowelMark, VowelMark,
+ Invalid, Invalid, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Malayalam (U+0D00..U+0D7F)
+ Invalid, Invalid, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+
+ IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
+ IndependentVowel, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, UnknownForm, UnknownForm,
+ Invalid, Invalid, Matra, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Invalid, Invalid, Matra, Matra,
+ Matra, Invalid, Matra, Matra,
+ Matra, Halant, Invalid, Invalid,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Matra,
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+
+ IndependentVowel, IndependentVowel, Invalid, Invalid,
+ Invalid, Invalid, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+
+ // Sinhala (U+0D80..U+0DFF)
+ Invalid, Invalid, VowelMark, VowelMark,
+ Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+
+ IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
+ IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
+ Invalid, Invalid, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+
+ Consonant, Consonant, Invalid, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Consonant,
+ Invalid, Consonant, Invalid, Invalid,
+
+ Consonant, Consonant, Consonant, Consonant,
+ Consonant, Consonant, Consonant, Invalid,
+ Invalid, Invalid, Halant, Invalid,
+ Invalid, Invalid, Invalid, Matra,
+
+ Matra, Matra, Matra, Matra,
+ Matra, Invalid, Matra, Invalid,
+ Matra, Matra, Matra, Matra,
+ Matra, Matra, Matra, Matra,
+
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+ Invalid, Invalid, Invalid, Invalid,
+
+ Invalid, Invalid, Matra, Matra,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+ Other, Other, Other, Other,
+};
+
+// Returns the Form class of a UTF-16 code unit. Code units in
+// U+0900..U+0DFF are looked up in indicForms; outside that range only
+// U+25CC (reported as Consonant) and U+200C/U+200D (reported as Control)
+// are special, everything else is Other.
+static inline Form form(unsigned short uc)
+{
+    if (uc >= 0x900 && uc <= 0xdff)
+        return static_cast<Form>(indicForms[uc - 0x900]);
+
+    switch (uc) {
+    case 0x25cc:
+        return Consonant;
+    case 0x200c:
+    case 0x200d:
+        return Control;
+    default:
+        return Other;
+    }
+}
+
+// #define INDIC_DEBUG  (uncomment to make the IDEBUG() calls below print through qDebug)
+#ifdef INDIC_DEBUG
+#define IDEBUG qDebug // tracing enabled: IDEBUG(...) is a plain qDebug(...) call
+#else
+#define IDEBUG if constexpr (1) ; else qDebug // tracing disabled: the qDebug call sits in a never-taken else branch, so it is still compiled but not executed
+#endif
+
+/* syllables are of the form:
+
+ (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
+ (Consonant Nukta? Halant)* Consonant Halant
+ IndependentVowel VowelMark? StressMark?
+
+ We return syllable boundaries on invalid combinations as well
+
+ Scans s[start..end) and returns the index (relative to s) one past the
+ syllable that begins at start. s[start] is always read and always consumed.
+ *invalid is set to true when that first character is neither a Consonant,
+ an IndependentVowel nor Other; otherwise it is set to false.
+*/
+static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int start, int end, bool *invalid)
+{
+ *invalid = false;
+ IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
+ const ushort *uc = s+start;
+
+ int pos = 0;
+ Form state = form(uc[pos]);
+ IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
+ pos++;
+
+ if (state != Consonant && state != IndependentVowel) {
+ if (state != Other)
+ *invalid = true;
+ goto finish;
+ }
+
+ while (pos < end - start) {
+ Form newState = form(uc[pos]);
+ IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
+ switch (newState) {
+ case Control:
+ newState = state; // a control character never changes the scanner state
+ if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
+ break;
+ // the control character should be the last char in the item
+ if (state == Consonant && script == QChar::Script_Bengali && uc[pos-1] == 0x09B0 && uc[pos] == 0x200d /* ZWJ */)
+ break;
+ if (state == Consonant && script == QChar::Script_Kannada && uc[pos-1] == 0x0CB0 && uc[pos] == 0x200d /* ZWJ */)
+ break;
+ // Bengali and Kannada have a special exception for rendering yaphala with ra (to avoid reph) see http://www.unicode.org/faq/indic.html#15
+ ++pos; // consume the control character, then end the syllable
+ goto finish;
+ case Consonant:
+ if (state == Halant && (script != QChar::Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
+ break;
+ goto finish;
+ case Halant:
+ if (state == Nukta || state == Consonant)
+ break;
+ // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
+ if (script == QChar::Script_Bengali && pos == 1 &&
+ (uc[0] == 0x0985 || uc[0] == 0x098f))
+ break;
+ // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
+ if (script == QChar::Script_Sinhala && state == Matra) {
+ ++pos;
+ continue;
+ }
+ if (script == QChar::Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
+ ++pos;
+ continue;
+ }
+ goto finish;
+ case Nukta:
+ if (state == Consonant)
+ break;
+ goto finish;
+ case StressMark:
+ if (state == VowelMark)
+ break;
+ // fall through
+ case VowelMark:
+ if (state == Matra || state == LengthMark || state == IndependentVowel)
+ break;
+ // fall through
+ case Matra:
+ if (state == Consonant || state == Nukta)
+ break;
+ if (state == Matra) {
+ // ### needs proper testing for correct two/three part matras
+ break;
+ }
+ // ### not sure if this is correct. If it is, does it apply only to Bengali or should
+ // it work for all Indic languages?
+ // the combination Independent_A + Vowel Sign AA is allowed.
+ if (script == QChar::Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
+ break;
+ if (script == QChar::Script_Tamil && state == Matra) { // NOTE(review): state == Matra already took the break above, so this block looks unreachable
+ if (uc[pos-1] == 0x0bc6 &&
+ (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
+ break;
+ if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
+ break;
+ }
+ goto finish;
+
+ case LengthMark:
+ if (state == Matra) {
+ // ### needs proper testing for correct two/three part matras
+ break;
+ } // otherwise falls through to the shared "goto finish" below
+ case IndependentVowel:
+ case Invalid:
+ case Other:
+ goto finish;
+ }
+ state = newState;
+ pos++;
+ }
+ finish:
+ return pos+start;
+}
+
+// Sets graphemeBoundary for the len code units of text starting at from:
+// true on the first code unit of every syllable reported by
+// indic_nextSyllableBoundary(), false on the remaining code units of that
+// syllable. No other attribute field is touched.
+static void indicAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    const int end = from + len;
+    attributes += from;     // from here on attributes[] is indexed relative to the run
+    uint i = 0;
+    while (i < len) {
+        bool invalid;       // filled in by the scanner; not used here
+        uint boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
+        attributes[i].graphemeBoundary = true;
+
+        // never walk past the end of the run
+        if (boundary > len-1) boundary = len;
+        i++;
+        while (i < boundary) {
+            attributes[i].graphemeBoundary = false;
+            ++i;
+        }
+        assert(i == boundary);
+    }
+}
+
+// Version number handed to QLibrary::resolve() when looking up libthai.
+#define LIBTHAI_MAJOR 0
+
+/*
+ * Local copies of the libthai declarations used below.
+ * If libthai changes, these have to be updated as well.
+ */
+struct thcell_t {
+    unsigned char base;     /**< base character */
+    unsigned char hilo;     /**< upper/lower vowel/diacritic */
+    unsigned char top;      /**< top-level mark */
+};
+using th_brk_def = int (*)(const unsigned char *, int *, size_t);
+using th_next_cell_def = size_t (*)(const unsigned char *, size_t, struct thcell_t *, int);
+
+/* libthai entry points; null until init_libthai() has resolved them */
+static th_brk_def th_brk = nullptr;
+static th_next_cell_def th_next_cell = nullptr;
+
+// Resolves th_brk and th_next_cell from the "thai" library. The lookup is
+// attempted at most once; later calls only report the cached outcome.
+// Returns 1 when both entry points are available, 0 otherwise.
+static int init_libthai()
+{
+    static bool attempted = false;
+
+    const bool missing = !th_brk || !th_next_cell;
+    if (missing && !attempted) {
+        th_brk = reinterpret_cast<th_brk_def>(
+                QLibrary::resolve(QLatin1String("thai"), static_cast<int>(LIBTHAI_MAJOR), "th_brk"));
+        th_next_cell = reinterpret_cast<th_next_cell_def>(
+                QLibrary::resolve(QLatin1String("thai"), LIBTHAI_MAJOR, "th_next_cell"));
+        attempted = true;
+    }
+    return (th_brk && th_next_cell) ? 1 : 0;
+}
+
+// Converts len UTF-16 code units to single bytes and NUL-terminates the
+// result, so cstr must provide room for len + 1 bytes.
+//   - code units up to 0xA0 are copied unchanged,
+//   - 0x0E01..0x0E5B are shifted into 0xA1..0xFB,
+//   - everything else becomes 0xFF.
+static void to_tis620(const ushort *string, uint len, char *cstr)
+{
+    unsigned char *out = reinterpret_cast<unsigned char *>(cstr);
+    const ushort *const stop = string + len;
+
+    for (const ushort *in = string; in != stop; ++in, ++out) {
+        const ushort ch = *in;
+        if (ch <= 0xa0)
+            *out = static_cast<unsigned char>(ch);
+        else if (ch >= 0xe01 && ch <= 0xe5b)
+            *out = static_cast<unsigned char>(ch - 0xe00 + 0xa0);
+        else
+            *out = static_cast<unsigned char>(~0); // Same encoding as libthai uses for invalid chars
+    }
+
+    *out = 0;
+}
+
+/*
+ * Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
+ *
+ * string/len is the Thai run, attributes points at the entry for string[0].
+ * Does nothing when libthai cannot be loaded, when the run is empty or when
+ * the temporary text buffer cannot be allocated. Otherwise the word and
+ * line-break fields of all len entries are reset, the positions reported by
+ * th_brk() are marked, and graphemeBoundary is set per th_next_cell() cell.
+ */
+static void thaiAssignAttributes(const ushort *string, uint len, QCharAttributes *attributes)
+{
+    constexpr uint StackCapacity = 128;
+    char stackText[StackCapacity];
+    int stackBreaks[StackCapacity];
+    struct thcell_t tis_cell;
+
+    if (!init_libthai())
+        return;
+
+    // An empty run has no attributes[0] to mark.
+    if (len == 0)
+        return;
+
+    // The converted text needs len + 1 bytes (terminating NUL), so the
+    // stack buffer is only large enough for len < StackCapacity.
+    char *cstr = stackText;
+    if (len >= StackCapacity) {
+        cstr = static_cast<char *>(malloc(len*sizeof(char) + 1));
+        if (!cstr)
+            return;     // out of memory: leave the attributes untouched
+    }
+
+    to_tis620(string, len, cstr);
+
+    for (uint i = 0; i < len; ++i) {
+        attributes[i].wordBreak = false;
+        attributes[i].wordStart = false;
+        attributes[i].wordEnd = false;
+        attributes[i].lineBreak = false;
+    }
+
+    int *break_positions = stackBreaks;
+    int brp_size = StackCapacity;
+    if (len > StackCapacity) {
+        break_positions = static_cast<int *>(malloc(sizeof(int) * len));
+        if (break_positions)    // only clear the buffer if we actually got one
+            memset(break_positions, 0, sizeof(int) * len);
+        brp_size = len;
+    }
+
+    if (break_positions) {
+        attributes[0].wordBreak = true;
+        attributes[0].wordStart = true;
+        attributes[0].wordEnd = false;
+
+        // th_brk() returns an int; a non-positive result means "no breaks"
+        // and must not wrap around to a huge unsigned count.
+        const int found = th_brk(reinterpret_cast<const unsigned char *>(cstr), break_positions, brp_size);
+        const uint numbreaks = found > 0 ? static_cast<uint>(found) : 0;
+        for (uint i = 0; i < numbreaks; ++i) {
+            attributes[break_positions[i]].wordBreak = true;
+            attributes[break_positions[i]].wordStart = true;
+            attributes[break_positions[i]].wordEnd = true;
+            attributes[break_positions[i]].lineBreak = true;
+        }
+        if (numbreaks > 0)
+            attributes[break_positions[numbreaks - 1]].wordStart = false;
+
+        if (break_positions != stackBreaks)
+            free(break_positions);
+    }
+
+    /* manage grapheme boundaries */
+    uint i = 0;
+    while (i < len) {
+        uint cell_length = static_cast<uint>(th_next_cell(reinterpret_cast<const unsigned char *>(cstr) + i, len - i, &tis_cell, true));
+        // Guarantee progress: a zero-length cell would loop forever and
+        // make the SARA AM check below read cstr[i - 1].
+        if (cell_length == 0)
+            cell_length = 1;
+
+        attributes[i].graphemeBoundary = true;
+        for (uint j = 1; j < cell_length; j++)
+            attributes[i + j].graphemeBoundary = false;
+
+        /* Set graphemeBoundary for SARA AM */
+        if (cstr[i + cell_length - 1] == (char)0xd3)
+            attributes[i + cell_length - 1].graphemeBoundary = true;
+
+        i += cell_length;
+    }
+
+    if (cstr != stackText)
+        free(cstr);
+}
+
+// Attribute routine for Thai runs: forwards the len code units starting at
+// text[from], together with the matching attributes entries, to
+// thaiAssignAttributes(). script is only checked by the assertion.
+static void thaiAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    assert(script == QChar::Script_Thai);
+    Q_UNUSED(script);
+    thaiAssignAttributes(text + from, len, attributes + from);
+}
+
+/*
+  A Tibetan syllable consists of, in this order:
+    - the head position consonant
+    - the first sub-joined consonant
+    - ... intermediate sub-joined consonants (if any)
+    - the last sub-joined consonant
+    - a sub-joined vowel (a-chung U+0F71)
+    - a standard or compound vowel sign (or 'virama' for devanagari transliteration)
+*/
+
+// Character classes used by the Tibetan syllable scanner; the values are
+// stored as bytes in the tibetanForm lookup table.
+enum TibetanForm {
+    TibetanOther              = 0,
+    TibetanHeadConsonant      = 1,
+    TibetanSubjoinedConsonant = 2,
+    TibetanSubjoinedVowel     = 3,
+    TibetanVowel              = 4
+};
+
+/* this table starts at U+0f40 and holds one TibetanForm value for each of the
+   0x80 code points U+0F40..U+0FBF.
+   NOTE(review): no entry is TibetanSubjoinedVowel; U+0F71 (index 0x31) is
+   listed as TibetanVowel -- confirm this is intended. */
+static const unsigned char tibetanForm[0x80] = {
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
+ TibetanOther, TibetanOther, TibetanOther, TibetanOther,
+
+ TibetanOther, TibetanVowel, TibetanVowel, TibetanVowel,
+ TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+ TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+ TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+
+ TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+ TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
+ TibetanOther, TibetanOther, TibetanOther, TibetanOther,
+ TibetanOther, TibetanOther, TibetanOther, TibetanOther,
+
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
+ TibetanSubjoinedConsonant, TibetanOther, TibetanOther, TibetanOther
+};
+
+/* TibetanForm of code point c: table lookup for U+0F40..U+0FBF, TibetanOther
+   for anything else. The argument is evaluated more than once. */
+#define tibetan_form(c) \
+    (((c) < 0x0f40 || (c) >= 0x0fc0) ? TibetanOther : (TibetanForm)tibetanForm[(c) - 0x0f40])
+
+// Scans s[start..end) and returns the index (relative to s) one past the
+// Tibetan syllable that begins at start. s[start] is always read and always
+// consumed. *invalid is set to true when that first character is neither a
+// head consonant nor TibetanOther, and to false otherwise.
+static int tibetan_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
+{
+    const ushort *uc = s + start;
+
+    // Establish the default up front: resetting the flag on the way out
+    // would discard the "invalid start" result set just below.
+    *invalid = false;
+
+    int pos = 0;
+    TibetanForm state = tibetan_form(*uc);
+    pos++;
+
+    if (state != TibetanHeadConsonant) {
+        if (state != TibetanOther)
+            *invalid = true;
+        return start + pos;
+    }
+
+    while (pos < end - start) {
+        const TibetanForm newState = tibetan_form(uc[pos]);
+        switch (newState) {
+        case TibetanSubjoinedConsonant:
+        case TibetanSubjoinedVowel:
+            // only allowed directly after a head or sub-joined consonant
+            if (state != TibetanHeadConsonant &&
+                state != TibetanSubjoinedConsonant)
+                return start + pos;
+            state = newState;
+            break;
+        case TibetanVowel:
+            // vowel signs extend the syllable but do not change the state
+            if (state != TibetanHeadConsonant &&
+                state != TibetanSubjoinedConsonant &&
+                state != TibetanSubjoinedVowel)
+                return start + pos;
+            break;
+        case TibetanOther:
+        case TibetanHeadConsonant:
+            // start of the next syllable (or a non-Tibetan character)
+            return start + pos;
+        }
+        pos++;
+    }
+
+    return start + pos;
+}
+
+// Sets graphemeBoundary for the len code units of text starting at from:
+// true on the first code unit of every syllable reported by
+// tibetan_nextSyllableBoundary(), false on the remaining code units of that
+// syllable. script is unused; no other attribute field is touched.
+static void tibetanAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    Q_UNUSED(script);
+    const int end = from + len;
+    attributes += from;     // from here on attributes[] is indexed relative to the run
+    uint i = 0;
+    while (i < len) {
+        bool invalid;       // filled in by the scanner; not used here
+        uint boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
+
+        attributes[i].graphemeBoundary = true;
+
+        // never walk past the end of the run
+        if (boundary > len-1) boundary = len;
+        i++;
+        while (i < boundary) {
+            attributes[i].graphemeBoundary = false;
+            ++i;
+        }
+        assert(i == boundary);
+    }
+}
+
+enum MymrCharClassValues { // character class numbers; combined with MymrCharClassFlags bits to form a MymrCharClass
+ Mymr_CC_RESERVED = 0,
+ Mymr_CC_CONSONANT = 1, /* Consonant of type 1, that has subscript form */
+ Mymr_CC_CONSONANT2 = 2, /* Consonant of type 2, that has no subscript form */
+ Mymr_CC_NGA = 3, /* Consonant NGA */
+ Mymr_CC_YA = 4, /* Consonant YA */
+ Mymr_CC_RA = 5, /* Consonant RA */
+ Mymr_CC_WA = 6, /* Consonant WA */
+ Mymr_CC_HA = 7, /* Consonant HA */
+ Mymr_CC_IND_VOWEL = 8, /* Independent vowel */
+ Mymr_CC_ZERO_WIDTH_NJ_MARK = 9, /* Zero Width non joiner character (0x200C) */
+ Mymr_CC_VIRAMA = 10, /* Subscript consonant combining character */
+ Mymr_CC_PRE_VOWEL = 11, /* Dependent vowel, prebase (Vowel e) */
+ Mymr_CC_BELOW_VOWEL = 12, /* Dependent vowel, below-base (Vowel u, uu) */
+ Mymr_CC_ABOVE_VOWEL = 13, /* Dependent vowel, above-base (Vowel i, ii, ai) */
+ Mymr_CC_POST_VOWEL = 14, /* Dependent vowel, post-base (Vowel aa) */
+ Mymr_CC_SIGN_ABOVE = 15,
+ Mymr_CC_SIGN_BELOW = 16,
+ Mymr_CC_SIGN_AFTER = 17,
+ Mymr_CC_ZERO_WIDTH_J_MARK = 18, /* Zero width joiner character */
+ Mymr_CC_COUNT = 19 /* This is the number of character classes */
+};
+
+enum MymrCharClassFlags { // flag bits OR'ed onto a MymrCharClassValues number
+ Mymr_CF_CLASS_MASK = 0x0000FFFF, /* low 16 bits: the MymrCharClassValues number */
+
+ Mymr_CF_CONSONANT = 0x01000000, /* flag to speed up comparing */
+ Mymr_CF_MEDIAL = 0x02000000, /* flag to speed up comparing */
+ Mymr_CF_IND_VOWEL = 0x04000000, /* flag to speed up comparing */
+ Mymr_CF_DEP_VOWEL = 0x08000000, /* flag to speed up comparing */
+ Mymr_CF_DOTTED_CIRCLE = 0x10000000, /* add a dotted circle if a character with this flag is the
+ first in a syllable */
+ Mymr_CF_VIRAMA = 0x20000000, /* flag to speed up comparing */
+
+ /* position flags */
+ Mymr_CF_POS_BEFORE = 0x00080000,
+ Mymr_CF_POS_BELOW = 0x00040000,
+ Mymr_CF_POS_ABOVE = 0x00020000,
+ Mymr_CF_POS_AFTER = 0x00010000,
+ Mymr_CF_POS_MASK = 0x000f0000, /* all four position flags */
+
+ Mymr_CF_AFTER_KINZI = 0x00100000
+};
+
+/* Characters that get referred to by name */
+enum MymrChar
+{
+ Mymr_C_SIGN_ZWNJ = 0x200C,
+ Mymr_C_SIGN_ZWJ = 0x200D,
+ Mymr_C_DOTTED_CIRCLE = 0x25CC,
+ Mymr_C_RA = 0x101B,
+ Mymr_C_YA = 0x101A,
+ Mymr_C_NGA = 0x1004,
+ Mymr_C_VOWEL_E = 0x1031,
+ Mymr_C_VIRAMA = 0x1039
+};
+
+enum // two-letter shorthands (class number | flags) used to fill the mymrCharClasses table
+{
+ Mymr_xx = Mymr_CC_RESERVED,
+ Mymr_c1 = Mymr_CC_CONSONANT | Mymr_CF_CONSONANT | Mymr_CF_POS_BELOW,
+ Mymr_c2 = Mymr_CC_CONSONANT2 | Mymr_CF_CONSONANT,
+ Mymr_ng = Mymr_CC_NGA | Mymr_CF_CONSONANT | Mymr_CF_POS_ABOVE,
+ Mymr_ya = Mymr_CC_YA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_AFTER | Mymr_CF_AFTER_KINZI,
+ Mymr_ra = Mymr_CC_RA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BEFORE,
+ Mymr_wa = Mymr_CC_WA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW,
+ Mymr_ha = Mymr_CC_HA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW,
+ Mymr_id = Mymr_CC_IND_VOWEL | Mymr_CF_IND_VOWEL,
+ Mymr_vi = Mymr_CC_VIRAMA | Mymr_CF_VIRAMA | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE,
+ Mymr_dl = Mymr_CC_PRE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BEFORE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+ Mymr_db = Mymr_CC_BELOW_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+ Mymr_da = Mymr_CC_ABOVE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+ Mymr_dr = Mymr_CC_POST_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
+ Mymr_sa = Mymr_CC_SIGN_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_ABOVE | Mymr_CF_AFTER_KINZI,
+ Mymr_sb = Mymr_CC_SIGN_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_BELOW | Mymr_CF_AFTER_KINZI,
+ Mymr_sp = Mymr_CC_SIGN_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI
+};
+
+
+/* A character class value ORed with character class flags. */
+using MymrCharClass = int;
+
+
+/*
+// Combined character class of each code point in the range 0x1000 - 0x105F.
+// getMyanmarCharClass() indexes this table with (code point - 0x1000) after
+// checking that the code point lies inside that range.
+*/
+static const MymrCharClass mymrCharClasses[] =
+{
+ Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_ng, Mymr_c1, Mymr_c1, Mymr_c1,
+ Mymr_c1, Mymr_c1, Mymr_c2, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, /* 1000 - 100F */
+ Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1,
+ Mymr_c1, Mymr_c1, Mymr_ya, Mymr_ra, Mymr_c1, Mymr_wa, Mymr_c1, Mymr_ha, /* 1010 - 101F */
+ Mymr_c2, Mymr_c2, Mymr_xx, Mymr_id, Mymr_id, Mymr_id, Mymr_id, Mymr_id,
+ Mymr_xx, Mymr_id, Mymr_id, Mymr_xx, Mymr_dr, Mymr_da, Mymr_da, Mymr_db, /* 1020 - 102F */
+ Mymr_db, Mymr_dl, Mymr_da, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_sa, Mymr_sb,
+ Mymr_sp, Mymr_vi, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1030 - 103F */
+ Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx,
+ Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1040 - 104F */
+ Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx,
+ Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1050 - 105F */
+};
+
+/*
+// Returns the character class of ch: the joiner classes for ZWJ and ZWNJ,
+// the mymrCharClasses entry for code points 0x1000 - 0x105f, and
+// Mymr_CC_RESERVED for everything else.
+*/
+static MymrCharClass
+getMyanmarCharClass (ushort ch)
+{
+    switch (ch) {
+    case Mymr_C_SIGN_ZWJ:
+        return Mymr_CC_ZERO_WIDTH_J_MARK;
+    case Mymr_C_SIGN_ZWNJ:
+        return Mymr_CC_ZERO_WIDTH_NJ_MARK;
+    default:
+        break;
+    }
+
+    const bool inTable = ch >= 0x1000 && ch <= 0x105f;
+    if (inTable)
+        return mymrCharClasses[ch - 0x1000];
+
+    return Mymr_CC_RESERVED;
+}
+
+/*
+// State table walked by myanmar_nextSyllableBoundary().
+// Each row is a state; each column is a character class value (the part of a
+// MymrCharClass selected by Mymr_CF_CLASS_MASK), in the order of the header row.
+// A non-negative entry is the next state. A negative entry ends the scan:
+// on -1 the scan stops at the current character, on -2 it also steps back
+// one character.
+*/
+static const signed char mymrStateTable[][Mymr_CC_COUNT] =
+{
+/* xx c1 c2 ng ya ra wa ha id zwnj vi dl db da dr sa sb sp zwj */
+ { 1, 4, 4, 2, 4, 4, 4, 4, 24, 1, 27, 17, 18, 19, 20, 21, 1, 1, 4}, /* 0 - ground state */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sp to the right of the syllable) */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 17, 18, 19, 20, 21, -1, -1, 4}, /* 2 - NGA */
+ {-1, 4, 4, 4, 4, 4, 4, 4, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 3 - Virama after NGA */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, 17, 18, 19, 20, 21, 1, 1, -1}, /* 4 - Base consonant */
+ {-2, 6, -2, -2, 7, 8, 9, 10, -2, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 5 - First virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 17, 18, 19, 20, 21, -1, -1, -1}, /* 6 - c1 after virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, /* 7 - ya after virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, /* 8 - ra after virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, /* 9 - wa after virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, /* 10 - ha after virama */
+ {-1, -1, -1, -1, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 11 - Virama after NGA+zwj */
+ {-2, -2, -2, -2, -2, -2, 13, 14, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 12 - Second virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 18, 19, 20, 21, -1, -1, -1}, /* 13 - wa after virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, /* 14 - ha after virama */
+ {-2, -2, -2, -2, -2, -2, -2, 16, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 15 - Third virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, /* 16 - ha after virama */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 20, 21, 1, 1, -1}, /* 17 - dl, Dependent vowel e */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, -1, 21, 1, 1, -1}, /* 18 - db, Dependent vowel u,uu */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, -1}, /* 19 - da, Dependent vowel i,ii,ai */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, 1, 1, -1}, /* 20 - dr, Dependent vowel aa */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1}, /* 21 - sa, Sign anusvara */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 22 - atha */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1}, /* 23 - zwnj for atha */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 24 - Independent vowel */
+ {-2, -2, -2, -2, 26, 26, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, /* 25 - Virama after subscript consonant */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, 1, -1}, /* 26 - ra/ya after subscript consonant + virama */
+ {-1, 6, -1, -1, 7, 8, 9, 10, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 27 - Virama after ground state */
+/* exit state -2 is for an invalid order of medials and for combinations of invalid
+ characters with a virama, where the virama should be treated as the start of the
+ next syllable
+ */
+};
+
+/*#define MYANMAR_DEBUG */
+/*
+// Debug tracing for the Myanmar syllable scanner.
+// With MYANMAR_DEBUG defined the trace goes to qDebug; otherwise the call is
+// still compiled (so its arguments stay type-checked) but never executed.
+// The disabled form is spelled "if (true) {} else printf" rather than
+// "if (0) printf" so that the macro brings its own else branch: an else that
+// follows an unbraced MMDEBUG(...) call then binds to the caller's if and not
+// to the if hidden inside the macro.
+*/
+#ifdef MYANMAR_DEBUG
+#  define MMDEBUG qDebug
+#else
+#  define MMDEBUG \
+    if (true) {} else \
+    printf
+#endif
+
+/*
+// Scans s from index start (never reaching index end) with mymrStateTable and
+// returns the index at which the syllable beginning at start ends, i.e. the index
+// of the first character that is not part of it. If no exit state is reached
+// before end, end is returned.
+// *invalid is set to false when nothing is scanned; otherwise it tells whether the
+// first character carries Mymr_CF_DOTTED_CIRCLE.
+*/
+static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
+{
+    *invalid = false;
+
+    int state = 0;
+    int pos = start;
+    for (; pos < end; ++pos) {
+        const MymrCharClass cls = getMyanmarCharClass(s[pos]);
+        state = mymrStateTable[state][cls & Mymr_CF_CLASS_MASK];
+        if (pos == start)
+            *invalid = (cls & Mymr_CF_DOTTED_CIRCLE) != 0;
+
+        MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", pos - start, state, cls, s[pos]);
+
+        /* -1: the current character starts the next syllable */
+        if (state == -1)
+            return pos;
+        /* below -1: the previous character already belongs to the next syllable */
+        if (state < -1)
+            return pos - 1;
+    }
+    return pos;
+}
+
+/*
+// Sets the character attributes for the Myanmar run text[from .. from + len).
+// The run is split into syllables with myanmar_nextSyllableBoundary(). The first
+// character of every syllable gets graphemeBoundary and lineBreak set to true;
+// every other character of the syllable gets graphemeBoundary set to false.
+// script is not used.
+*/
+static void myanmarAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    Q_UNUSED(script);
+
+    const int end = from + len;
+    attributes += from; /* from here on, indices are relative to the start of the run */
+
+    uint i = 0;
+    while (i < len) {
+        bool invalid; /* demanded by the scanner's signature, not evaluated here */
+        uint boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
+
+        attributes[i].graphemeBoundary = true;
+        attributes[i].lineBreak = true;
+
+        if (boundary > len-1)
+            boundary = len;
+        i++;
+        while (i < boundary) {
+            attributes[i].graphemeBoundary = false;
+            ++i;
+        }
+        /* the scanner must have consumed at least one character */
+        Q_ASSERT(i == boundary);
+    }
+}
+
+/*
+// Vocabulary
+// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
+// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
+// split vowels, signs... but there is only one base in a syllable, it has to be coded as
+// the first character of the syllable.
+// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
+// Khmer language has five of them. Khmer split vowels either have one part before the
+// base and one after the base or they have a part before the base and a part above the base.
+// The first part of all Khmer split vowels is the same character, identical to
+// the glyph of Khmer dependent vowel SRA EI
+// coeng --> modifier used in Khmer to construct coeng (subscript) consonants.
+// Unlike in Indian languages, the coeng modifies the consonant that follows it,
+// not the one preceding it. Each consonant has two forms, the base form and the subscript form:
+// the base form is the normal one (using the consonant's code point), the subscript form is
+// displayed when the combination coeng + consonant is encountered.
+// Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
+// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
+// Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
+// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
+// if it is attached to a consonant of the first series or a consonant of the second series.
+// Most consonants have an equivalent in the other series, but some of them exist only in
+// one series (for example SA). If we want to use the consonant SA with a vowel sound that
+// can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
+// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN,
+// x17C9 and x17CA. TRIISAP changes a first series consonant to second series sound and
+// MUSIKATOAN a second series consonant to have a first series vowel sound.
+// Consonant shifters are both normally superscript marks, but, when they are followed by a
+// superscript, they change shape and take the form of subscript dependent vowel SRA U.
+// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
+// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
+// be placed after the coeng consonant.
+// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base
+// Each vowel has its own position. Only one vowel per syllable is allowed.
+// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
+// Allowed in a syllable.
+//
+//
+// order is important here! This order must be the same that is found in each horizontal
+// line in the statetable for Khmer (see khmerStateTable) .
+*/
+/*
+// Character class values. Each value is also the column index of that class
+// in khmerStateTable, so the numbering must not change.
+*/
+enum KhmerCharClassValues {
+    CC_RESERVED           = 0,
+
+    /* Consonant of type 1 or independent vowel */
+    CC_CONSONANT          = 1,
+    /* Consonant of type 2 */
+    CC_CONSONANT2         = 2,
+    /* Consonant of type 3 */
+    CC_CONSONANT3         = 3,
+
+    /* Zero Width non joiner character (0x200C) */
+    CC_ZERO_WIDTH_NJ_MARK = 4,
+
+    CC_CONSONANT_SHIFTER  = 5,
+    /* Khmer special diacritic accent - treated differently in state table */
+    CC_ROBAT              = 6,
+    /* Subscript consonant combining character */
+    CC_COENG              = 7,
+    CC_DEPENDENT_VOWEL    = 8,
+    CC_SIGN_ABOVE         = 9,
+    CC_SIGN_AFTER         = 10,
+
+    /* Zero width joiner character */
+    CC_ZERO_WIDTH_J_MARK  = 11,
+
+    /* This is the number of character classes */
+    CC_COUNT              = 12
+};
+
+
+/*
+// Flag bits that are ORed with a character class value to form a KhmerCharClass.
+// The class value itself occupies the bits selected by CF_CLASS_MASK.
+*/
+enum KhmerCharClassFlags {
+    /* selects the character class value */
+    CF_CLASS_MASK    = 0x0000FFFF,
+
+    /* position flags */
+    CF_POS_AFTER     = 1 << 16,
+    CF_POS_ABOVE     = 1 << 17,
+    CF_POS_BELOW     = 1 << 18,
+    CF_POS_BEFORE    = 1 << 19,
+    CF_POS_MASK      = CF_POS_AFTER | CF_POS_ABOVE | CF_POS_BELOW | CF_POS_BEFORE,
+
+    /* flag to speed up comparing */
+    CF_CONSONANT     = 1 << 24,
+    /* flag for a split vowel -> the first part is added in front of the syllable */
+    CF_SPLIT_VOWEL   = 1 << 25,
+    /* add a dotted circle if a character with this flag is the first in a syllable */
+    CF_DOTTED_CIRCLE = 1 << 26,
+    /* flags to speed up comparing */
+    CF_COENG         = 1 << 27,
+    CF_SHIFTER       = 1 << 28,
+    CF_ABOVE_VOWEL   = 1 << 29
+};
+
+
+/* Characters that get referred to by name, listed in code point order */
+enum KhmerChar {
+    C_RO           = 0x179A, /* KHMER LETTER RO */
+    C_VOWEL_AA     = 0x17B6, /* KHMER VOWEL SIGN AA */
+    C_VOWEL_E      = 0x17C1, /* KHMER VOWEL SIGN E */
+    C_SIGN_NIKAHIT = 0x17C6, /* KHMER SIGN NIKAHIT */
+    C_COENG        = 0x17D2, /* KHMER SIGN COENG */
+    C_SIGN_ZWNJ    = 0x200C, /* ZERO WIDTH NON-JOINER */
+    C_SIGN_ZWJ     = 0x200D  /* ZERO WIDTH JOINER */
+};
+
+
+/*
+// Simple classes: a CC_* class value ORed with the CF_* flags that apply to it.
+// They are used with the state table (in this file) to control the length of a syllable.
+// They are also used to know where a character should be placed (location in reference to
+// the base character) and to know if a character, when independently displayed, should be
+// displayed with a dotted-circle to indicate an error in syllable construction.
+*/
+enum {
+    _xx = CC_RESERVED,
+
+    /* consonants */
+    _c1 = CF_CONSONANT | CC_CONSONANT,
+    _c2 = CF_CONSONANT | CC_CONSONANT2,
+    _c3 = CF_CONSONANT | CC_CONSONANT3,
+
+    /* signs, robat, consonant shifter and coeng */
+    _sa = CF_DOTTED_CIRCLE | CF_POS_ABOVE | CC_SIGN_ABOVE,
+    _sp = CF_DOTTED_CIRCLE | CF_POS_AFTER | CC_SIGN_AFTER,
+    _rb = CF_DOTTED_CIRCLE | CF_POS_ABOVE | CC_ROBAT,
+    _cs = CF_DOTTED_CIRCLE | CF_SHIFTER | CC_CONSONANT_SHIFTER,
+    _co = CF_DOTTED_CIRCLE | CF_COENG | CC_COENG,
+
+    /* dependent vowels: before (dl), below (db), above (da), after (dr) */
+    _dl = CF_DOTTED_CIRCLE | CF_POS_BEFORE | CC_DEPENDENT_VOWEL,
+    _db = CF_DOTTED_CIRCLE | CF_POS_BELOW | CC_DEPENDENT_VOWEL,
+    _da = CF_DOTTED_CIRCLE | CF_POS_ABOVE | CF_ABOVE_VOWEL | CC_DEPENDENT_VOWEL,
+    _dr = CF_DOTTED_CIRCLE | CF_POS_AFTER | CC_DEPENDENT_VOWEL,
+
+    /* split vowels */
+    _va = CF_SPLIT_VOWEL | _da,
+    _vr = CF_SPLIT_VOWEL | _dr
+};
+
+
+/*
+// A character class: one character class value
+// combined (ORed) with the character class flags that apply.
+*/
+using KhmerCharClass = unsigned long;
+
+
+/*
+// Character class tables
+// _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
+// _sa Sign placed above the base
+// _sp Sign placed after the base
+// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
+// _c2 Consonant of type 2 (only RO)
+// _c3 Consonant of type 3
+// _rb Khmer sign robat u17CC. combining mark for subscript consonants
+// _cs Consonant-shifter
+// _dl Dependent vowel placed before the base (left of the base)
+// _db Dependent vowel placed below the base
+// _da Dependent vowel placed above the base
+// _dr Dependent vowel placed behind the base (right of the base)
+// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
+// it to create a subscript consonant or independent vowel
+// _va Khmer split vowel in which the first part is before the base and the second one above the base
+// _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
+//
+// The table is indexed by (code point - KhmerFirstChar); see getKhmerCharClass().
+*/
+static const KhmerCharClass khmerCharClasses[] = {
+ _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
+ _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
+ _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
+ _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
+ _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
+ _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx /* 17D0 - 17DF */
+};
+
+/*
+// First and last code point covered by khmerCharClasses.
+// This enum must reflect the range of that table.
+*/
+enum KhmerCharClassesRange {
+    KhmerFirstChar = 0x1780,
+    KhmerLastChar  = 0x17df
+};
+
+/*
+// Maps a character of the input string to its character class:
+//  - ZWJ and ZWNJ (two characters that may appear within the syllable, but are not
+//    in the khmerCharClasses table) get their own class values,
+//  - characters in the range KhmerFirstChar - KhmerLastChar get their table entry,
+//  - anything else is an unknown object and gets _xx (CC_RESERVED).
+*/
+static KhmerCharClass getKhmerCharClass(ushort uc)
+{
+    switch (uc) {
+    case C_SIGN_ZWJ:
+        return CC_ZERO_WIDTH_J_MARK;
+    case C_SIGN_ZWNJ:
+        return CC_ZERO_WIDTH_NJ_MARK;
+    default:
+        break;
+    }
+
+    const bool inTable = uc >= KhmerFirstChar && uc <= KhmerLastChar;
+    if (inTable)
+        return khmerCharClasses[uc - KhmerFirstChar];
+
+    return CC_RESERVED;
+}
+
+
+/*
+// The stateTable is used to calculate the end (the length) of a well
+// formed Khmer Syllable.
+//
+// Each horizontal line is ordered exactly the same way as the values in KhmerCharClassValues.
+// This correspondence is what allows a class value to be used directly as a column index of the table.
+//
+// Each line corresponds to a state, which does not necessarily need to be a type
+// of component... for example, state 2 is a base, which is always a first character
+// in the syllable, but the state could be produced by a consonant of any type when
+// it is the first character that is analysed (in ground state).
+//
+// Differentiating 3 types of consonants is necessary in order to
+// forbid the use of certain combinations, such as having a second
+// coeng after a coeng RO,
+// The inexistent possibility of having a type 3 after another type 3 is permitted,
+// eliminating it would very much complicate the table, and it does not create typing
+// problems, as the case above.
+//
+// The table is quite complex, in order to limit the number of coeng consonants
+// to 2 (by means of the table).
+//
+// There is a peculiarity, as far as Unicode is concerned:
+// - The consonant-shifter is considered in two possible different
+// locations, the one considered in Unicode 3.0 and the one considered in
+// Unicode 4.0. (there is a backwards compatibility problem in this standard).
+//
+//
+// xx independent character, such as a number, punctuation sign or non-khmer char
+//
+// c1 Khmer consonant of type 1 or an independent vowel
+// that is, a letter in which the subscript form is only under the
+// base, not taking any space to the right or to the left
+//
+// c2 Khmer consonant of type 2, the coeng form takes space under
+// and to the left of the base (only RO is of this type)
+//
+// c3 Khmer consonant of type 3. Its subscript form takes space under
+// and to the right of the base.
+//
+// cs Khmer consonant shifter
+//
+// rb Khmer robat
+//
+// co coeng character (u17D2)
+//
+// dv dependent vowel (including split vowels, they are treated in the same way).
+// even if dv is not defined above, the component that is really tested for is
+// CC_DEPENDENT_VOWEL, which is common to all dependent vowels
+//
+// zwj Zero Width joiner
+//
+// zwnj Zero width non joiner
+//
+// sa above sign
+//
+// sp post sign
+//
+// there are lines with equal content but for an easier understanding
+// (and maybe change in the future) we did not join them
+*/
+/*
+// Rows are states, columns are character class values (see the header row).
+// A non-negative entry is the next state; a negative entry makes
+// khmer_nextSyllableBoundary() stop at the current character.
+*/
+static const signed char khmerStateTable[][CC_COUNT] =
+{
+ /* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */
+ { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sign to the right of the syllable) */
+ {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */
+ {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter. It can only be followed by a shifter or a vowel */
+ {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shifter */
+ {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */
+ {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */
+ {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */
+ {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */
+ {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant of type 3 after coeng */
+ {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
+ {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
+ {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
+ {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register shifter */
+ {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
+ {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */
+ {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
+ {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel after a Robat */
+};
+
+
+/* #define KHMER_DEBUG */
+/*
+// Debug tracing for the Khmer syllable scanner.
+// With KHMER_DEBUG defined the trace goes to qDebug; otherwise the call is
+// still compiled (so its arguments stay type-checked) but never executed.
+// The disabled form is spelled "if (true) {} else printf" rather than
+// "if (0) printf" so that the macro brings its own else branch: an else that
+// follows an unbraced KHDEBUG(...) call then binds to the caller's if and not
+// to the if hidden inside the macro.
+*/
+#ifdef KHMER_DEBUG
+#  define KHDEBUG qDebug
+#else
+#  define KHDEBUG \
+    if (true) {} else \
+    printf
+#endif
+
+/*
+// Scans s from index start (never reaching index end) with khmerStateTable and
+// returns the index at which the syllable beginning at start ends, i.e. the index
+// of the first character that is not part of it. If no exit state is reached
+// before end, end is returned.
+// *invalid is set to false when nothing is scanned; otherwise it tells whether the
+// first character has a non-zero class without the CF_CONSONANT flag.
+*/
+static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
+{
+    *invalid = false;
+
+    int state = 0;
+    int pos = start;
+    for (; pos < end; ++pos) {
+        const KhmerCharClass cls = getKhmerCharClass(s[pos]);
+        if (pos == start)
+            *invalid = cls != 0 && (cls & CF_CONSONANT) == 0;
+        state = khmerStateTable[state][cls & CF_CLASS_MASK];
+
+        KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
+                cls, s[pos]);
+
+        /* a negative state means the current character is not part of the syllable */
+        if (state < 0)
+            break;
+    }
+    return pos;
+}
+
+/*
+// Sets the character attributes for the Khmer run text[from .. from + len).
+// The run is split into syllables with khmer_nextSyllableBoundary(). The first
+// character of every syllable gets graphemeBoundary set to true; every other
+// character of the syllable gets graphemeBoundary set to false.
+// script is not used.
+*/
+static void khmerAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
+{
+    Q_UNUSED(script);
+
+    const int end = from + len;
+    attributes += from; /* from here on, indices are relative to the start of the run */
+
+    uint i = 0;
+    while (i < len) {
+        bool invalid; /* demanded by the scanner's signature, not evaluated here */
+        uint boundary = khmer_nextSyllableBoundary(text, from+i, end, &invalid) - from;
+
+        attributes[i].graphemeBoundary = true;
+
+        if (boundary > len-1)
+            boundary = len;
+        i++;
+        while (i < boundary) {
+            attributes[i].graphemeBoundary = false;
+            ++i;
+        }
+        /* the scanner must have consumed at least one character */
+        Q_ASSERT(i == boundary);
+    }
+}
+
+
+/*
+// Tailoring function for each script, indexed by QChar::Script value from
+// Script_Unknown up to and including Script_Khmer. getCharAttributes() skips
+// scripts whose entry is nullptr.
+*/
+const CharAttributeFunction charAttributeFunction[] = {
+    nullptr,            // Script_Unknown
+    nullptr,            // Script_Inherited
+    nullptr,            // Script_Common
+    nullptr,            // Script_Latin
+    nullptr,            // Script_Greek
+    nullptr,            // Script_Cyrillic
+    nullptr,            // Script_Armenian
+    nullptr,            // Script_Hebrew
+    nullptr,            // Script_Arabic
+    nullptr,            // Script_Syriac
+    nullptr,            // Script_Thaana
+    indicAttributes,    // Script_Devanagari
+    indicAttributes,    // Script_Bengali
+    indicAttributes,    // Script_Gurmukhi
+    indicAttributes,    // Script_Gujarati
+    indicAttributes,    // Script_Oriya
+    indicAttributes,    // Script_Tamil
+    indicAttributes,    // Script_Telugu
+    indicAttributes,    // Script_Kannada
+    indicAttributes,    // Script_Malayalam
+    indicAttributes,    // Script_Sinhala
+    thaiAttributes,     // Script_Thai
+    nullptr,            // Script_Lao
+    tibetanAttributes,  // Script_Tibetan
+    myanmarAttributes,  // Script_Myanmar
+    nullptr,            // Script_Georgian
+    nullptr,            // Script_Hangul
+    nullptr,            // Script_Ethiopic
+    nullptr,            // Script_Cherokee
+    nullptr,            // Script_CanadianAboriginal
+    nullptr,            // Script_Ogham
+    nullptr,            // Script_Runic
+    khmerAttributes     // Script_Khmer
+};
+
+/*
+// Runs the script specific tailoring function, if any, over every script item.
+//
+// string, stringLength: the text the items refer to
+// items, numItems:      script items; an item extends from its position to the
+//                       position of the next item, the last one to stringLength
+// attributes:           per character attributes, updated in place
+//
+// Scripts beyond Script_Khmer have no entry in charAttributeFunction and are
+// treated like Script_Common.
+*/
+static void getCharAttributes(const ushort *string, uint stringLength,
+                              const QUnicodeTools::ScriptItem *items, uint numItems,
+                              QCharAttributes *attributes)
+{
+    /* the clamp below is only safe while the table ends exactly at Script_Khmer */
+    static_assert(sizeof(charAttributeFunction) / sizeof(charAttributeFunction[0])
+                      == size_t(QChar::Script_Khmer) + 1,
+                  "charAttributeFunction must have one entry per script up to Script_Khmer");
+
+    if (stringLength == 0)
+        return;
+
+    for (uint i = 0; i < numItems; ++i) {
+        QChar::Script script = items[i].script;
+        if (script > QChar::Script_Khmer)
+            script = QChar::Script_Common;
+
+        const CharAttributeFunction attributeFunction = charAttributeFunction[script];
+        if (!attributeFunction)
+            continue;
+
+        const int start = items[i].position;
+        const int end = (i + 1 < numItems) ? items[i + 1].position : int(stringLength);
+        attributeFunction(script, string, start, end - start, attributes);
+    }
+}
+
+}
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
const ScriptItem *items, int numItems,
@@ -750,38 +2236,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
if (!items || numItems <= 0)
return;
- QVarLengthArray<HB_ScriptItem, 64> scriptItems;
- scriptItems.reserve(numItems);
- int start = 0;
- HB_Script startScript = script_to_hbscript(items[start].script);
- if (Q_UNLIKELY(startScript == HB_Script_Inherited))
- startScript = HB_Script_Common;
- for (int i = start + 1; i < numItems; ++i) {
- HB_Script script = script_to_hbscript(items[i].script);
- if (Q_LIKELY(script == startScript || script == HB_Script_Inherited))
- continue;
- Q_ASSERT(items[i].position > items[start].position);
- HB_ScriptItem item;
- item.pos = items[start].position;
- item.length = items[i].position - items[start].position;
- item.script = startScript;
- item.bidiLevel = 0; // unused
- scriptItems.append(item);
- start = i;
- startScript = script;
- }
- if (items[start].position + 1 < length) {
- HB_ScriptItem item;
- item.pos = items[start].position;
- item.length = length - items[start].position;
- item.script = startScript;
- item.bidiLevel = 0; // unused
- scriptItems.append(item);
- }
- Q_STATIC_ASSERT(sizeof(QCharAttributes) == sizeof(HB_CharAttributes));
- HB_GetTailoredCharAttributes(string, length,
- scriptItems.constData(), scriptItems.size(),
- reinterpret_cast<HB_CharAttributes *>(attributes));
+ Tailored::getCharAttributes(string, length, items, numItems, attributes);
}
}
@@ -796,7 +2251,7 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray
{
int sor = 0;
int eor = 0;
- uchar script = QChar::Script_Common;
+ QChar::Script script = QChar::Script_Common;
for (int i = 0; i < length; ++i, eor = i) {
uint ucs4 = string[i];
@@ -810,7 +2265,7 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
- uchar nscript = prop->script;
+ QChar::Script nscript = QChar::Script(prop->script);
if (Q_LIKELY(nscript == script || nscript <= QChar::Script_Common))
continue;
diff --git a/src/corelib/text/qunicodetools_p.h b/src/corelib/text/qunicodetools_p.h
index 6294d9ceb4..5715444025 100644
--- a/src/corelib/text/qunicodetools_p.h
+++ b/src/corelib/text/qunicodetools_p.h
@@ -1,6 +1,6 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2020 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -75,7 +75,7 @@ namespace QUnicodeTools {
struct ScriptItem
{
int position;
- int script;
+ QChar::Script script;
};
using ScriptItemArray = QVarLengthArray<ScriptItem, 64>;
diff --git a/src/gui/text/qtextengine.cpp b/src/gui/text/qtextengine.cpp
index 2deae6f4ba..fce3e519d4 100644
--- a/src/gui/text/qtextengine.cpp
+++ b/src/gui/text/qtextengine.cpp
@@ -1985,7 +1985,7 @@ const QCharAttributes *QTextEngine::attributes() const
for (int i = 0; i < layoutData->items.size(); ++i) {
const QScriptItem &si = layoutData->items.at(i);
scriptItems[i].position = si.position;
- scriptItems[i].script = si.analysis.script;
+ scriptItems[i].script = QChar::Script(si.analysis.script);
}
QUnicodeTools::initCharAttributes(reinterpret_cast<const ushort *>(layoutData->string.constData()),