summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKonstantin Ritt <ritt.ks@gmail.com>2012-04-23 07:00:28 +0300
committerQt by Nokia <qt-info@nokia.com>2012-04-25 01:57:04 +0200
commit5e66c35a2379099cc1ec668efc766272bc097dad (patch)
tree57f07497820a140d767a9d4221797089144bad85 /src
parent6fb0110f972b8d9de22c7c06246b68bd7b6ebe50 (diff)
optimize QChar::decomposition()
* by not using QString::fromUtf16() as we know for sure that the data is 'raw' UCS-2; * it's safe to avoid a check for > UNICODE_LAST_CODEPOINT as GET_DECOMPOSITION_INDEX macro already does a similar check Change-Id: Ifb660efc51c664d06733ac8ed46d54278520da06 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/tools/qchar.cpp59
1 files changed, 32 insertions, 27 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp
index 74df59ab5b..b83170cb46 100644
--- a/src/corelib/tools/qchar.cpp
+++ b/src/corelib/tools/qchar.cpp
@@ -892,27 +892,26 @@ ushort QChar::mirroredChar(ushort ucs2)
}
+// constants for Hangul (de)composition, see UAX #15
enum {
Hangul_SBase = 0xac00,
Hangul_LBase = 0x1100,
Hangul_VBase = 0x1161,
Hangul_TBase = 0x11a7,
- Hangul_SCount = 11172,
Hangul_LCount = 19,
Hangul_VCount = 21,
Hangul_TCount = 28,
- Hangul_NCount = 21*28
+ Hangul_NCount = Hangul_VCount * Hangul_TCount,
+ Hangul_SCount = Hangul_LCount * Hangul_NCount
};
// buffer has to have a length of 3. It's needed for Hangul decomposition
static const unsigned short * QT_FASTCALL decompositionHelper
(uint ucs4, int *length, int *tag, unsigned short *buffer)
{
- *length = 0;
- if (ucs4 > UNICODE_LAST_CODEPOINT)
- return 0;
if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
- int SIndex = ucs4 - Hangul_SBase;
+ // compute Hangul syllable decomposition as per UAX #15
+ const uint SIndex = ucs4 - Hangul_SBase;
buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
@@ -922,8 +921,12 @@ static const unsigned short * QT_FASTCALL decompositionHelper
}
const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
- if (index == 0xffff)
+ if (index == 0xffff) {
+ *length = 0;
+ *tag = QChar::NoDecomposition;
return 0;
+ }
+
const unsigned short *decomposition = uc_decomposition_map+index;
*tag = (*decomposition) & 0xff;
*length = (*decomposition) >> 8;
@@ -950,7 +953,7 @@ QString QChar::decomposition(uint ucs4)
int length;
int tag;
const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
- return QString::fromUtf16(d, length);
+ return QString(reinterpret_cast<const QChar *>(d), length);
}
/*!
@@ -969,8 +972,6 @@ QChar::Decomposition QChar::decompositionTag() const
*/
QChar::Decomposition QChar::decompositionTag(uint ucs4)
{
- if (ucs4 > UNICODE_LAST_CODEPOINT)
- return QChar::NoDecomposition;
if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount)
return QChar::Canonical;
const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
@@ -1400,6 +1401,8 @@ QDataStream &operator>>(QDataStream &in, QChar &chr)
static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
{
+ int length;
+ int tag;
unsigned short buffer[3];
QString &s = *str;
@@ -1415,18 +1418,18 @@ static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion
ucs4 = QChar::surrogateToUcs4(high, ucs4);
}
}
+
const QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
if (v > version || v == QChar::Unicode_Unassigned)
continue;
- int length;
- int tag;
+
const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
if (!d || (canonical && tag != QChar::Canonical))
continue;
int pos = uc - utf16;
s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
- // since the insert invalidates the pointers and we do decomposition recursive
+ // since the replace invalidates the pointers and we do decomposition recursive
utf16 = reinterpret_cast<unsigned short *>(s.data());
uc = utf16 + pos + length;
}
@@ -1445,20 +1448,22 @@ inline bool operator<(const UCS2Pair &ligature, ushort u1)
static ushort ligatureHelper(ushort u1, ushort u2)
{
- // hangul L-V pair
- int LIndex = u1 - Hangul_LBase;
- if (0 <= LIndex && LIndex < Hangul_LCount) {
- int VIndex = u2 - Hangul_VBase;
- if (0 <= VIndex && VIndex < Hangul_VCount)
- return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
- }
-
- // hangul LV-T pair
- int SIndex = u1 - Hangul_SBase;
- if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
- int TIndex = u2 - Hangul_TBase;
- if (0 <= TIndex && TIndex <= Hangul_TCount)
- return u1 + TIndex;
+ if (u1 >= Hangul_LBase && u1 <= Hangul_SBase + Hangul_SCount) {
+ // compute Hangul syllable composition as per UAX #15
+ // hangul L-V pair
+ const uint LIndex = u1 - Hangul_LBase;
+ if (LIndex < Hangul_LCount) {
+ const uint VIndex = u2 - Hangul_VBase;
+ if (VIndex < Hangul_VCount)
+ return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
+ }
+ // hangul LV-T pair
+ const uint SIndex = u1 - Hangul_SBase;
+ if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
+ const uint TIndex = u2 - Hangul_TBase;
+ if (TIndex <= Hangul_TCount)
+ return u1 + TIndex;
+ }
}
const unsigned short index = GET_LIGATURE_INDEX(u2);