From 2fbb69a09361bf1ecf517516f76405c3be494d6d Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Thu, 22 Nov 2012 03:25:05 +0200 Subject: QTBF: Fix issue with not splitting the words at "." (FULL STOP) As of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet for better URL and abbreviation handling, which caused "hi.there" to be treated as if it were just a single word; until we have the Unicode Text Segmentation tailoring mechanism, retain the old behavior by remapping (some of) those characters back to their old values. Change-Id: I49dea6064f2ea40a82fc0b1bc3c4f0b4e803919f Reviewed-by: David Faure Reviewed-by: Lars Knoll --- util/unicode/main.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'util') diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 7bc667ca14..8e612f0b03 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -1572,6 +1572,15 @@ static void readWordBreak() qFatal("unassigned word break class: %s", l[1].constData()); for (int codepoint = from; codepoint <= to; ++codepoint) { + // ### [ + // as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet + // which caused "hi.there" to be treated like if it were just a single word; + // until we have a tailoring mechanism, retain the old behavior by remapping those characters here. + if (codepoint == 0x002E) // FULL STOP + brk = WordBreak_MidNum; + else if (codepoint == 0x003A) // COLON + brk = WordBreak_Other; + // ] ### UnicodeData &ud = UnicodeData::valueRef(codepoint); ud.p.wordBreakClass = brk; } -- cgit v1.2.3