6 files changed, 88 insertions, 46 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 9111ac6379..c3d9dbbd31 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -43,24 +43,12 @@
 #include "qlist.h"
 #include "qendian.h"
 #include "qchar.h"
+#include <private/qunicodetables_p.h>
 
 QT_BEGIN_NAMESPACE
 
 enum { Endian = 0, Data = 1 };
 
-static inline bool isUnicodeNonCharacter(uint ucs4)
-{
-    // Unicode has a couple of "non-characters" that one can use internally,
-    // but are not allowed to be used for text interchange.
-    //
-    // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
-    // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
-    // U+FDEF (inclusive)
-
-    return (ucs4 & 0xfffe) == 0xfffe
-            || (ucs4 - 0xfdd0U) < 32;
-}
-
 QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
 {
     uchar replacement = '?';
@@ -120,7 +108,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
                 *cursor++ = 0xc0 | ((uchar) (u >> 6));
             } else {
                 // is it one of the Unicode non-characters?
-                if (isUnicodeNonCharacter(u)) {
+                if (QUnicodeTables::isNonCharacter(u)) {
                     *cursor++ = replacement;
                     ++ch;
                     ++invalid;
@@ -196,7 +184,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
                     bool nonCharacter;
                     if (!headerdone && uc == 0xfeff) {
                         // don't do anything, just skip the BOM
-                    } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
+                    } else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
                         // surrogate pair
                         Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
                         *qch++ = QChar::highSurrogate(uc);
diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp
index a17426580f..8c5693c9be 100644
--- a/src/corelib/json/qjsonparser.cpp
+++ b/src/corelib/json/qjsonparser.cpp
@@ -45,6 +45,7 @@
 #include <qdebug.h>
 #include "qjsonparser_p.h"
 #include "qjson_p.h"
+#include <private/qunicodetables_p.h>
 
 //#define PARSER_DEBUG
 #ifdef PARSER_DEBUG
@@ -721,19 +722,6 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *
     return true;
 }
 
-static inline bool isUnicodeNonCharacter(uint ucs4)
-{
-    // Unicode has a couple of "non-characters" that one can use internally,
-    // but are not allowed to be used for text interchange.
-    //
-    // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
-    // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
-    // U+FDEF (inclusive)
-
-    return (ucs4 & 0xfffe) == 0xfffe
-            || (ucs4 - 0xfdd0U) < 32;
-}
-
 static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
 {
     int need;
@@ -769,7 +757,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result
         uc = (uc << 6) | (ch & 0x3f);
     }
 
-    if (uc < min_uc || isUnicodeNonCharacter(uc) ||
+    if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) ||
         (uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) {
         return false;
     }
diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp
index 7cdc3f0dba..b086cbdea9 100644
--- a/src/corelib/json/qjsonwriter.cpp
+++ b/src/corelib/json/qjsonwriter.cpp
@@ -41,6 +41,7 @@
 
 #include "qjsonwriter_p.h"
 #include "qjson_p.h"
+#include <private/qunicodetables_p.h>
 
 QT_BEGIN_NAMESPACE
 
@@ -49,21 +50,6 @@ using namespace QJsonPrivate;
 static void objectContentToJson(const QJsonPrivate::Object *o, QByteArray &json, int indent, bool compact);
 static void arrayContentToJson(const QJsonPrivate::Array *a, QByteArray &json, int indent, bool compact);
 
-// some code from qutfcodec.cpp, inlined here for performance reasons
-// to allow fast escaping of strings
-static inline bool isUnicodeNonCharacter(uint ucs4)
-{
-    // Unicode has a couple of "non-characters" that one can use internally,
-    // but are not allowed to be used for text interchange.
-    //
-    // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
-    // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
-    // U+FDEF (inclusive)
-
-    return (ucs4 & 0xfffe) == 0xfffe
-            || (ucs4 - 0xfdd0U) < 32;
-}
-
 static inline uchar hexdig(uint u)
 {
     return (u < 0xa ? '0' + u : 'a' + u - 0xa);
@@ -154,7 +140,7 @@ static QByteArray escapedString(const QString &s)
                 *cursor++ = 0xc0 | ((uchar) (u >> 6));
             } else {
                 // is it one of the Unicode non-characters?
-                if (isUnicodeNonCharacter(u)) {
+                if (QUnicodeTables::isNonCharacter(u)) {
                     *cursor++ = replacement;
                     ++ch;
                     continue;
diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp
index 04031251e4..9a2a36cd49 100644
--- a/src/corelib/tools/qunicodetables.cpp
+++ b/src/corelib/tools/qunicodetables.cpp
@@ -4348,6 +4348,21 @@ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2)
     return qGetProp(ucs2);
 }
 
+Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4)
+{
+    return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak;
+}
+
+Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4)
+{
+    return (WordBreak)qGetProp(ucs4)->wordBreak;
+}
+
+Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4)
+{
+    return (SentenceBreak)qGetProp(ucs4)->sentenceBreak;
+}
+
 Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)
 {
     return (LineBreakClass)qGetProp(ucs4)->line_break_class;
diff --git a/src/corelib/tools/qunicodetables_p.h b/src/corelib/tools/qunicodetables_p.h
index 50afebdd9c..15d5415b0b 100644
--- a/src/corelib/tools/qunicodetables_p.h
+++ b/src/corelib/tools/qunicodetables_p.h
@@ -217,6 +217,18 @@ namespace QUnicodeTables {
     };
 
 
+    Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
+    inline int graphemeBreakClass(QChar ch)
+    { return graphemeBreakClass(ch.unicode()); }
+
+    Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
+    inline int wordBreakClass(QChar ch)
+    { return wordBreakClass(ch.unicode()); }
+
+    Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
+    inline int sentenceBreakClass(QChar ch)
+    { return sentenceBreakClass(ch.unicode()); }
+
     Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
     inline int lineBreakClass(QChar ch)
     { return lineBreakClass(ch.unicode()); }
@@ -225,6 +237,18 @@ namespace QUnicodeTables {
     inline int script(QChar ch)
     { return script(ch.unicode()); }
 
+
+    inline bool isNonCharacter(uint ucs4)
+    {
+        // Noncharacter_Code_Point:
+        // Unicode has a couple of "non-characters" that one can use internally,
+        // but are not allowed to be used for text interchange.
+        // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,
+        // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF
+
+        return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);
+    }
+
 } // namespace QUnicodeTables
 
 QT_END_NAMESPACE
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index b9245ba387..42360f0628 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -397,6 +397,18 @@ static const char *property_string =
     "    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
 
 static const char *methods =
+    "    Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);\n"
+    "    inline int graphemeBreakClass(QChar ch)\n"
+    "    { return graphemeBreakClass(ch.unicode()); }\n"
+    "\n"
+    "    Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);\n"
+    "    inline int wordBreakClass(QChar ch)\n"
+    "    { return wordBreakClass(ch.unicode()); }\n"
+    "\n"
+    "    Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);\n"
+    "    inline int sentenceBreakClass(QChar ch)\n"
+    "    { return sentenceBreakClass(ch.unicode()); }\n"
+    "\n"
     "    Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
     "    inline int lineBreakClass(QChar ch)\n"
     "    { return lineBreakClass(ch.unicode()); }\n"
@@ -405,6 +417,18 @@ static const char *methods =
     "    inline int script(QChar ch)\n"
     "    { return script(ch.unicode()); }\n\n";
 
+static const char *generated_methods =
+    "    inline bool isNonCharacter(uint ucs4)\n"
+    "    {\n"
+    "        // Noncharacter_Code_Point:\n"
+    "        // Unicode has a couple of \"non-characters\" that one can use internally,\n"
+    "        // but are not allowed to be used for text interchange.\n"
+    "        // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,\n"
+    "        // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF\n"
+    "\n"
+    "        return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);\n"
+    "    }\n\n";
+
 static const int SizeOfPropertiesStruct = 20;
 
 struct PropertyFlags {
@@ -2275,7 +2299,22 @@ static QByteArray createPropertyInfo()
            "    return qGetProp(ucs2);\n"
            "}\n\n";
 
-    out += "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
+    out += "Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4)\n"
+           "{\n"
+           "    return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak;\n"
+           "}\n"
+           "\n"
+           "Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4)\n"
+           "{\n"
+           "    return (WordBreak)qGetProp(ucs4)->wordBreak;\n"
+           "}\n"
+           "\n"
+           "Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4)\n"
+           "{\n"
+           "    return (SentenceBreak)qGetProp(ucs4)->sentenceBreak;\n"
+           "}\n"
+           "\n"
+           "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
            "{\n"
            "    return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
            "}\n\n";
@@ -2868,6 +2907,8 @@ int main(int, char **)
     f.write(line_break_class_string);
     f.write("\n");
     f.write(methods);
+    f.write("\n");
+    f.write(generated_methods);
     f.write("} // namespace QUnicodeTables\n\n"
             "QT_END_NAMESPACE\n\n"
             "#endif // QUNICODETABLES_P_H\n");