From 8c0048a377568b646b3b87be0b02322fce68b780 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Wed, 9 May 2012 16:44:36 +0300 Subject: add some useful methods to QUnicodeTables:: in order to reduce code duplication and prepare the ground for upcoming changes Change-Id: I980244149f65384c9484bbec4682de8b7b848b08 Reviewed-by: Lars Knoll --- src/corelib/codecs/qutfcodec.cpp | 18 +++------------ src/corelib/json/qjsonparser.cpp | 16 ++------------ src/corelib/json/qjsonwriter.cpp | 18 ++------------- src/corelib/tools/qunicodetables.cpp | 15 +++++++++++++ src/corelib/tools/qunicodetables_p.h | 24 ++++++++++++++++++++ util/unicode/main.cpp | 43 +++++++++++++++++++++++++++++++++++- 6 files changed, 88 insertions(+), 46 deletions(-) diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 9111ac6379..c3d9dbbd31 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -43,24 +43,12 @@ #include "qlist.h" #include "qendian.h" #include "qchar.h" +#include <private/qunicodetables_p.h> QT_BEGIN_NAMESPACE enum { Endian = 0, Data = 1 }; -static inline bool isUnicodeNonCharacter(uint ucs4) -{ - // Unicode has a couple of "non-characters" that one can use internally, - // but are not allowed to be used for text interchange. - // - // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, - // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and - // U+FDEF (inclusive) - - return (ucs4 & 0xfffe) == 0xfffe - || (ucs4 - 0xfdd0U) < 32; -} - QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state) { uchar replacement = '?'; @@ -120,7 +108,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { // is it one of the Unicode non-characters? 
- if (isUnicodeNonCharacter(u)) { + if (QUnicodeTables::isNonCharacter(u)) { *cursor++ = replacement; ++ch; ++invalid; @@ -196,7 +184,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte bool nonCharacter; if (!headerdone && uc == 0xfeff) { // don't do anything, just skip the BOM - } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) { + } else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) { // surrogate pair Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length()); *qch++ = QChar::highSurrogate(uc); diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp index a17426580f..8c5693c9be 100644 --- a/src/corelib/json/qjsonparser.cpp +++ b/src/corelib/json/qjsonparser.cpp @@ -45,6 +45,7 @@ #include #include "qjsonparser_p.h" #include "qjson_p.h" +#include <private/qunicodetables_p.h> //#define PARSER_DEBUG #ifdef PARSER_DEBUG @@ -721,19 +722,6 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint * return true; } -static inline bool isUnicodeNonCharacter(uint ucs4) -{ - // Unicode has a couple of "non-characters" that one can use internally, - // but are not allowed to be used for text interchange. - // - // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, - // U+1FFFE, U+1FFFF, etc.) 
as well as the entries between U+FDD0 and - // U+FDEF (inclusive) - - return (ucs4 & 0xfffe) == 0xfffe - || (ucs4 - 0xfdd0U) < 32; -} - static inline bool scanUtf8Char(const char *&json, const char *end, uint *result) { int need; @@ -769,7 +757,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result uc = (uc << 6) | (ch & 0x3f); } - if (uc < min_uc || isUnicodeNonCharacter(uc) || + if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) || (uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) { return false; } diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp index 7cdc3f0dba..b086cbdea9 100644 --- a/src/corelib/json/qjsonwriter.cpp +++ b/src/corelib/json/qjsonwriter.cpp @@ -41,6 +41,7 @@ #include "qjsonwriter_p.h" #include "qjson_p.h" +#include QT_BEGIN_NAMESPACE @@ -49,21 +50,6 @@ using namespace QJsonPrivate; static void objectContentToJson(const QJsonPrivate::Object *o, QByteArray &json, int indent, bool compact); static void arrayContentToJson(const QJsonPrivate::Array *a, QByteArray &json, int indent, bool compact); -// some code from qutfcodec.cpp, inlined here for performance reasons -// to allow fast escaping of strings -static inline bool isUnicodeNonCharacter(uint ucs4) -{ - // Unicode has a couple of "non-characters" that one can use internally, - // but are not allowed to be used for text interchange. - // - // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, - // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and - // U+FDEF (inclusive) - - return (ucs4 & 0xfffe) == 0xfffe - || (ucs4 - 0xfdd0U) < 32; -} - static inline uchar hexdig(uint u) { return (u < 0xa ? '0' + u : 'a' + u - 0xa); @@ -154,7 +140,7 @@ static QByteArray escapedString(const QString &s) *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { // is it one of the Unicode non-characters? 
- if (isUnicodeNonCharacter(u)) { + if (QUnicodeTables::isNonCharacter(u)) { *cursor++ = replacement; ++ch; continue; diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp index 04031251e4..9a2a36cd49 100644 --- a/src/corelib/tools/qunicodetables.cpp +++ b/src/corelib/tools/qunicodetables.cpp @@ -4348,6 +4348,21 @@ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) return qGetProp(ucs2); } +Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4) +{ + return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak; +} + +Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4) +{ + return (WordBreak)qGetProp(ucs4)->wordBreak; +} + +Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4) +{ + return (SentenceBreak)qGetProp(ucs4)->sentenceBreak; +} + Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4) { return (LineBreakClass)qGetProp(ucs4)->line_break_class; diff --git a/src/corelib/tools/qunicodetables_p.h b/src/corelib/tools/qunicodetables_p.h index 50afebdd9c..15d5415b0b 100644 --- a/src/corelib/tools/qunicodetables_p.h +++ b/src/corelib/tools/qunicodetables_p.h @@ -217,6 +217,18 @@ namespace QUnicodeTables { }; + Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4); + inline int graphemeBreakClass(QChar ch) + { return graphemeBreakClass(ch.unicode()); } + + Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4); + inline int wordBreakClass(QChar ch) + { return wordBreakClass(ch.unicode()); } + + Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4); + inline int sentenceBreakClass(QChar ch) + { return sentenceBreakClass(ch.unicode()); } + Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4); inline int lineBreakClass(QChar ch) { return lineBreakClass(ch.unicode()); } @@ -225,6 +237,18 @@ namespace QUnicodeTables { inline int script(QChar ch) { return script(ch.unicode()); } + + inline bool isNonCharacter(uint ucs4) + { + 
// Noncharacter_Code_Point: + // Unicode has a couple of "non-characters" that one can use internally, + // but are not allowed to be used for text interchange. + // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF, + // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF + + return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe); + } + } // namespace QUnicodeTables QT_END_NAMESPACE diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index b9245ba387..42360f0628 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -397,6 +397,18 @@ static const char *property_string = " Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n"; static const char *methods = + " Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);\n" + " inline int graphemeBreakClass(QChar ch)\n" + " { return graphemeBreakClass(ch.unicode()); }\n" + "\n" + " Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);\n" + " inline int wordBreakClass(QChar ch)\n" + " { return wordBreakClass(ch.unicode()); }\n" + "\n" + " Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);\n" + " inline int sentenceBreakClass(QChar ch)\n" + " { return sentenceBreakClass(ch.unicode()); }\n" + "\n" " Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n" " inline int lineBreakClass(QChar ch)\n" " { return lineBreakClass(ch.unicode()); }\n" @@ -405,6 +417,18 @@ static const char *methods = " inline int script(QChar ch)\n" " { return script(ch.unicode()); }\n\n"; +static const char *generated_methods = + " inline bool isNonCharacter(uint ucs4)\n" + " {\n" + " // Noncharacter_Code_Point:\n" + " // Unicode has a couple of \"non-characters\" that one can use internally,\n" + " // but are not allowed to be used for text interchange.\n" + " // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,\n" + " // U+1FFFE..U+1FFFF, etc.) 
as well as the entries in range U+FDD0..U+FDEF\n" + "\n" + " return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);\n" + " }\n\n"; + static const int SizeOfPropertiesStruct = 20; struct PropertyFlags { @@ -2275,7 +2299,22 @@ static QByteArray createPropertyInfo() " return qGetProp(ucs2);\n" "}\n\n"; - out += "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n" + out += "Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4)\n" + "{\n" + " return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak;\n" + "}\n" + "\n" + "Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4)\n" + "{\n" + " return (WordBreak)qGetProp(ucs4)->wordBreak;\n" + "}\n" + "\n" + "Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4)\n" + "{\n" + " return (SentenceBreak)qGetProp(ucs4)->sentenceBreak;\n" + "}\n" + "\n" + "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n" "{\n" " return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n" "}\n\n"; @@ -2868,6 +2907,8 @@ int main(int, char **) f.write(line_break_class_string); f.write("\n"); f.write(methods); + f.write("\n"); + f.write(generated_methods); f.write("} // namespace QUnicodeTables\n\n" "QT_END_NAMESPACE\n\n" "#endif // QUNICODETABLES_P_H\n"); -- cgit v1.2.3