summary | refs | log | tree | commit | diff | stats
path: root/src/corelib
diff options
context:
space:
mode:
author: Konstantin Ritt <ritt.ks@gmail.com> 2012-05-09 16:44:36 +0300
committer: Qt by Nokia <qt-info@nokia.com> 2012-05-10 11:34:25 +0200
commit: 8c0048a377568b646b3b87be0b02322fce68b780 (patch)
tree: 575e79f7191060be29ba17989bc9676b272e3370 /src/corelib
parent: 2d23374a6f88bac2f5f2d7f0528e9e554b4a4cbb (diff)
add some useful methods to QUnicodeTables::
in order to reduce code duplication and prepare the ground for upcoming changes
Change-Id: I980244149f65384c9484bbec4682de8b7b848b08
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- src/corelib/codecs/qutfcodec.cpp 18
-rw-r--r-- src/corelib/json/qjsonparser.cpp 16
-rw-r--r-- src/corelib/json/qjsonwriter.cpp 18
-rw-r--r-- src/corelib/tools/qunicodetables.cpp 15
-rw-r--r-- src/corelib/tools/qunicodetables_p.h 24
5 files changed, 46 insertions, 45 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 9111ac6379..c3d9dbbd31 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -43,24 +43,12 @@
#include "qlist.h"
#include "qendian.h"
#include "qchar.h"
+#include <private/qunicodetables_p.h>
QT_BEGIN_NAMESPACE
enum { Endian = 0, Data = 1 };
-static inline bool isUnicodeNonCharacter(uint ucs4)
-{
- // Unicode has a couple of "non-characters" that one can use internally,
- // but are not allowed to be used for text interchange.
- //
- // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
- // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
- // U+FDEF (inclusive)
-
- return (ucs4 & 0xfffe) == 0xfffe
- || (ucs4 - 0xfdd0U) < 32;
-}
-
QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
{
uchar replacement = '?';
@@ -120,7 +108,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
// is it one of the Unicode non-characters?
- if (isUnicodeNonCharacter(u)) {
+ if (QUnicodeTables::isNonCharacter(u)) {
*cursor++ = replacement;
++ch;
++invalid;
@@ -196,7 +184,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
bool nonCharacter;
if (!headerdone && uc == 0xfeff) {
// don't do anything, just skip the BOM
- } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
+ } else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
// surrogate pair
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
*qch++ = QChar::highSurrogate(uc);
diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp
index a17426580f..8c5693c9be 100644
--- a/src/corelib/json/qjsonparser.cpp
+++ b/src/corelib/json/qjsonparser.cpp
@@ -45,6 +45,7 @@
#include <qdebug.h>
#include "qjsonparser_p.h"
#include "qjson_p.h"
+#include <private/qunicodetables_p.h>
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
@@ -721,19 +722,6 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *
return true;
}
-static inline bool isUnicodeNonCharacter(uint ucs4)
-{
- // Unicode has a couple of "non-characters" that one can use internally,
- // but are not allowed to be used for text interchange.
- //
- // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
- // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
- // U+FDEF (inclusive)
-
- return (ucs4 & 0xfffe) == 0xfffe
- || (ucs4 - 0xfdd0U) < 32;
-}
-
static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
{
int need;
@@ -769,7 +757,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result
uc = (uc << 6) | (ch & 0x3f);
}
- if (uc < min_uc || isUnicodeNonCharacter(uc) ||
+ if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) ||
(uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) {
return false;
}
diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp
index 7cdc3f0dba..b086cbdea9 100644
--- a/src/corelib/json/qjsonwriter.cpp
+++ b/src/corelib/json/qjsonwriter.cpp
@@ -41,6 +41,7 @@
#include "qjsonwriter_p.h"
#include "qjson_p.h"
+#include <private/qunicodetables_p.h>
QT_BEGIN_NAMESPACE
@@ -49,21 +50,6 @@ using namespace QJsonPrivate;
static void objectContentToJson(const QJsonPrivate::Object *o, QByteArray &json, int indent, bool compact);
static void arrayContentToJson(const QJsonPrivate::Array *a, QByteArray &json, int indent, bool compact);
-// some code from qutfcodec.cpp, inlined here for performance reasons
-// to allow fast escaping of strings
-static inline bool isUnicodeNonCharacter(uint ucs4)
-{
- // Unicode has a couple of "non-characters" that one can use internally,
- // but are not allowed to be used for text interchange.
- //
- // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
- // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
- // U+FDEF (inclusive)
-
- return (ucs4 & 0xfffe) == 0xfffe
- || (ucs4 - 0xfdd0U) < 32;
-}
-
static inline uchar hexdig(uint u) // maps a value to one hex digit character: '0'.. below 0xa, 'a'.. from 0xa upward
{
return (u < 0xa ? '0' + u : 'a' + u - 0xa); // no range check here; NOTE(review): callers presumably pass 0..15 — confirm
}
@@ -154,7 +140,7 @@ static QByteArray escapedString(const QString &s)
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
// is it one of the Unicode non-characters?
- if (isUnicodeNonCharacter(u)) {
+ if (QUnicodeTables::isNonCharacter(u)) {
*cursor++ = replacement;
++ch;
continue;
diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp
index 04031251e4..9a2a36cd49 100644
--- a/src/corelib/tools/qunicodetables.cpp
+++ b/src/corelib/tools/qunicodetables.cpp
@@ -4348,6 +4348,21 @@ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2)
return qGetProp(ucs2);
}
+Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4) // exported lookup of the grapheme-break class for ucs4
+{
+ return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak; // reads the graphemeBreak field of the record qGetProp() returns, cast to the enum type
+}
+
+Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4) // exported lookup of the word-break class for ucs4
+{
+ return (WordBreak)qGetProp(ucs4)->wordBreak; // reads the wordBreak field of the record qGetProp() returns, cast to the enum type
+}
+
+Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4) // exported lookup of the sentence-break class for ucs4
+{
+ return (SentenceBreak)qGetProp(ucs4)->sentenceBreak; // reads the sentenceBreak field of the record qGetProp() returns, cast to the enum type
+}
+
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)
{
return (LineBreakClass)qGetProp(ucs4)->line_break_class;
diff --git a/src/corelib/tools/qunicodetables_p.h b/src/corelib/tools/qunicodetables_p.h
index 50afebdd9c..15d5415b0b 100644
--- a/src/corelib/tools/qunicodetables_p.h
+++ b/src/corelib/tools/qunicodetables_p.h
@@ -217,6 +217,18 @@ namespace QUnicodeTables {
};
+ Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4); // exported declaration; the body is not in this header
+ inline int graphemeBreakClass(QChar ch) // QChar convenience overload; declared to return int rather than GraphemeBreak
+ { return graphemeBreakClass(ch.unicode()); } // passes ch.unicode() on to graphemeBreakClass
+
+ Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4); // exported declaration; the body is not in this header
+ inline int wordBreakClass(QChar ch) // QChar convenience overload; declared to return int rather than WordBreak
+ { return wordBreakClass(ch.unicode()); } // passes ch.unicode() on to wordBreakClass
+
+ Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4); // exported declaration; the body is not in this header
+ inline int sentenceBreakClass(QChar ch) // QChar convenience overload; declared to return int rather than SentenceBreak
+ { return sentenceBreakClass(ch.unicode()); } // passes ch.unicode() on to sentenceBreakClass
+
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4); // exported declaration; the body is not in this header
inline int lineBreakClass(QChar ch) // QChar convenience overload; declared to return int rather than LineBreakClass
{ return lineBreakClass(ch.unicode()); } // passes ch.unicode() on to lineBreakClass
@@ -225,6 +237,18 @@ namespace QUnicodeTables {
inline int script(QChar ch) // QChar convenience overload returning int
{ return script(ch.unicode()); } // passes ch.unicode() on to another script() overload declared outside this hunk
+
+ inline bool isNonCharacter(uint ucs4)
+ {
+ // Noncharacter_Code_Point:
+ // Unicode reserves some "non-characters" that may be used internally
+ // but must not be used for text interchange.
+ // Those are the last two entries of each Unicode plane (U+FFFE..U+FFFF,
+ // U+1FFFE..U+1FFFF, etc.) as well as the entries in the range U+FDD0..U+FDEF.
+ // The leading ucs4 >= 0xfdd0 test rejects everything below the lowest non-character first.
+ return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe); // the mask test looks only at the low 16 bits; no upper bound is applied to ucs4 here
+ }
+
} // namespace QUnicodeTables
QT_END_NAMESPACE