diff options
Diffstat (limited to 'src/declarative/qml/ftw/qhashedstring.cpp')
-rw-r--r-- | src/declarative/qml/ftw/qhashedstring.cpp | 479 |
1 files changed, 479 insertions, 0 deletions
diff --git a/src/declarative/qml/ftw/qhashedstring.cpp b/src/declarative/qml/ftw/qhashedstring.cpp new file mode 100644 index 0000000000..4a7b3376cc --- /dev/null +++ b/src/declarative/qml/ftw/qhashedstring.cpp @@ -0,0 +1,479 @@ +/**************************************************************************** +** +** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtDeclarative module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** GNU Lesser General Public License Usage +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this +** file. Please review the following information to ensure the GNU Lesser +** General Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU General +** Public License version 3.0 as published by the Free Software Foundation +** and appearing in the file LICENSE.GPL included in the packaging of this +** file. Please review the following information to ensure the GNU General +** Public License version 3.0 requirements will be met: +** http://www.gnu.org/copyleft/gpl.html. +** +** Other Usage +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qhashedstring_p.h" + +// This is a reimplementation of V8's string hash algorithm. It is significantly +// faster to do it here than call into V8, but it adds the maintainence burden of +// ensuring that the two hashes are identical. We Q_ASSERT() that the two return +// the same value. If these asserts start to fail, the hash code needs to be +// synced with V8. +namespace String { + static const int kMaxArrayIndexSize = 10; + static const int kMaxHashCalcLength = 16383; + static const int kNofHashBitFields = 2; + static const int kHashShift = kNofHashBitFields; + static const int kIsNotArrayIndexMask = 1 << 1; + static const int kArrayIndexValueBits = 24; + static const int kArrayIndexHashLengthShift = kArrayIndexValueBits + kNofHashBitFields; + static const int kMaxCachedArrayIndexLength = 7; +}; + +template <typename schar> +uint32_t calculateHash(const schar* chars, int length) { + if (length > String::kMaxHashCalcLength) { + // V8 trivial hash + return (length << String::kHashShift) | String::kIsNotArrayIndexMask; + } + + uint32_t raw_running_hash = 0; + uint32_t array_index = 0; + bool is_array_index = (0 < length && length <= String::kMaxArrayIndexSize); + bool is_first_char = true; + + int ii = 0; + for (;is_array_index && ii < length; ++ii) { + quint32 c = *chars++; + + raw_running_hash += c; + raw_running_hash += (raw_running_hash << 10); + raw_running_hash ^= (raw_running_hash >> 6); + + if (c < '0' || c > '9') { + is_array_index = false; + } else { + int d = c - '0'; + if (is_first_char) { + is_first_char = false; + if (c == '0' && length > 1) { + is_array_index = false; + continue; + } + } + if (array_index > 429496729U - ((d + 2) >> 3)) { + is_array_index = false; + } else { + array_index = array_index * 10 + d; + } + } + } + + for (;ii < length; ++ii) { + raw_running_hash += *chars++; + raw_running_hash += (raw_running_hash << 10); + raw_running_hash ^= (raw_running_hash >> 6); + } + + if (is_array_index) { + array_index <<= String::kHashShift; + array_index |= length << String::kArrayIndexHashLengthShift; + return array_index; + } else { + raw_running_hash += (raw_running_hash << 3); + raw_running_hash ^= (raw_running_hash >> 11); + raw_running_hash += (raw_running_hash << 15); + if (raw_running_hash == 0) { + raw_running_hash = 27; + } + + return (raw_running_hash << String::kHashShift) | String::kIsNotArrayIndexMask; + } +} + +inline quint32 stringHash(const QChar* data, int length) +{ + quint32 rv = calculateHash<quint16>((quint16*)data, length) >> String::kHashShift; + Q_ASSERT(rv == v8::String::ComputeHash((uint16_t*)data, length)); + return rv; +} + +inline quint32 stringHash(const char *data, int length) +{ + quint32 rv = calculateHash<quint8>((quint8*)data, length) >> String::kHashShift; + Q_ASSERT(rv == v8::String::ComputeHash((char *)data, length)); + return rv; +} + +void QHashedString::computeHash() const +{ + m_hash = stringHash(constData(), length()); +} + +void QHashedStringRef::computeHash() const +{ + m_hash = stringHash(m_data, m_length); +} + +void QHashedCStringRef::computeHash() const +{ + m_hash = stringHash(m_data, m_length); +} + +/* + A QHash has initially around pow(2, MinNumBits) buckets. For + example, if MinNumBits is 4, it has 17 buckets. +*/ +const int MinNumBits = 4; + +/* + The prime_deltas array is a table of selected prime values, even + though it doesn't look like one. The primes we are using are 1, + 2, 5, 11, 17, 37, 67, 131, 257, ..., i.e. primes in the immediate + surrounding of a power of two. + + The primeForNumBits() function returns the prime associated to a + power of two. For example, primeForNumBits(8) returns 257. +*/ + +static const uchar prime_deltas[] = { + 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3, + 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0 +}; + +static inline int primeForNumBits(int numBits) +{ + return (1 << numBits) + prime_deltas[numBits]; +} + +void QStringHashData::rehashToSize(int size) +{ + short bits = qMax(MinNumBits, (int)numBits); + while (primeForNumBits(bits) < size) bits++; + + if (bits > numBits) + rehashToBits(bits); +} + +void QStringHashData::rehashToBits(short bits) +{ + numBits = qMax(MinNumBits, (int)bits); + + int nb = primeForNumBits(numBits); + if (nb == numBuckets && buckets) + return; + + numBuckets = nb; + + delete [] buckets; + buckets = new QStringHashNode *[numBuckets]; + ::memset(buckets, 0, sizeof(QStringHashNode *) * numBuckets); + + QStringHashNode *nodeList = nodes; + while (nodeList) { + int bucket = nodeList->hash % numBuckets; + nodeList->next = buckets[bucket]; + buckets[bucket] = nodeList; + + nodeList = nodeList->nlist; + } +} + +// Copy of QString's qMemCompare +bool QHashedString::compare(const QChar *lhs, const QChar *rhs, int length) +{ + Q_ASSERT(lhs && rhs); + const quint16 *a = (const quint16 *)lhs; + const quint16 *b = (const quint16 *)rhs; + + if (a == b || !length) + return true; + + register union { + const quint16 *w; + const quint32 *d; + quintptr value; + } sa, sb; + sa.w = a; + sb.w = b; + + // check alignment + if ((sa.value & 2) == (sb.value & 2)) { + // both addresses have the same alignment + if (sa.value & 2) { + // both addresses are not aligned to 4-bytes boundaries + // compare the first character + if (*sa.w != *sb.w) + return false; + --length; + ++sa.w; + ++sb.w; + + // now both addresses are 4-bytes aligned + } + + // both addresses are 4-bytes aligned + // do a fast 32-bit comparison + register const quint32 *e = sa.d + (length >> 1); + for ( ; sa.d != e; ++sa.d, ++sb.d) { + if (*sa.d != *sb.d) + return false; + } + + // do we have a tail? + return (length & 1) ? *sa.w == *sb.w : true; + } else { + // one of the addresses isn't 4-byte aligned but the other is + register const quint16 *e = sa.w + length; + for ( ; sa.w != e; ++sa.w, ++sb.w) { + if (*sa.w != *sb.w) + return false; + } + } + return true; +} + +// Unicode stuff +static inline bool isUnicodeNonCharacter(uint ucs4) +{ + // Unicode has a couple of "non-characters" that one can use internally, + // but are not allowed to be used for text interchange. + // + // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, + // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and + // U+FDEF (inclusive) + + return (ucs4 & 0xfffe) == 0xfffe + || (ucs4 - 0xfdd0U) < 16; +} + +static int utf8LengthFromUtf16(const QChar *uc, int len) +{ + int length = 0; + + int surrogate_high = -1; + + const QChar *ch = uc; + int invalid = 0; + + const QChar *end = ch + len; + while (ch < end) { + uint u = ch->unicode(); + if (surrogate_high >= 0) { + if (u >= 0xdc00 && u < 0xe000) { + u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000; + surrogate_high = -1; + } else { + // high surrogate without low + ++ch; + ++invalid; + surrogate_high = -1; + continue; + } + } else if (u >= 0xdc00 && u < 0xe000) { + // low surrogate without high + ++ch; + ++invalid; + continue; + } else if (u >= 0xd800 && u < 0xdc00) { + surrogate_high = u; + ++ch; + continue; + } + + if (u < 0x80) { + ++length; + } else { + if (u < 0x0800) { + ++length; + } else { + // is it one of the Unicode non-characters? + if (isUnicodeNonCharacter(u)) { + ++length; + ++ch; + ++invalid; + continue; + } + + if (u > 0xffff) { + ++length; + ++length; + } else { + ++length; + } + ++length; + } + ++length; + } + ++ch; + } + + return length; +} + +// Writes the utf8 version of uc to output. uc is of length len. +// There must be at least utf8LengthFromUtf16(uc, len) bytes in output. +// A null terminator is not written. +static void utf8FromUtf16(char *output, const QChar *uc, int len) +{ + uchar replacement = '?'; + int surrogate_high = -1; + + uchar* cursor = (uchar*)output; + const QChar *ch = uc; + int invalid = 0; + + const QChar *end = ch + len; + while (ch < end) { + uint u = ch->unicode(); + if (surrogate_high >= 0) { + if (u >= 0xdc00 && u < 0xe000) { + u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000; + surrogate_high = -1; + } else { + // high surrogate without low + *cursor = replacement; + ++ch; + ++invalid; + surrogate_high = -1; + continue; + } + } else if (u >= 0xdc00 && u < 0xe000) { + // low surrogate without high + *cursor = replacement; + ++ch; + ++invalid; + continue; + } else if (u >= 0xd800 && u < 0xdc00) { + surrogate_high = u; + ++ch; + continue; + } + + if (u < 0x80) { + *cursor++ = (uchar)u; + } else { + if (u < 0x0800) { + *cursor++ = 0xc0 | ((uchar) (u >> 6)); + } else { + // is it one of the Unicode non-characters? + if (isUnicodeNonCharacter(u)) { + *cursor++ = replacement; + ++ch; + ++invalid; + continue; + } + + if (u > 0xffff) { + *cursor++ = 0xf0 | ((uchar) (u >> 18)); + *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); + } else { + *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f); + } + *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f); + } + *cursor++ = 0x80 | ((uchar) (u&0x3f)); + } + ++ch; + } +} + +void QHashedStringRef::computeUtf8Length() const +{ + if (m_length) + m_utf8length = utf8LengthFromUtf16(m_data, m_length); + else + m_utf8length = 0; +} + +QHashedStringRef QHashedStringRef::mid(int offset, int length) const +{ + Q_ASSERT(offset < m_length); + return QHashedStringRef(m_data + offset, + (length == -1 || (offset + length) > m_length)?(m_length - offset):length); +} + +bool QHashedStringRef::endsWith(const QString &s) const +{ + return s.length() < m_length && + QHashedString::compare(s.constData(), m_data + m_length - s.length(), s.length()); +} + +bool QHashedStringRef::startsWith(const QString &s) const +{ + return s.length() < m_length && + QHashedString::compare(s.constData(), m_data, s.length()); +} + +QString QHashedStringRef::toString() const +{ + if (m_length == 0) + return QString(); + return QString(m_data, m_length); +} + +QByteArray QHashedStringRef::toUtf8() const +{ + if (m_length == 0) + return QByteArray(); + + QByteArray result; + result.resize(utf8length()); + writeUtf8(result.data()); + return result; +} + +void QHashedStringRef::writeUtf8(char *output) const +{ + if (m_length) { + int ulen = utf8length(); + if (ulen == m_length) { + // Must be a latin1 string + uchar *o = (uchar *)output; + const QChar *c = m_data; + while (ulen--) + *o++ = (uchar)((*c++).unicode()); + } else { + utf8FromUtf16(output, m_data, m_length); + } + } +} + +QString QHashedCStringRef::toUtf16() const +{ + if (m_length == 0) + return QString(); + + QString rv; + rv.resize(m_length); + writeUtf16((uint16_t*)rv.data()); + return rv; +} + |