diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-12-12 22:52:08 -0800 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2014-01-31 21:51:14 +0100 |
commit | 2cfdb3526975130dad9536ec5d3da642021ab0d0 (patch) | |
tree | 00a1f1eda09fd0691017699c696e7cfe926bc5e5 /src/corelib/tools/qstring.cpp | |
parent | f32a5b158f3929a8f391240b4f21dde1db294637 (diff) |
Merge qMemEquals and ucstrncmp
qMemEquals is equivalent to calling ucstrncmp and checking that the result
is 0, so implement it that way. Meanwhile, qMemEquals already had code for
doing 32-bit comparisons in the absence of higher SIMD optimizations,
which ucstrncmp can now make use of.
Change-Id: If26b446856e3b756efabc98b471fcdc344a8e732
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 122 |
1 files changed, 38 insertions, 84 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 1dd2832ad9..269b592f62 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -257,84 +257,11 @@ static int ucstrncmp(const QChar *a, const QChar *b, int l) } } #endif - - while (l-- && *a == *b) - a++,b++; - if (l==-1) - return 0; - return a->unicode() - b->unicode(); -} - -// Unicode case-sensitive comparison -static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) -{ - if (a == b && alen == blen) + if (!l) return 0; - int l = qMin(alen, blen); - int cmp = ucstrncmp(a, b, l); - return cmp ? cmp : (alen-blen); -} - -// Unicode case-insensitive compare two same-sized strings -static int ucstrnicmp(const ushort *a, const ushort *b, int l) -{ - return ucstricmp(a, a + l, b, b + l); -} - -// Benchmarking indicates that doing memcmp is much slower than -// executing the comparison ourselves. -// -// The profiling was done on a population of calls to qMemEquals, generated -// during a run of the demo browser. 
The profile of the data (32-bit x86 -// Linux) was: -// -// total number of comparisons: 21353 -// longest string compared: 95 -// average comparison length: 14.8786 -// cache-line crosses: 5661 (13.3%) -// alignment histogram: -// 0xXXX0 = 512 (1.2%) strings, 0 (0.0%) of which same-aligned -// 0xXXX2 = 15087 (35.3%) strings, 5145 (34.1%) of which same-aligned -// 0xXXX4 = 525 (1.2%) strings, 0 (0.0%) of which same-aligned -// 0xXXX6 = 557 (1.3%) strings, 6 (1.1%) of which same-aligned -// 0xXXX8 = 509 (1.2%) strings, 0 (0.0%) of which same-aligned -// 0xXXXa = 24358 (57.0%) strings, 9901 (40.6%) of which same-aligned -// 0xXXXc = 557 (1.3%) strings, 0 (0.0%) of which same-aligned -// 0xXXXe = 601 (1.4%) strings, 15 (2.5%) of which same-aligned -// total = 42706 (100%) strings, 15067 (35.3%) of which same-aligned -// -// 92% of the strings have alignment of 2 or 10, which is due to malloc on -// 32-bit Linux returning values aligned to 8 bytes, and offsetof(array, QString::Data) == 18. -// -// The profile on 64-bit will be different since offsetof(array, QString::Data) == 26. 
-// -// The benchmark results were, for a Core-i7 @ 2.67 GHz 32-bit, compiled with -O3 -funroll-loops: -// 16-bit loads only: 872,301 CPU ticks [Qt 4.5 / memcmp] -// 32- and 16-bit loads: 773,362 CPU ticks [Qt 4.6] -// SSE2 "movdqu" 128-bit loads: 618,736 CPU ticks -// SSE3 "lddqu" 128-bit loads: 619,954 CPU ticks -// SSSE3 "palignr" corrections: 852,147 CPU ticks -// SSE4.2 "pcmpestrm": 738,702 CPU ticks -// -// The same benchmark on an Atom N450 @ 1.66 GHz, is: -// 16-bit loads only: 2,185,882 CPU ticks -// 32- and 16-bit loads: 1,805,060 CPU ticks -// SSE2 "movdqu" 128-bit loads: 2,529,843 CPU ticks -// SSE3 "lddqu" 128-bit loads: 2,514,858 CPU ticks -// SSSE3 "palignr" corrections: 2,160,325 CPU ticks -// SSE4.2 not available -// -// The conclusion we reach is that alignment the SSE2 unaligned code can gain -// 20% improvement in performance in some systems, but suffers a penalty due -// to the unaligned loads on others. - -static bool qMemEquals(const quint16 *a, const quint16 *b, int length) -{ - if (a == b || !length) - return true; union { - const quint16 *w; + const QChar *w; const quint32 *d; quintptr value; } sa, sb; @@ -348,8 +275,8 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) // both addresses are not aligned to 4-bytes boundaries // compare the first character if (*sa.w != *sb.w) - return false; - --length; + return sa.w->unicode() - sb.w->unicode(); + --l; ++sa.w; ++sb.w; @@ -358,23 +285,50 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) // both addresses are 4-bytes aligned // do a fast 32-bit comparison - const quint32 *e = sa.d + (length >> 1); + const quint32 *e = sa.d + (l >> 1); for ( ; sa.d != e; ++sa.d, ++sb.d) { - if (*sa.d != *sb.d) - return false; + if (*sa.d != *sb.d) { + if (*sa.w != *sb.w) + return sa.w->unicode() - sb.w->unicode(); + return sa.w[1].unicode() - sb.w[1].unicode(); + } } // do we have a tail? - return (length & 1) ? *sa.w == *sb.w : true; + return (l & 1) ? 
sa.w->unicode() - sb.w->unicode() : 0; } else { // one of the addresses isn't 4-byte aligned but the other is - const quint16 *e = sa.w + length; + const QChar *e = sa.w + l; for ( ; sa.w != e; ++sa.w, ++sb.w) { if (*sa.w != *sb.w) - return false; + return sa.w->unicode() - sb.w->unicode(); } } - return true; + return 0; +} + +// Unicode case-sensitive comparison +static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) +{ + if (a == b && alen == blen) + return 0; + int l = qMin(alen, blen); + int cmp = ucstrncmp(a, b, l); + return cmp ? cmp : (alen-blen); +} + +// Unicode case-insensitive compare two same-sized strings +static int ucstrnicmp(const ushort *a, const ushort *b, int l) +{ + return ucstricmp(a, a + l, b, b + l); +} + +static bool qMemEquals(const quint16 *a, const quint16 *b, int length) +{ + if (a == b || !length) + return true; + + return ucstrncmp(reinterpret_cast<const QChar *>(a), reinterpret_cast<const QChar *>(b), length) == 0; } /*! |