diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2020-09-06 21:58:36 +0000 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2020-09-09 04:00:28 +0000 |
commit | 475464906cf8a4edd29510975af8bc3af4e7d9cc (patch) | |
tree | 1f06e55b9f99f5b799edab0f3383fdd003ed1fff /src/corelib/thread/qsemaphore.cpp | |
parent | e33d40a88476e8a9d4f950fb285c75d0c1a92a2e (diff) |
Revert "Cleanup QSemaphore and make it always 64bit large"
This reverts commit ff69227a49119c00f703cf89c9d72db7eeeeaa66.
Reason for revert: 64-bit atomics on 32-bit systems are often
(but not always) worse than the 32-bit semaphore as it was
implemented. Plus the High32 and Low32 functions are returning
the same thing, forgetting the endianness check.
Change-Id: I5d5ade6e9bc7086600ff2302546385151e32142b
Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
Diffstat (limited to 'src/corelib/thread/qsemaphore.cpp')
-rw-r--r-- | src/corelib/thread/qsemaphore.cpp | 162 |
1 files changed, 103 insertions, 59 deletions
diff --git a/src/corelib/thread/qsemaphore.cpp b/src/corelib/thread/qsemaphore.cpp index a61789c6c5..ee4cee5281 100644 --- a/src/corelib/thread/qsemaphore.cpp +++ b/src/corelib/thread/qsemaphore.cpp @@ -126,19 +126,29 @@ using namespace QtFutex; unspecified, but it's likely single-token threads will get woken up first. */ -static const quint64 futexNeedsWakeAllBit = - Q_UINT64_C(1) << (sizeof(quint64) * CHAR_BIT - 1); +#if defined(FUTEX_OP) && QT_POINTER_SIZE > 4 +static constexpr bool futexHasWaiterCount = true; +#else +static constexpr bool futexHasWaiterCount = false; +#endif + +static const quintptr futexNeedsWakeAllBit = + Q_UINT64_C(1) << (sizeof(quintptr) * CHAR_BIT - 1); -static int futexAvailCounter(quint64 v) +static int futexAvailCounter(quintptr v) { - // the high bit of the low word isn't used - Q_ASSERT((v & 0x80000000U) == 0); + // the low 31 bits + if (futexHasWaiterCount) { + // the high bit of the low word isn't used + Q_ASSERT((v & 0x80000000U) == 0); - // so we can be a little faster - return int(unsigned(v)); + // so we can be a little faster + return int(unsigned(v)); + } + return int(v & 0x7fffffffU); } -static bool futexNeedsWake(quint64 v) +static bool futexNeedsWake(quintptr v) { // If we're counting waiters, the number of waiters is stored in the low 31 // bits of the high word (that is, bits 32-62). If we're not, then we use @@ -147,22 +157,26 @@ static bool futexNeedsWake(quint64 v) return v >> 31; } -static QBasicAtomicInteger<quint32> *futexLow32(QBasicAtomicInteger<quint64> *ptr) +static QBasicAtomicInteger<quint32> *futexLow32(QBasicAtomicInteger<quintptr> *ptr) { auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr); +#if Q_BYTE_ORDER == Q_BIG_ENDIAN && QT_POINTER_SIZE > 4 ++result; +#endif return result; } -static QBasicAtomicInteger<quint32> *futexHigh32(QBasicAtomicInteger<quint64> *ptr) +static QBasicAtomicInteger<quint32> *futexHigh32(QBasicAtomicInteger<quintptr> *ptr) { auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr); +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN && QT_POINTER_SIZE > 4 ++result; +#endif return result; } template <bool IsTimed> bool -futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue, quint64 nn, int timeout) +futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quintptr> &u, quintptr curValue, quintptr nn, int timeout) { QDeadlineTimer timer(IsTimed ? QDeadlineTimer(timeout) : QDeadlineTimer()); qint64 remainingTime = timeout * Q_INT64_C(1000) * 1000; @@ -174,7 +188,7 @@ futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue, forever { if (futexAvailCounter(curValue) >= n) { // try to acquire - quint64 newValue = curValue - nn; + quintptr newValue = curValue - nn; if (u.testAndSetOrdered(curValue, newValue, curValue)) return true; // succeeded! continue; @@ -187,11 +201,14 @@ futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue, // indicate we're waiting start_wait: auto ptr = futexLow32(&u); - if (n > 1) { + if (n > 1 || !futexHasWaiterCount) { u.fetchAndOrRelaxed(futexNeedsWakeAllBit); curValue |= futexNeedsWakeAllBit; - ptr = futexHigh32(&u); - curValue >>= 32; + if (n > 1 && futexHasWaiterCount) { + ptr = futexHigh32(&u); + //curValue >>= 32; // but this is UB in 32-bit, so roundabout: + curValue = quint64(curValue) >> 32; + } } if (IsTimed && remainingTime > 0) { @@ -208,17 +225,18 @@ start_wait: } } -template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint64> &u, int n, int timeout) +template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quintptr> &u, int n, int timeout) { // Try to acquire without waiting (we still loop because the testAndSet // call can fail). - quint64 nn = unsigned(n); - nn |= quint64(nn) << 32; // token count replicated in high word + quintptr nn = unsigned(n); + if (futexHasWaiterCount) + nn |= quint64(nn) << 32; // token count replicated in high word - quint64 curValue = u.loadAcquire(); + quintptr curValue = u.loadAcquire(); while (futexAvailCounter(curValue) >= n) { // try to acquire - quint64 newValue = curValue - nn; + quintptr newValue = curValue - nn; if (u.testAndSetOrdered(curValue, newValue, curValue)) return true; // succeeded! } @@ -226,25 +244,29 @@ template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint6 return false; // we need to wait - quint64 oneWaiter = quint64(Q_UINT64_C(1) << 32); // zero on 32-bit - // increase the waiter count - u.fetchAndAddRelaxed(oneWaiter); - - // We don't use the fetched value from above so futexWait() fails if - // it changed after the testAndSetOrdered above. - if ((quint64(curValue) >> 32) == 0x7fffffff) - return false; // overflow! - curValue += oneWaiter; - - // Also adjust nn to subtract oneWaiter when we succeed in acquiring. - nn += oneWaiter; + quintptr oneWaiter = quintptr(Q_UINT64_C(1) << 32); // zero on 32-bit + if (futexHasWaiterCount) { + // increase the waiter count + u.fetchAndAddRelaxed(oneWaiter); + + // We don't use the fetched value from above so futexWait() fails if + // it changed after the testAndSetOrdered above. + if ((quint64(curValue) >> 32) == 0x7fffffff) + return false; // overflow! + curValue += oneWaiter; + + // Also adjust nn to subtract oneWaiter when we succeed in acquiring. + nn += oneWaiter; + } if (futexSemaphoreTryAcquire_loop<IsTimed>(u, curValue, nn, timeout)) return true; - // decrement the number of threads waiting - Q_ASSERT(futexHigh32(&u)->loadRelaxed() & 0x7fffffffU); - u.fetchAndSubRelaxed(oneWaiter); + if (futexHasWaiterCount) { + // decrement the number of threads waiting + Q_ASSERT(futexHigh32(&u)->loadRelaxed() & 0x7fffffffU); + u.fetchAndSubRelaxed(oneWaiter); + } return false; } @@ -268,8 +290,9 @@ QSemaphore::QSemaphore(int n) { Q_ASSERT_X(n >= 0, "QSemaphore", "parameter 'n' must be non-negative"); if (futexAvailable()) { - quint64 nn = unsigned(n); - nn |= nn << 32; // token count replicated in high word + quintptr nn = unsigned(n); + if (futexHasWaiterCount) + nn |= quint64(nn) << 32; // token count replicated in high word u.storeRelaxed(nn); } else { d = new QSemaphorePrivate(n); @@ -328,30 +351,51 @@ void QSemaphore::release(int n) Q_ASSERT_X(n >= 0, "QSemaphore::release", "parameter 'n' must be non-negative"); if (futexAvailable()) { - quint64 nn = unsigned(n); - nn |= nn << 32; // token count replicated in high word - quint64 prevValue = u.fetchAndAddRelease(nn); + quintptr nn = unsigned(n); + if (futexHasWaiterCount) + nn |= quint64(nn) << 32; // token count replicated in high word + quintptr prevValue = u.fetchAndAddRelease(nn); if (futexNeedsWake(prevValue)) { #ifdef FUTEX_OP - /* - The single-token waiters wait on the low half - and the multi-token waiters wait on the upper half. So we ask - the kernel to wake up n single-token waiters and all multi-token - waiters (if any), then clear the multi-token wait bit. - - atomic { - int oldval = *upper; - *upper = oldval & ~(1 << 31); - futexWake(lower, n); - if (oldval < 0) // sign bit set - futexWake(upper, INT_MAX); - } - */ - quint32 op = FUTEX_OP_ANDN | FUTEX_OP_OPARG_SHIFT; - quint32 oparg = 31; - quint32 cmp = FUTEX_OP_CMP_LT; - quint32 cmparg = 0; - futexWakeOp(*futexLow32(&u), n, INT_MAX, *futexHigh32(&u), FUTEX_OP(op, oparg, cmp, cmparg)); + if (!futexHasWaiterCount) { + /* + On 32-bit systems, all waiters are waiting on the same address, + so we'll wake them all and ask the kernel to clear the high bit. + + atomic { + int oldval = u; + u = oldval & ~(1 << 31); + futexWake(u, INT_MAX); + if (oldval == 0) // impossible condition + futexWake(u, INT_MAX); + } + */ + quint32 op = FUTEX_OP_ANDN | FUTEX_OP_OPARG_SHIFT; + quint32 oparg = 31; + quint32 cmp = FUTEX_OP_CMP_EQ; + quint32 cmparg = 0; + futexWakeOp(u, INT_MAX, INT_MAX, u, FUTEX_OP(op, oparg, cmp, cmparg)); + } else { + /* + On 64-bit systems, the single-token waiters wait on the low half + and the multi-token waiters wait on the upper half. So we ask + the kernel to wake up n single-token waiters and all multi-token + waiters (if any), then clear the multi-token wait bit. + + atomic { + int oldval = *upper; + *upper = oldval & ~(1 << 31); + futexWake(lower, n); + if (oldval < 0) // sign bit set + futexWake(upper, INT_MAX); + } + */ + quint32 op = FUTEX_OP_ANDN | FUTEX_OP_OPARG_SHIFT; + quint32 oparg = 31; + quint32 cmp = FUTEX_OP_CMP_LT; + quint32 cmparg = 0; + futexWakeOp(*futexLow32(&u), n, INT_MAX, *futexHigh32(&u), FUTEX_OP(op, oparg, cmp, cmparg)); + } #else // Unset the bit and wake everyone. There are two possibibilies // under which a thread can set the bit between the AND and the |