diff options
Diffstat (limited to 'libc/src/__support/GPU/utils.h')
-rw-r--r-- | libc/src/__support/GPU/utils.h | 19 |
1 file changed, 19 insertions, 0 deletions
diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h
index 0f9167cdee06..93022e8de811 100644
--- a/libc/src/__support/GPU/utils.h
+++ b/libc/src/__support/GPU/utils.h
@@ -31,6 +31,25 @@ LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
   return gpu::get_lane_id() == get_first_lane_id(lane_mask);
 }
 
+/// Gets the sum of all lanes inside the warp or wavefront.
+LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
+  for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) {
+    uint32_t index = step + gpu::get_lane_id();
+    x += gpu::shuffle(lane_mask, index, x);
+  }
+  return gpu::broadcast_value(lane_mask, x);
+}
+
+/// Gets the accumulator scan of the threads in the warp or wavefront.
+LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
+  for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) {
+    uint32_t index = gpu::get_lane_id() - step;
+    uint32_t bitmask = gpu::get_lane_id() >= step;
+    x += -bitmask & gpu::shuffle(lane_mask, index, x);
+  }
+  return x;
+}
+
 } // namespace gpu
 } // namespace LIBC_NAMESPACE
 