summaryrefslogtreecommitdiffstats
path: root/libc/src/__support/GPU/utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/__support/GPU/utils.h')
-rw-r--r--libc/src/__support/GPU/utils.h19
1 files changed, 19 insertions, 0 deletions
diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h
index 0f9167cdee06..93022e8de811 100644
--- a/libc/src/__support/GPU/utils.h
+++ b/libc/src/__support/GPU/utils.h
@@ -31,6 +31,25 @@ LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
return gpu::get_lane_id() == get_first_lane_id(lane_mask);
}
+/// Gets the sum of `x` over all lanes inside the warp or wavefront by repeatedly halving the shuffle stride.
+LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
+ for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) { // Stride starts at half the lane size and halves every round until it reaches zero.
+ uint32_t index = step + gpu::get_lane_id(); // Partner lane `step` above this one. NOTE(review): may be >= get_lane_size() for upper lanes; confirm how gpu::shuffle treats such an index.
+ x += gpu::shuffle(lane_mask, index, x); // Accumulate what gpu::shuffle yields for lane `index` (presumably that lane's current `x` -- TODO confirm).
+ }
+ return gpu::broadcast_value(lane_mask, x); // Result comes from gpu::broadcast_value; presumably shares one lane's total with all lanes in `lane_mask` -- TODO confirm.
+}
+
+/// Gets the accumulator scan of the threads in the warp or wavefront (presumably an inclusive prefix sum of `x` by lane id -- TODO confirm).
+LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
+ for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) { // Stride doubles every round while it is below the lane size.
+ uint32_t index = gpu::get_lane_id() - step; // Partner lane `step` below this one; unsigned arithmetic wraps when the lane id is smaller than `step`.
+ uint32_t bitmask = gpu::get_lane_id() >= step; // 1 when the partner index did not wrap, 0 otherwise.
+ x += -bitmask & gpu::shuffle(lane_mask, index, x); // Unsigned negation gives all-ones or zero, so the shuffled value is added only when bitmask is 1; the shuffle itself is still called by every lane.
+ }
+ return x; // Each lane returns its own accumulated value; no broadcast is performed here.
+}
+
} // namespace gpu
} // namespace LIBC_NAMESPACE