summaryrefslogtreecommitdiffstats
path: root/libc/src/__support/GPU/amdgpu/utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/__support/GPU/amdgpu/utils.h')
-rw-r--r--libc/src/__support/GPU/amdgpu/utils.h6
1 files changed, 6 insertions, 0 deletions
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
index 75f0b5744ebd..9b520a6bcf38 100644
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -145,6 +145,12 @@ LIBC_INLINE uint32_t get_lane_size() {
__builtin_amdgcn_wave_barrier();
}
+/// Shuffles the lanes inside the wavefront according to the given index; the unnamed first parameter is ignored.
+[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t idx,
+ uint32_t x) {
+ return __builtin_amdgcn_ds_bpermute(idx << 2, x); // idx << 2 == idx * 4; presumably the builtin wants a byte offset per 32-bit lane -- confirm
+}
+
/// Returns the current value of the GPU's processor clock, read via the compiler's cycle-counter builtin.
/// NOTE: The RDNA3 and RDNA2 architectures use a 20-bit cycle counter.
LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }