#include "gmxsycl.h"
SYCL kernel helper functions.
- Author
- Andrey Alekseenko al42a.nosp@m.nd@g.nosp@m.mail..nosp@m.com
|
struct | AmdPackedFloat3 |
| Special packed Float3 flavor to help compiler optimizations on AMD CDNA2 devices. More...
|
|
|
#define | SYCL_ASSERT(condition) |
|
|
constexpr bool | compilingForHost () |
|
template<int expectedSubGroupSize> |
constexpr bool | compilingForSubGroupSize () |
|
template<int expectedSubGroupSize> |
constexpr bool | skipKernelCompilation () |
|
template<typename T , sycl::memory_scope MemoryScope, sycl::access::address_space AddressSpace> |
static void | atomicAddDefault (T &val, const T delta) |
|
template<typename T , sycl::memory_scope MemoryScope = sycl::memory_scope::device, sycl::access::address_space AddressSpace = sycl::access::address_space::global_space> |
static void | atomicFetchAdd (T &val, const T delta) |
| Convenience wrapper to do atomic addition to a global buffer.
|
|
template<typename T > |
static void | atomicFetchAddLocal (T &val, const T delta) |
|
template<typename T , sycl::memory_scope MemoryScope = sycl::memory_scope::device> |
static T | atomicLoad (T &val) |
| Convenience wrapper to do atomic loads from a global buffer.
|
|
| __attribute__ ((always_inline)) static AmdPackedFloat3 | operator* (const AmdPackedFloat3 &v, const float &s) |
|
template<int Dim> |
static void | subGroupBarrier (const sycl::nd_item< Dim > itemIdx) |
| Issue an intra sub-group barrier. More...
|
|
|
static constexpr unsigned int | c_cudaFullWarpMask = 0xffffffff |
| Full warp active thread mask used in CUDA warp-level primitives.
|
|
const float & | s |
|
const AmdPackedFloat3 & | v |
|
template<int Dim>
static void subGroupBarrier (const sycl::nd_item< Dim > itemIdx)
|
|
inline static
Issue an intra sub-group barrier.
Equivalent to CUDA's __syncwarp(c_cudaFullWarpMask).
Initial value:{
return { v.xy() * s, v.z() * s }
Initial value:{
return { v.xy() * s, v.z() * s }