#include <hip/hip_runtime.h>
#include "gromacs/utility/basedefinitions.h"
|
#define | LAUNCH_BOUNDS_EXACT(WORK_GROUP_SIZE, WAVES_PER_EU) |
|
#define | LAUNCH_BOUNDS_EXACT_SINGLE(WORK_GROUP_SIZE) __attribute__((amdgpu_flat_work_group_size(WORK_GROUP_SIZE, WORK_GROUP_SIZE))) |
|
#define | GMX_HIP_MAX_BLOCKS_PER_MP 16 |
|
#define | GMX_HIP_MAX_THREADS_PER_MP 1024 |
|
|
template<typename T > |
__device__ __forceinline__ T | LDG (const T *ptr) |
|
template<typename T > |
static __forceinline__ __device__ T | fetchFromParamLookupTable (const T *d_ptr, const hipTextureObject_t texObj, int index) |
| Fetch the value by index from the parameter lookup table. More...
|
|
#define LAUNCH_BOUNDS_EXACT |
( |
|
WORK_GROUP_SIZE, |
|
|
|
WAVES_PER_EU |
|
) |
| |
Value:
__attribute__((amdgpu_flat_work_group_size(WORK_GROUP_SIZE, WORK_GROUP_SIZE), \
amdgpu_waves_per_eu(WAVES_PER_EU, WAVES_PER_EU)))
static GMX_DEVICE_ATTRIBUTE __attribute__((always_inline)) float gmxGpuFDim(const float one
Linear interpolation using exactly two FMA operations.
Definition: gpu_kernel_utils.h:110
template<typename T >
static __forceinline__ __device__ T fetchFromParamLookupTable |
( |
const T * |
d_ptr, |
|
|
const hipTextureObject_t |
texObj, |
|
|
int |
index |
|
) |
| |
|
static |
Fetch the value by index from the parameter lookup table.
Depending on what is supported, the value is fetched either with a direct load or through the texture object.
- Template Parameters
- T | Type of the values stored in the table (the element type of d_ptr and the return type) |
- Parameters
-
[in] | d_ptr | Device pointer to the raw table memory |
[in] | texObj | Table texture object |
[in] | index | Non-negative element index |
- Returns
- The value from the table at the given index.
template<typename T >
__device__ __forceinline__ T LDG |
( |
const T * |
ptr | ) |
|
Load the value pointed to by ptr, either directly or using __ldg() when supported.