Gromacs
2019
|
#include <string>
#include "gromacs/gpu_utils/gmxopencl.h"
#include "gromacs/gpu_utils/gputraits_ocl.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
Declare utility routines for OpenCL.
Enumerations | |
enum | ocl_vendor_id_t { OCL_VENDOR_NVIDIA = 0, OCL_VENDOR_AMD, OCL_VENDOR_INTEL, OCL_VENDOR_UNKNOWN } |
OpenCL vendor IDs. | |
Functions | |
int | ocl_copy_D2H (void *h_dest, cl_mem d_src, size_t offset, size_t bytes, GpuApiCallBehavior transferKind, cl_command_queue command_queue, cl_event *copy_event) |
Launches synchronous or asynchronous device to host memory copy. More... | |
int | ocl_copy_D2H_async (void *h_dest, cl_mem d_src, size_t offset, size_t bytes, cl_command_queue command_queue, cl_event *copy_event) |
Launches asynchronous device to host memory copy. More... | |
int | ocl_copy_H2D (cl_mem d_dest, void *h_src, size_t offset, size_t bytes, GpuApiCallBehavior transferKind, cl_command_queue command_queue, cl_event *copy_event) |
Launches synchronous or asynchronous host to device memory copy. More... | |
int | ocl_copy_H2D_async (cl_mem d_dest, void *h_src, size_t offset, size_t bytes, cl_command_queue command_queue, cl_event *copy_event) |
Launches asynchronous host to device memory copy. More... | |
int | ocl_copy_H2D_sync (cl_mem d_dest, void *h_src, size_t offset, size_t bytes, cl_command_queue command_queue) |
Launches synchronous host to device memory copy. | |
void | pmalloc (void **h_ptr, size_t nbytes) |
Allocate host memory in malloc style. More... | |
void | pfree (void *h_ptr) |
Free host memory in malloc style. More... | |
std::string | ocl_get_error_string (cl_int error) |
Convert error code to diagnostic string. | |
static void | gpuStreamSynchronize (cl_command_queue s) |
Calls clFinish() in the stream s. More... | |
void | ensureReferenceCount (const cl_event &event, unsigned int refCount) |
A debug checker to track cl_events being released correctly. | |
static bool | haveStreamTasksCompleted (cl_command_queue s) |
Pretend to synchronize an OpenCL stream (dummy implementation). More... | |
void | prepareGpuKernelArgument (cl_kernel kernel, const KernelLaunchConfig &config, size_t argIndex) |
A function for setting up a single OpenCL kernel argument. This is the tail of the compile-time recursive function below. It has to be seen by the compiler first. As NB kernels might be using dynamic local memory as the last argument, this function also manages that, using sharedMemorySize from config. More... | |
template<typename CurrentArg , typename... RemainingArgs> | |
void | prepareGpuKernelArgument (cl_kernel kernel, const KernelLaunchConfig &config, size_t argIndex, const CurrentArg *argPtr, const RemainingArgs *...otherArgsPtrs) |
Compile-time recursive function for setting up a single OpenCL kernel argument. This function uses one kernel argument pointer argPtr to call clSetKernelArg(), and calls itself on the next argument, eventually calling the tail function above. More... | |
template<typename... Args> | |
void * | prepareGpuKernelArguments (cl_kernel kernel, const KernelLaunchConfig &config, const Args *...argsPtrs) |
A wrapper function for setting up all the OpenCL kernel arguments. Calls the recursive functions above. More... | |
void | launchGpuKernel (cl_kernel kernel, const KernelLaunchConfig &config, CommandEvent *timingEvent, const char *kernelName, const void *) |
Launches the OpenCL kernel and handles the errors. More... | |
|
inline static |
Calls clFinish() in the stream s.
[in] | s | stream to synchronize with |
|
inline static |
Pretend to synchronize an OpenCL stream (dummy implementation).
[in] | s | queue to check |
Returns true if all tasks enqueued in the stream s (at the time of this call) have completed.
|
inline |
Launches the OpenCL kernel and handles the errors.
[in] | kernel | Kernel function handle |
[in] | config | Kernel configuration for launching |
[in] | timingEvent | Timing event, fetched from GpuRegionTimer |
[in] | kernelName | Human readable kernel description, for error handling only |
gmx::InternalError | on kernel launch failure |
int ocl_copy_D2H | ( | void * | h_dest, |
cl_mem | d_src, | ||
size_t | offset, | ||
size_t | bytes, | ||
GpuApiCallBehavior | transferKind, | ||
cl_command_queue | command_queue, | ||
cl_event * | copy_event | ||
) |
Launches synchronous or asynchronous device to host memory copy.
If copy_event is not NULL, on return it will contain an event object identifying this particular device to host operation. The event can further be used to queue a wait for this operation or to query profiling information.
int ocl_copy_D2H_async | ( | void * | h_dest, |
cl_mem | d_src, | ||
size_t | offset, | ||
size_t | bytes, | ||
cl_command_queue | command_queue, | ||
cl_event * | copy_event | ||
) |
Launches asynchronous device to host memory copy.
If copy_event is not nullptr, on return it will contain an event object identifying this particular device to host operation. The event can further be used to queue a wait for this operation or to query profiling information.
int ocl_copy_H2D | ( | cl_mem | d_dest, |
void * | h_src, | ||
size_t | offset, | ||
size_t | bytes, | ||
GpuApiCallBehavior | transferKind, | ||
cl_command_queue | command_queue, | ||
cl_event * | copy_event | ||
) |
Launches synchronous or asynchronous host to device memory copy.
If copy_event is not NULL, on return it will contain an event object identifying this particular host to device operation. The event can further be used to queue a wait for this operation or to query profiling information.
int ocl_copy_H2D_async | ( | cl_mem | d_dest, |
void * | h_src, | ||
size_t | offset, | ||
size_t | bytes, | ||
cl_command_queue | command_queue, | ||
cl_event * | copy_event | ||
) |
Launches asynchronous host to device memory copy.
If copy_event is not nullptr, on return it will contain an event object identifying this particular host to device operation. The event can further be used to queue a wait for this operation or to query profiling information.
void pfree | ( | void * | h_ptr | ) |
Free host memory in malloc style.
Free host memory in malloc style.
[in] | h_ptr | Buffer allocated with pmalloc that needs to be freed. |
void pmalloc | ( | void ** | h_ptr, |
size_t | nbytes | ||
) |
Allocate host memory in malloc style.
Allocate host memory in malloc style.
[in,out] | h_ptr | Pointer where to store the address of the newly allocated buffer. |
[in] | nbytes | Size in bytes of the buffer to be allocated. |
|
inline |
A function for setting up a single OpenCL kernel argument. This is the tail of the compile-time recursive function below. It has to be seen by the compiler first. As NB kernels might be using dynamic local memory as the last argument, this function also manages that, using sharedMemorySize from config.
[in] | kernel | Kernel function handle |
[in] | config | Kernel configuration for launching |
[in] | argIndex | Index of the current argument |
void prepareGpuKernelArgument | ( | cl_kernel | kernel, |
const KernelLaunchConfig & | config, | ||
size_t | argIndex, | ||
const CurrentArg * | argPtr, | ||
const RemainingArgs *... | otherArgsPtrs | ||
) |
Compile-time recursive function for setting up a single OpenCL kernel argument. This function uses one kernel argument pointer argPtr to call clSetKernelArg(), and calls itself on the next argument, eventually calling the tail function above.
CurrentArg | Type of the current argument |
RemainingArgs | Types of remaining arguments after the current one |
[in] | kernel | Kernel function handle |
[in] | config | Kernel configuration for launching |
[in] | argIndex | Index of the current argument |
[in] | argPtr | Pointer to the current argument |
[in] | otherArgsPtrs | Pack of pointers to arguments remaining to process after the current one |
void* prepareGpuKernelArguments | ( | cl_kernel | kernel, |
const KernelLaunchConfig & | config, | ||
const Args *... | argsPtrs | ||
) |
A wrapper function for setting up all the OpenCL kernel arguments. Calls the recursive functions above.
Args | Types of all the kernel arguments |
[in] | kernel | Kernel function handle |
[in] | config | Kernel configuration for launching |
[in] | argsPtrs | Pointers to all the kernel arguments |