Gromacs
2026.0-dev-20241106-9ba7f4d
|
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/gpu_utils/gputraits_ocl.h"
#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
Implements the DeviceBuffer type and routines for OpenCL. Should only be included directly by the main DeviceBuffer file devicebuffer.h. TODO: the intent is for DeviceBuffer to become a class.
Typedefs | |
using | DeviceTexture = void * |
Device texture wrapper. | |
Functions | |
template<typename ValueType > | |
void | allocateDeviceBuffer (DeviceBuffer< ValueType > *buffer, size_t numValues, const DeviceContext &deviceContext) |
Allocates a device-side buffer. It is currently the caller's responsibility to call it only on not-yet-allocated buffers. More... | |
template<typename DeviceBuffer > | |
void | freeDeviceBuffer (DeviceBuffer *buffer) |
Frees a device-side buffer. This does not reset separately stored size/capacity integers, as this is planned to be a destructor of DeviceBuffer as a proper class, and no calls on buffer should be made afterwards. More... | |
template<typename ValueType > | |
void | copyToDeviceBuffer (DeviceBuffer< ValueType > *buffer, const ValueType *hostBuffer, size_t startingOffset, size_t numValues, const DeviceStream &deviceStream, GpuApiCallBehavior transferKind, CommandEvent *timingEvent) |
Performs the host-to-device data copy, synchronously or asynchronously on request. More... | |
template<typename ValueType > | |
void | copyFromDeviceBuffer (ValueType *hostBuffer, DeviceBuffer< ValueType > *buffer, size_t startingOffset, size_t numValues, const DeviceStream &deviceStream, GpuApiCallBehavior transferKind, CommandEvent *timingEvent) |
Performs the device-to-host data copy, synchronously or asynchronously on request. More... | |
template<typename ValueType > | |
void | copyBetweenDeviceBuffers (DeviceBuffer< ValueType > *, DeviceBuffer< ValueType > *, size_t, const DeviceStream &, GpuApiCallBehavior, CommandEvent *) |
Performs the device-to-device data copy, synchronously or asynchronously on request. More... | |
template<typename ValueType > | |
void | clearDeviceBufferAsync (DeviceBuffer< ValueType > *buffer, size_t startingOffset, size_t numValues, const DeviceStream &deviceStream) |
Clears the device buffer asynchronously. More... | |
template<typename T > | |
static bool | checkDeviceBuffer (DeviceBuffer< T > buffer, int requiredSize) |
Check the validity of the device buffer. More... | |
template<typename ValueType > | |
void | initParamLookupTable (DeviceBuffer< ValueType > *deviceBuffer, DeviceTexture *, const ValueType *hostBuffer, int numValues, const DeviceContext &deviceContext, const DeviceStream &) |
Create a texture object for an array of type ValueType. More... | |
template<typename ValueType > | |
void | destroyParamLookupTable (DeviceBuffer< ValueType > *deviceBuffer, const DeviceTexture &) |
Release the OpenCL device buffer. More... | |
template<typename ValueType > | |
CLANG_DIAGNOSTIC_RESET ValueType * | asMpiPointer (DeviceBuffer< ValueType > &) |
void allocateDeviceBuffer | ( | DeviceBuffer< ValueType > * | buffer, |
size_t | numValues, | ||
const DeviceContext & | deviceContext | ||
) |
Allocates a device-side buffer. It is currently the caller's responsibility to call it only on not-yet-allocated buffers.
ValueType | Raw value type of the buffer. |
[in,out] | buffer | Pointer to the device-side buffer. |
[in] | numValues | Number of values to accommodate. |
[in] | deviceContext | The buffer's device context-to-be. |
|
static |
Check the validity of the device buffer.
Checks if the buffer is not nullptr and if its allocation is big enough.
[in] | buffer | Device buffer to be checked. |
[in] | requiredSize | Number of elements that the buffer will have to accommodate. |
void clearDeviceBufferAsync | ( | DeviceBuffer< ValueType > * | buffer, |
size_t | startingOffset, | ||
size_t | numValues, | ||
const DeviceStream & | deviceStream | ||
) |
Clears the device buffer asynchronously.
ValueType | Raw value type of the buffer. |
[in,out] | buffer | Pointer to the device-side buffer |
[in] | startingOffset | Offset (in values) at the device-side buffer to start clearing at. |
[in] | numValues | Number of values to clear. |
[in] | deviceStream | GPU stream. |
void copyBetweenDeviceBuffers | ( | DeviceBuffer< ValueType > * | , |
DeviceBuffer< ValueType > * | , | ||
size_t | , | ||
const DeviceStream & | , | ||
GpuApiCallBehavior | , | ||
CommandEvent * | |||
) |
Performs the device-to-device data copy, synchronously or asynchronously on request.
ValueType | Raw value type of the buffer. |
void copyFromDeviceBuffer | ( | ValueType * | hostBuffer, |
DeviceBuffer< ValueType > * | buffer, | ||
size_t | startingOffset, | ||
size_t | numValues, | ||
const DeviceStream & | deviceStream, | ||
GpuApiCallBehavior | transferKind, | ||
CommandEvent * | timingEvent | ||
) |
Performs the device-to-host data copy, synchronously or asynchronously on request.
Note that a synchronous copy will not synchronize the stream when numValues is zero,
because of the early return.
ValueType | Raw value type of the buffer. |
[in,out] | hostBuffer | Pointer to the raw host-side memory, also typed ValueType |
[in] | buffer | Pointer to the device-side buffer |
[in] | startingOffset | Offset (in values) at the device-side buffer to copy from. |
[in] | numValues | Number of values to copy. |
[in] | deviceStream | GPU stream to perform asynchronous copy in. |
[in] | transferKind | Copy type: synchronous or asynchronous. |
[out] | timingEvent | A pointer to the D2H copy timing event to be filled in. If the pointer is not null, the event can further be used to queue a wait for this operation or to query profiling information. |
void copyToDeviceBuffer | ( | DeviceBuffer< ValueType > * | buffer, |
const ValueType * | hostBuffer, | ||
size_t | startingOffset, | ||
size_t | numValues, | ||
const DeviceStream & | deviceStream, | ||
GpuApiCallBehavior | transferKind, | ||
CommandEvent * | timingEvent | ||
) |
Performs the host-to-device data copy, synchronously or asynchronously on request.
Note that a synchronous copy will not synchronize the stream when numValues is zero,
because of the early return.
ValueType | Raw value type of the buffer. |
[in,out] | buffer | Pointer to the device-side buffer |
[in] | hostBuffer | Pointer to the raw host-side memory, also typed ValueType |
[in] | startingOffset | Offset (in values) at the device-side buffer to copy into. |
[in] | numValues | Number of values to copy. |
[in] | deviceStream | GPU stream to perform asynchronous copy in. |
[in] | transferKind | Copy type: synchronous or asynchronous. |
[out] | timingEvent | A pointer to the H2D copy timing event to be filled in. If the pointer is not null, the event can further be used to queue a wait for this operation or to query profiling information. |
void destroyParamLookupTable | ( | DeviceBuffer< ValueType > * | deviceBuffer, |
const DeviceTexture & | |||
) |
Release the OpenCL device buffer.
ValueType | Raw data type. |
[in,out] | deviceBuffer | Device buffer to release. |
void freeDeviceBuffer | ( | DeviceBuffer * | buffer | ) |
Frees a device-side buffer. This does not reset separately stored size/capacity integers, as this is planned to be a destructor of DeviceBuffer as a proper class, and no calls on buffer
should be made afterwards.
[in] | buffer | Pointer to the buffer to free. |
void initParamLookupTable | ( | DeviceBuffer< ValueType > * | deviceBuffer, |
DeviceTexture * | , | ||
const ValueType * | hostBuffer, | ||
int | numValues, | ||
const DeviceContext & | deviceContext, | ||
const DeviceStream & | |||
) |
Create a texture object for an array of type ValueType.
Creates the device buffer and copies read-only data for an array of type ValueType.
ValueType | Raw data type. |
[out] | deviceBuffer | Device buffer to store data in. |
[in] | hostBuffer | Host buffer to get data from. |
[in] | numValues | Number of elements in the buffer. |
[in] | deviceContext | Device context for memory allocation. |