|
Gromacs
2026.1
|
Functions | |
| bool | deviceHas64ParallelExecutionSize (const DeviceInformation &deviceInfo) |
| template<bool is64ExecutionWidth> | |
| __global__ void | pmeGpuPackHaloExternal (const float *__restrict__ gm_realGrid, float *__restrict__ gm_transferGridUp, float *__restrict__ gm_transferGridDown, float *__restrict__ gm_transferGridLeft, float *__restrict__ gm_transferGridRight, float *__restrict__ gm_transferGridUpLeft, float *__restrict__ gm_transferGridDownLeft, float *__restrict__ gm_transferGridUpRight, float *__restrict__ gm_transferGridDownRight, int overlapSizeUp, int overlapSizeDown, int overlapSizeLeft, int overlapSizeRight, int myGridX, int myGridY, int3 pmeSize) |
| A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions. More... | |
| template<bool is64ExecutionWidth> | |
| __global__ void | pmeGpuUnpackHaloExternal (float *__restrict__ gm_realGrid, const float *__restrict__ gm_transferGridUp, const float *__restrict__ gm_transferGridDown, const float *__restrict__ gm_transferGridLeft, const float *__restrict__ gm_transferGridRight, const float *__restrict__ gm_transferGridUpLeft, const float *__restrict__ gm_transferGridDownLeft, const float *__restrict__ gm_transferGridUpRight, const float *__restrict__ gm_transferGridDownRight, int overlapSizeUp, int overlapSizeDown, int overlapSizeLeft, int overlapSizeRight, int myGridX, int myGridY, int3 pmeSize) |
| A HIP kernel which assigns data in halo region in all 8 neighboring directions. More... | |
| template<bool is64ExecutionWidth> | |
| __global__ void | pmeGpuUnpackAndAddHaloInternal (float *__restrict__ gm_realGrid, const float *__restrict__ gm_transferGridUp, const float *__restrict__ gm_transferGridDown, const float *__restrict__ gm_transferGridLeft, const float *__restrict__ gm_transferGridRight, const float *__restrict__ gm_transferGridUpLeft, const float *__restrict__ gm_transferGridDownLeft, const float *__restrict__ gm_transferGridUpRight, const float *__restrict__ gm_transferGridDownRight, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, int3 pmeSize) |
| A HIP kernel which adds grid overlap data received from neighboring ranks. More... | |
| template<bool is64ExecutionWidth> | |
| __global__ void | pmeGpuPackHaloInternal (const float *__restrict__ gm_realGrid, float *__restrict__ gm_transferGridUp, float *__restrict__ gm_transferGridDown, float *__restrict__ gm_transferGridLeft, float *__restrict__ gm_transferGridRight, float *__restrict__ gm_transferGridUpLeft, float *__restrict__ gm_transferGridDownLeft, float *__restrict__ gm_transferGridUpRight, float *__restrict__ gm_transferGridDownRight, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, int3 pmeSize) |
| A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions. More... | |
| template<bool pmeToFft, bool is64ExecutionWidth> | |
| __global__ void | pmegrid_to_fftgrid (float *__restrict__ gm_realGrid, float *__restrict__ gm_fftGrid, int3 fftNData, int3 fftSize, int3 pmeSize) |
| A HIP kernel which copies data from pme grid to FFT grid and back. More... | |
| template<bool is64ExecutionWidth> | |
| void | packHaloDataExternal (const PmeGpu *pmeGpu, int overlapUp, int overlapDown, int overlapLeft, int overlapRight, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight) |
| Launches HIP kernel to pack non-contiguous external halo data. | |
| template<bool is64ExecutionWidth> | |
| void | packHaloDataInternal (const PmeGpu *pmeGpu, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight) |
| Launches HIP kernel to pack non-contiguous internal halo data. | |
| template<bool is64ExecutionWidth> | |
| void | unpackAndAddHaloDataInternal (const PmeGpu *pmeGpu, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight) |
| Launches HIP kernel to unpack and reduce overlap data. | |
| template<bool is64ExecutionWidth> | |
| void | unpackHaloDataExternal (const PmeGpu *pmeGpu, int overlapUp, int overlapDown, int overlapLeft, int overlapRight, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight) |
| Launches HIP kernel to initialize overlap data. | |
| void | receiveAndSend (DeviceBuffer< float > sendBuf, int sendCount, int dest, MPI_Request *sendRequest, DeviceBuffer< float > recvBuf, int recvCount, int src, MPI_Request *recvRequest, int tag, MPI_Comm comm) |
| Utility function to send and receive halo data from neighboring ranks. | |
Variables | |
| template<bool is64ExecutionWidth> | |
| constexpr int | sc_subGroupSizeX = is64ExecutionWidth ? 64 : 32 |
| Sub-group size for conversion kernels. More... | |
| template<bool is64ExecutionWidth> | |
| constexpr int | sc_subGroupSizeY = is64ExecutionWidth ? 2 : 4 |
| constexpr int | sc_subGroupSizeZ = 1 |
| __global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuPackHaloExternal | ( | const float *__restrict__ | gm_realGrid, |
| float *__restrict__ | gm_transferGridUp, | ||
| float *__restrict__ | gm_transferGridDown, | ||
| float *__restrict__ | gm_transferGridLeft, | ||
| float *__restrict__ | gm_transferGridRight, | ||
| float *__restrict__ | gm_transferGridUpLeft, | ||
| float *__restrict__ | gm_transferGridDownLeft, | ||
| float *__restrict__ | gm_transferGridUpRight, | ||
| float *__restrict__ | gm_transferGridDownRight, | ||
| int | overlapSizeUp, | ||
| int | overlapSizeDown, | ||
| int | overlapSizeLeft, | ||
| int | overlapSizeRight, | ||
| int | myGridX, | ||
| int | myGridY, | ||
| int3 | pmeSize | ||
| ) |
A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions.
| [in] | gm_realGrid | PME device grid |
| [out] | gm_transferGridUp | packed data in 8-neighboring directions |
| [out] | gm_transferGridDown | packed data in 8-neighboring directions |
| [out] | gm_transferGridLeft | packed data in 8-neighboring directions |
| [out] | gm_transferGridRight | packed data in 8-neighboring directions |
| [out] | gm_transferGridUpLeft | packed data in 8-neighboring directions |
| [out] | gm_transferGridDownLeft | packed data in 8-neighboring directions |
| [out] | gm_transferGridUpRight | packed data in 8-neighboring directions |
| [out] | gm_transferGridDownRight | packed data in 8-neighboring directions |
| [in] | overlapSizeUp | halo size in 4 directions, up |
| [in] | overlapSizeDown | halo size in 4 directions, down |
| [in] | overlapSizeLeft | halo size in 4 directions, left |
| [in] | overlapSizeRight | halo size in 4 directions, right |
| [in] | myGridX | local domain size in X dimension |
| [in] | myGridY | local domain size in Y dimension |
| [in] | pmeSize | Local PME grid size |
| __global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuPackHaloInternal | ( | const float *__restrict__ | gm_realGrid, |
| float *__restrict__ | gm_transferGridUp, | ||
| float *__restrict__ | gm_transferGridDown, | ||
| float *__restrict__ | gm_transferGridLeft, | ||
| float *__restrict__ | gm_transferGridRight, | ||
| float *__restrict__ | gm_transferGridUpLeft, | ||
| float *__restrict__ | gm_transferGridDownLeft, | ||
| float *__restrict__ | gm_transferGridUpRight, | ||
| float *__restrict__ | gm_transferGridDownRight, | ||
| int | overlapSizeX, | ||
| int | overlapSizeY, | ||
| int | overlapUp, | ||
| int | overlapLeft, | ||
| int | myGridX, | ||
| int | myGridY, | ||
| int3 | pmeSize | ||
| ) |
A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions.
| [in] | gm_realGrid | PME device grid |
| [out] | gm_transferGridUp | packed data in 8-neighboring directions |
| [out] | gm_transferGridDown | packed data in 8-neighboring directions |
| [out] | gm_transferGridLeft | packed data in 8-neighboring directions |
| [out] | gm_transferGridRight | packed data in 8-neighboring directions |
| [out] | gm_transferGridUpLeft | packed data in 8-neighboring directions |
| [out] | gm_transferGridDownLeft | packed data in 8-neighboring directions |
| [out] | gm_transferGridUpRight | packed data in 8-neighboring directions |
| [out] | gm_transferGridDownRight | packed data in 8-neighboring directions |
| [in] | overlapSizeX | halo size in 4 directions, X |
| [in] | overlapSizeY | halo size in 4 directions, Y |
| [in] | overlapUp | halo size in 4 directions, up |
| [in] | overlapLeft | halo size in 4 directions, left |
| [in] | myGridX | local domain size in X dimension |
| [in] | myGridY | local domain size in Y dimension |
| [in] | pmeSize | Local PME grid size |
| __global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuUnpackAndAddHaloInternal | ( | float *__restrict__ | gm_realGrid, |
| const float *__restrict__ | gm_transferGridUp, | ||
| const float *__restrict__ | gm_transferGridDown, | ||
| const float *__restrict__ | gm_transferGridLeft, | ||
| const float *__restrict__ | gm_transferGridRight, | ||
| const float *__restrict__ | gm_transferGridUpLeft, | ||
| const float *__restrict__ | gm_transferGridDownLeft, | ||
| const float *__restrict__ | gm_transferGridUpRight, | ||
| const float *__restrict__ | gm_transferGridDownRight, | ||
| int | overlapSizeX, | ||
| int | overlapSizeY, | ||
| int | overlapUp, | ||
| int | overlapLeft, | ||
| int | myGridX, | ||
| int | myGridY, | ||
| int3 | pmeSize | ||
| ) |
A HIP kernel which adds grid overlap data received from neighboring ranks.
| [in,out] | gm_realGrid | PME device grid |
| [in] | gm_transferGridUp | packed data in 8-neighboring directions |
| [in] | gm_transferGridDown | packed data in 8-neighboring directions |
| [in] | gm_transferGridLeft | packed data in 8-neighboring directions |
| [in] | gm_transferGridRight | packed data in 8-neighboring directions |
| [in] | gm_transferGridUpLeft | packed data in 8-neighboring directions |
| [in] | gm_transferGridDownLeft | packed data in 8-neighboring directions |
| [in] | gm_transferGridUpRight | packed data in 8-neighboring directions |
| [in] | gm_transferGridDownRight | packed data in 8-neighboring directions |
| [in] | overlapSizeX | halo size in 4 directions, X |
| [in] | overlapSizeY | halo size in 4 directions, Y |
| [in] | overlapUp | halo size in 4 directions, up |
| [in] | overlapLeft | halo size in 4 directions, left |
| [in] | myGridX | local domain size in X dimension |
| [in] | myGridY | local domain size in Y dimension |
| [in] | pmeSize | Local PME grid size |
| __global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuUnpackHaloExternal | ( | float *__restrict__ | gm_realGrid, |
| const float *__restrict__ | gm_transferGridUp, | ||
| const float *__restrict__ | gm_transferGridDown, | ||
| const float *__restrict__ | gm_transferGridLeft, | ||
| const float *__restrict__ | gm_transferGridRight, | ||
| const float *__restrict__ | gm_transferGridUpLeft, | ||
| const float *__restrict__ | gm_transferGridDownLeft, | ||
| const float *__restrict__ | gm_transferGridUpRight, | ||
| const float *__restrict__ | gm_transferGridDownRight, | ||
| int | overlapSizeUp, | ||
| int | overlapSizeDown, | ||
| int | overlapSizeLeft, | ||
| int | overlapSizeRight, | ||
| int | myGridX, | ||
| int | myGridY, | ||
| int3 | pmeSize | ||
| ) |
A HIP kernel which assigns data in halo region in all 8 neighboring directions.
| [out] | gm_realGrid | PME device grid |
| [in] | gm_transferGridUp | packed data in 8-neighboring directions |
| [in] | gm_transferGridDown | packed data in 8-neighboring directions |
| [in] | gm_transferGridLeft | packed data in 8-neighboring directions |
| [in] | gm_transferGridRight | packed data in 8-neighboring directions |
| [in] | gm_transferGridUpLeft | packed data in 8-neighboring directions |
| [in] | gm_transferGridDownLeft | packed data in 8-neighboring directions |
| [in] | gm_transferGridUpRight | packed data in 8-neighboring directions |
| [in] | gm_transferGridDownRight | packed data in 8-neighboring directions |
| [in] | overlapSizeUp | halo size in 4 directions, up |
| [in] | overlapSizeDown | halo size in 4 directions, down |
| [in] | overlapSizeLeft | halo size in 4 directions, left |
| [in] | overlapSizeRight | halo size in 4 directions, right |
| [in] | myGridX | local domain size in X dimension |
| [in] | myGridY | local domain size in Y dimension |
| [in] | pmeSize | Local PME grid size |
| __global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmegrid_to_fftgrid | ( | float *__restrict__ | gm_realGrid, |
| float *__restrict__ | gm_fftGrid, | ||
| int3 | fftNData, | ||
| int3 | fftSize, | ||
| int3 | pmeSize | ||
| ) |
A HIP kernel which copies data from pme grid to FFT grid and back.
| [in,out] | gm_realGrid | local PME real space grid |
| [in,out] | gm_fftGrid | local FFT grid |
| [in] | fftNData | local FFT grid size without padding |
| [in] | fftSize | local FFT grid padded size |
| [in] | pmeSize | local PME grid padded size |
| pmeToFft | A boolean which tells if this is conversion from PME grid to FFT grid or reverse |
| constexpr int anonymous_namespace{pme_gpu_grid_hip.cpp}::sc_subGroupSizeX = is64ExecutionWidth ? 64 : 32 |
Sub-group size for conversion kernels.
Chosen to match relevant hardware widths on supported hardware.
1.8.5