Gromacs  2026.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Functions | Variables
anonymous_namespace{pme_gpu_grid_hip.cpp} Namespace Reference

Functions

bool deviceHas64ParallelExecutionSize (const DeviceInformation &deviceInfo)
 
template<bool is64ExecutionWidth>
__global__ void pmeGpuPackHaloExternal (const float *__restrict__ gm_realGrid, float *__restrict__ gm_transferGridUp, float *__restrict__ gm_transferGridDown, float *__restrict__ gm_transferGridLeft, float *__restrict__ gm_transferGridRight, float *__restrict__ gm_transferGridUpLeft, float *__restrict__ gm_transferGridDownLeft, float *__restrict__ gm_transferGridUpRight, float *__restrict__ gm_transferGridDownRight, int overlapSizeUp, int overlapSizeDown, int overlapSizeLeft, int overlapSizeRight, int myGridX, int myGridY, int3 pmeSize)
 A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions. More...
 
template<bool is64ExecutionWidth>
__global__ void pmeGpuUnpackHaloExternal (float *__restrict__ gm_realGrid, const float *__restrict__ gm_transferGridUp, const float *__restrict__ gm_transferGridDown, const float *__restrict__ gm_transferGridLeft, const float *__restrict__ gm_transferGridRight, const float *__restrict__ gm_transferGridUpLeft, const float *__restrict__ gm_transferGridDownLeft, const float *__restrict__ gm_transferGridUpRight, const float *__restrict__ gm_transferGridDownRight, int overlapSizeUp, int overlapSizeDown, int overlapSizeLeft, int overlapSizeRight, int myGridX, int myGridY, int3 pmeSize)
 A HIP kernel which assigns data in halo region in all 8 neighboring directions. More...
 
template<bool is64ExecutionWidth>
__global__ void pmeGpuUnpackAndAddHaloInternal (float *__restrict__ gm_realGrid, const float *__restrict__ gm_transferGridUp, const float *__restrict__ gm_transferGridDown, const float *__restrict__ gm_transferGridLeft, const float *__restrict__ gm_transferGridRight, const float *__restrict__ gm_transferGridUpLeft, const float *__restrict__ gm_transferGridDownLeft, const float *__restrict__ gm_transferGridUpRight, const float *__restrict__ gm_transferGridDownRight, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, int3 pmeSize)
 A HIP kernel which adds grid overlap data received from neighboring ranks. More...
 
template<bool is64ExecutionWidth>
__global__ void pmeGpuPackHaloInternal (const float *__restrict__ gm_realGrid, float *__restrict__ gm_transferGridUp, float *__restrict__ gm_transferGridDown, float *__restrict__ gm_transferGridLeft, float *__restrict__ gm_transferGridRight, float *__restrict__ gm_transferGridUpLeft, float *__restrict__ gm_transferGridDownLeft, float *__restrict__ gm_transferGridUpRight, float *__restrict__ gm_transferGridDownRight, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, int3 pmeSize)
 A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions. More...
 
template<bool pmeToFft, bool is64ExecutionWidth>
__global__ void pmegrid_to_fftgrid (float *__restrict__ gm_realGrid, float *__restrict__ gm_fftGrid, int3 fftNData, int3 fftSize, int3 pmeSize)
 A HIP kernel which copies data from pme grid to FFT grid and back. More...
 
template<bool is64ExecutionWidth>
void packHaloDataExternal (const PmeGpu *pmeGpu, int overlapUp, int overlapDown, int overlapLeft, int overlapRight, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight)
 Launches HIP kernel to pack non-contiguous external halo data.
 
template<bool is64ExecutionWidth>
void packHaloDataInternal (const PmeGpu *pmeGpu, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight)
 Launches HIP kernel to pack non-contiguous internal halo data.
 
template<bool is64ExecutionWidth>
void unpackAndAddHaloDataInternal (const PmeGpu *pmeGpu, int overlapSizeX, int overlapSizeY, int overlapUp, int overlapLeft, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight)
 Launches HIP kernel to unpack and reduce overlap data.
 
template<bool is64ExecutionWidth>
void unpackHaloDataExternal (const PmeGpu *pmeGpu, int overlapUp, int overlapDown, int overlapLeft, int overlapRight, int myGridX, int myGridY, const ivec &pmeSize, DeviceBuffer< float > realGrid, DeviceBuffer< float > packedGridUp, DeviceBuffer< float > packedGridDown, DeviceBuffer< float > packedGridLeft, DeviceBuffer< float > packedGridRight, DeviceBuffer< float > packedGridUpLeft, DeviceBuffer< float > packedGridDownLeft, DeviceBuffer< float > packedGridUpRight, DeviceBuffer< float > packedGridDownRight)
 Launches HIP kernel to initialize overlap data.
 
void receiveAndSend (DeviceBuffer< float > sendBuf, int sendCount, int dest, MPI_Request *sendRequest, DeviceBuffer< float > recvBuf, int recvCount, int src, MPI_Request *recvRequest, int tag, MPI_Comm comm)
 Utility function to send and receive halo data from neighboring ranks.
 

Variables

template<bool is64ExecutionWidth>
constexpr int sc_subGroupSizeX = is64ExecutionWidth ? 64 : 32
 Sub-group size for conversion kernels. More...
 
template<bool is64ExecutionWidth>
constexpr int sc_subGroupSizeY = is64ExecutionWidth ? 2 : 4
 
constexpr int sc_subGroupSizeZ = 1
 

Function Documentation

template<bool is64ExecutionWidth>
__global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuPackHaloExternal ( const float *__restrict__  gm_realGrid,
float *__restrict__  gm_transferGridUp,
float *__restrict__  gm_transferGridDown,
float *__restrict__  gm_transferGridLeft,
float *__restrict__  gm_transferGridRight,
float *__restrict__  gm_transferGridUpLeft,
float *__restrict__  gm_transferGridDownLeft,
float *__restrict__  gm_transferGridUpRight,
float *__restrict__  gm_transferGridDownRight,
int  overlapSizeUp,
int  overlapSizeDown,
int  overlapSizeLeft,
int  overlapSizeRight,
int  myGridX,
int  myGridY,
int3  pmeSize 
)

A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions.

Parameters
[in] gm_realGrid - PME device grid
[out] gm_transferGridUp - packed data in 8-neighboring directions
[out] gm_transferGridDown - packed data in 8-neighboring directions
[out] gm_transferGridLeft - packed data in 8-neighboring directions
[out] gm_transferGridRight - packed data in 8-neighboring directions
[out] gm_transferGridUpLeft - packed data in 8-neighboring directions
[out] gm_transferGridDownLeft - packed data in 8-neighboring directions
[out] gm_transferGridUpRight - packed data in 8-neighboring directions
[out] gm_transferGridDownRight - packed data in 8-neighboring directions
[in] overlapSizeUp - halo size in 4 directions, up
[in] overlapSizeDown - halo size in 4 directions, down
[in] overlapSizeLeft - halo size in 4 directions, left
[in] overlapSizeRight - halo size in 4 directions, right
[in] myGridX - local domain size in X dimension
[in] myGridY - local domain size in Y dimension
[in] pmeSize - local PME grid size
template<bool is64ExecutionWidth>
__global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuPackHaloInternal ( const float *__restrict__  gm_realGrid,
float *__restrict__  gm_transferGridUp,
float *__restrict__  gm_transferGridDown,
float *__restrict__  gm_transferGridLeft,
float *__restrict__  gm_transferGridRight,
float *__restrict__  gm_transferGridUpLeft,
float *__restrict__  gm_transferGridDownLeft,
float *__restrict__  gm_transferGridUpRight,
float *__restrict__  gm_transferGridDownRight,
int  overlapSizeX,
int  overlapSizeY,
int  overlapUp,
int  overlapLeft,
int  myGridX,
int  myGridY,
int3  pmeSize 
)

A HIP kernel which packs non-contiguous overlap data in all 8 neighboring directions.

Parameters
[in] gm_realGrid - PME device grid
[out] gm_transferGridUp - packed data in 8-neighboring directions
[out] gm_transferGridDown - packed data in 8-neighboring directions
[out] gm_transferGridLeft - packed data in 8-neighboring directions
[out] gm_transferGridRight - packed data in 8-neighboring directions
[out] gm_transferGridUpLeft - packed data in 8-neighboring directions
[out] gm_transferGridDownLeft - packed data in 8-neighboring directions
[out] gm_transferGridUpRight - packed data in 8-neighboring directions
[out] gm_transferGridDownRight - packed data in 8-neighboring directions
[in] overlapSizeX - halo size in 4 directions, X
[in] overlapSizeY - halo size in 4 directions, Y
[in] overlapUp - halo size in 4 directions, up
[in] overlapLeft - halo size in 4 directions, left
[in] myGridX - local domain size in X dimension
[in] myGridY - local domain size in Y dimension
[in] pmeSize - local PME grid size
template<bool is64ExecutionWidth>
__global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuUnpackAndAddHaloInternal ( float *__restrict__  gm_realGrid,
const float *__restrict__  gm_transferGridUp,
const float *__restrict__  gm_transferGridDown,
const float *__restrict__  gm_transferGridLeft,
const float *__restrict__  gm_transferGridRight,
const float *__restrict__  gm_transferGridUpLeft,
const float *__restrict__  gm_transferGridDownLeft,
const float *__restrict__  gm_transferGridUpRight,
const float *__restrict__  gm_transferGridDownRight,
int  overlapSizeX,
int  overlapSizeY,
int  overlapUp,
int  overlapLeft,
int  myGridX,
int  myGridY,
int3  pmeSize 
)

A HIP kernel which adds grid overlap data received from neighboring ranks.

Parameters
[in,out] gm_realGrid - PME device grid; received overlap data is added to it
[in] gm_transferGridUp - packed data in 8-neighboring directions
[in] gm_transferGridDown - packed data in 8-neighboring directions
[in] gm_transferGridLeft - packed data in 8-neighboring directions
[in] gm_transferGridRight - packed data in 8-neighboring directions
[in] gm_transferGridUpLeft - packed data in 8-neighboring directions
[in] gm_transferGridDownLeft - packed data in 8-neighboring directions
[in] gm_transferGridUpRight - packed data in 8-neighboring directions
[in] gm_transferGridDownRight - packed data in 8-neighboring directions
[in] overlapSizeX - halo size in 4 directions, X
[in] overlapSizeY - halo size in 4 directions, Y
[in] overlapUp - halo size in 4 directions, up
[in] overlapLeft - halo size in 4 directions, left
[in] myGridX - local domain size in X dimension
[in] myGridY - local domain size in Y dimension
[in] pmeSize - local PME grid size
template<bool is64ExecutionWidth>
__global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmeGpuUnpackHaloExternal ( float *__restrict__  gm_realGrid,
const float *__restrict__  gm_transferGridUp,
const float *__restrict__  gm_transferGridDown,
const float *__restrict__  gm_transferGridLeft,
const float *__restrict__  gm_transferGridRight,
const float *__restrict__  gm_transferGridUpLeft,
const float *__restrict__  gm_transferGridDownLeft,
const float *__restrict__  gm_transferGridUpRight,
const float *__restrict__  gm_transferGridDownRight,
int  overlapSizeUp,
int  overlapSizeDown,
int  overlapSizeLeft,
int  overlapSizeRight,
int  myGridX,
int  myGridY,
int3  pmeSize 
)

A HIP kernel which assigns data in halo region in all 8 neighboring directions.

Parameters
[out] gm_realGrid - PME device grid; its halo region is assigned from the packed data
[in] gm_transferGridUp - packed data in 8-neighboring directions
[in] gm_transferGridDown - packed data in 8-neighboring directions
[in] gm_transferGridLeft - packed data in 8-neighboring directions
[in] gm_transferGridRight - packed data in 8-neighboring directions
[in] gm_transferGridUpLeft - packed data in 8-neighboring directions
[in] gm_transferGridDownLeft - packed data in 8-neighboring directions
[in] gm_transferGridUpRight - packed data in 8-neighboring directions
[in] gm_transferGridDownRight - packed data in 8-neighboring directions
[in] overlapSizeUp - halo size in 4 directions, up
[in] overlapSizeDown - halo size in 4 directions, down
[in] overlapSizeLeft - halo size in 4 directions, left
[in] overlapSizeRight - halo size in 4 directions, right
[in] myGridX - local domain size in X dimension
[in] myGridY - local domain size in Y dimension
[in] pmeSize - local PME grid size
template<bool pmeToFft, bool is64ExecutionWidth>
__global__ void anonymous_namespace{pme_gpu_grid_hip.cpp}::pmegrid_to_fftgrid ( float *__restrict__  gm_realGrid,
float *__restrict__  gm_fftGrid,
int3  fftNData,
int3  fftSize,
int3  pmeSize 
)

A HIP kernel which copies data from pme grid to FFT grid and back.

Parameters
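[in,out] gm_realGrid - local PME real space grid (source when pmeToFft is true, destination otherwise)
[in,out] gm_fftGrid - local FFT grid (destination when pmeToFft is true, source otherwise)
[in] fftNData - local FFT grid size without padding
[in] fftSize - local FFT grid padded size
[in] pmeSize - local PME grid padded size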
Template Parameters
pmeToFft - A boolean which tells if this is conversion from PME grid to FFT grid or reverse

Variable Documentation

template<bool is64ExecutionWidth>
constexpr int anonymous_namespace{pme_gpu_grid_hip.cpp}::sc_subGroupSizeX = is64ExecutionWidth ? 64 : 32

Sub-group size for conversion kernels.

Chosen to match relevant hardware widths on supported hardware.