Gromacs  2026.0-dev-20251119-5f0a571d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Macros | Functions | Variables
pme_gpu_program_impl_hip.cpp File Reference
#include "gmxpre.h"
#include "gromacs/hardware/device_information.h"
#include "pme_gpu_constants.h"
#include "pme_gpu_internal.h"
#include "pme_gpu_program_impl.h"
#include "pme_gpu_types_host.h"
+ Include dependency graph for pme_gpu_program_impl_hip.cpp:

Description

Implements PmeGpuProgramImpl, which stores permanent PME GPU context-derived data, such as (compiled) kernel handles.

Author
Aleksei Iupinov a.yup.nosp@m.inov.nosp@m.@gmai.nosp@m.l.co.nosp@m.m
Andrey Alekseenko al42a.nosp@m.nd@g.nosp@m.mail..nosp@m.com
Paul Bauer paul..nosp@m.baue.nosp@m.r.q@g.nosp@m.mail.nosp@m..com

Macros

#define INSTANTIATE_SPREAD_2(order, computeSplines, spreadCharges, numGrids, writeGlobal, threadsPerAtom, parallelExecutionWidth)
 
#define INSTANTIATE_SPREAD(order, numGrids, threadsPerAtom, parallelExecutionWidth)
 
#define INSTANTIATE_GATHER_2(order, numGrids, readGlobal, threadsPerAtom, parallelExecutionWidth)
 
#define INSTANTIATE_GATHER(order, numGrids, threadsPerAtom, parallelExecutionWidth)
 
#define INSTANTIATE_X(x, order, parallelExecutionWidth)
 
#define INSTANTIATE_SOLVE(parallelExecutionWidth)
 
#define INSTANTIATE(order, parallelExecutionWidth)
 

Functions

static int deviceParallelExecutionSize (const DeviceInformation &deviceInfo)
 
template<int order, bool computeSplines, bool spreadCharges, bool wrapX, bool wrapY, int mode, bool writeGlobal, ThreadsPerAtom threadsPerAtom, int parallelExecutionWidth>
__global__ void pmeSplineAndSpreadKernel (PmeGpuKernelParams kernelParams)
 PME HIP kernels forward declarations. Kernels are documented in their respective files.
 
template<GridOrdering gridOrdering, bool computeEnergyAndVirial, const int gridIndex, int parallelExecutionWidth>
__global__ void pmeSolveKernel (PmeGpuKernelParams kernelParams)
 
template<int order, bool wrapX, bool wrapY, int nGrids, bool readGlobal, ThreadsPerAtom threadsPerAtom, int parallelExecutionWidth>
__global__ void pmeGatherKernel (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 1, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 1, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, false, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 1, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 1, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, false, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 2, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 2, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, false, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 2, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 2, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, false, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, false, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, false, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, true, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, false, ThreadsPerAtom::Order, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, true, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, false, ThreadsPerAtom::OrderSquared, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, false, c_stateA, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, true, c_stateA, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, false, c_stateA, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, true, c_stateA, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, false, c_stateB, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, true, c_stateB, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, false, c_stateB, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, true, c_stateB, 32 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 1, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 1, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, false, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 1, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 1, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 1, false, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 2, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 2, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, false, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, false, true, true, 2, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, false, true, true, true, 2, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSplineAndSpreadKernel< 4, true, true, true, true, 2, false, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, false, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 1, false, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, true, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, false, ThreadsPerAtom::Order, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, true, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeGatherKernel< 4, true, true, 2, false, ThreadsPerAtom::OrderSquared, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, false, c_stateA, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, true, c_stateA, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, false, c_stateA, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, true, c_stateA, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, false, c_stateB, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::XYZ, true, c_stateB, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, false, c_stateB, 64 > (PmeGpuKernelParams kernelParams)
 
template __global__ void pmeSolveKernel< GridOrdering::YZX, true, c_stateB, 64 > (PmeGpuKernelParams kernelParams)
 
template<int parallelExecutionWidth>
static void setKernelPointersAndParams (struct PmeGpuProgramImpl *pmeGpuProgram)
 Helper function to set proper kernel functor pointers.
 

Variables

constexpr int c_pmeOrder = 4
 
constexpr bool c_wrapX = true
 
constexpr bool c_wrapY = true
 
constexpr int c_stateA = 0
 
constexpr int c_stateB = 1
 
template<int parallelExecutionWidth>
constexpr int sc_spreadHipMaxWarpsPerBlock = (parallelExecutionWidth == 64) ? 8 : 4
 
template<int parallelExecutionWidth>
static constexpr int sc_spreadMaxThreadsPerBlock
 
template<int parallelExecutionWidth>
static constexpr int sc_solveMaxThreadsPerBlock = c_solveMaxWarpsPerBlock * parallelExecutionWidth
 
template<int parallelExecutionWidth>
static constexpr int sc_gatherMaxThreadsPerBlock = c_gatherMaxWarpsPerBlock * parallelExecutionWidth
 

Macro Definition Documentation

#define INSTANTIATE (   order,
  parallelExecutionWidth 
)
Value:
INSTANTIATE_X(SPREAD, order, parallelExecutionWidth); \
INSTANTIATE_X(GATHER, order, parallelExecutionWidth); \
INSTANTIATE_SOLVE(parallelExecutionWidth);
#define INSTANTIATE_GATHER (   order,
  numGrids,
  threadsPerAtom,
  parallelExecutionWidth 
)
Value:
INSTANTIATE_GATHER_2(order, numGrids, true, threadsPerAtom, parallelExecutionWidth); \
INSTANTIATE_GATHER_2(order, numGrids, false, threadsPerAtom, parallelExecutionWidth);
static int numGrids(const GridSet::DomainSetup &domainSetup)
Returns the number of search grids.
Definition: gridset.cpp:67
#define INSTANTIATE_GATHER_2 (   order,
  numGrids,
  readGlobal,
  threadsPerAtom,
  parallelExecutionWidth 
)
Value:
extern template __global__ void pmeGatherKernel<order, true, true, numGrids, readGlobal, threadsPerAtom, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams);
int PmeGpuKernelParams
A dummy typedef for the GPU kernel arguments data placeholder on non-GPU builds.
Definition: pme_gpu_types_host.h:79
#define INSTANTIATE_SOLVE (   parallelExecutionWidth)
Value:
extern template __global__ void pmeSolveKernel<GridOrdering::XYZ, false, c_stateA, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::XYZ, true, c_stateA, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::YZX, false, c_stateA, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::YZX, true, c_stateA, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::XYZ, false, c_stateB, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::XYZ, true, c_stateB, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::YZX, false, c_stateB, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams); \
extern template __global__ void pmeSolveKernel<GridOrdering::YZX, true, c_stateB, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams);
int PmeGpuKernelParams
A dummy typedef for the GPU kernel arguments data placeholder on non-GPU builds.
Definition: pme_gpu_types_host.h:79
#define INSTANTIATE_SPREAD (   order,
  numGrids,
  threadsPerAtom,
  parallelExecutionWidth 
)
Value:
INSTANTIATE_SPREAD_2(order, true, true, numGrids, true, threadsPerAtom, parallelExecutionWidth); \
INSTANTIATE_SPREAD_2(order, true, false, numGrids, true, threadsPerAtom, parallelExecutionWidth); \
INSTANTIATE_SPREAD_2(order, false, true, numGrids, true, threadsPerAtom, parallelExecutionWidth); \
INSTANTIATE_SPREAD_2(order, true, true, numGrids, false, threadsPerAtom, parallelExecutionWidth);
static int numGrids(const GridSet::DomainSetup &domainSetup)
Returns the number of search grids.
Definition: gridset.cpp:67
#define INSTANTIATE_SPREAD_2 (   order,
  computeSplines,
  spreadCharges,
  numGrids,
  writeGlobal,
  threadsPerAtom,
  parallelExecutionWidth 
)
Value:
extern template __global__ void \
pmeSplineAndSpreadKernel<order, computeSplines, spreadCharges, true, true, numGrids, writeGlobal, threadsPerAtom, parallelExecutionWidth>( \
PmeGpuKernelParams kernelParams);
int PmeGpuKernelParams
A dummy typedef for the GPU kernel arguments data placeholder on non-GPU builds.
Definition: pme_gpu_types_host.h:79
#define INSTANTIATE_X (   x,
  order,
  parallelExecutionWidth 
)
Value:
INSTANTIATE_##x(order, 1, ThreadsPerAtom::Order, parallelExecutionWidth); \
INSTANTIATE_##x(order, 1, ThreadsPerAtom::OrderSquared, parallelExecutionWidth); \
INSTANTIATE_##x(order, 2, ThreadsPerAtom::Order, parallelExecutionWidth); \
INSTANTIATE_##x(order, 2, ThreadsPerAtom::OrderSquared, parallelExecutionWidth);
Use a number of threads equal to the PME order (ie. 4)
Use a number of threads equal to the square of the PME order (ie. 16)

Variable Documentation

template<int parallelExecutionWidth>
constexpr int sc_spreadMaxThreadsPerBlock
static
Initial value:
=
sc_spreadHipMaxWarpsPerBlock<parallelExecutionWidth> * parallelExecutionWidth