#include "config.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "nbnxm.h"
#include "pairlist.h"

Include dependency graph for gpu_types_common.h:

This graph shows which files directly or indirectly include this file:

Description

Implements common internal types for different NBNXN GPU implementations.

Author: Szilárd Páll <pall.szilard@gmail.com>

Classes
struct	NBStagingData
	Staging area for temporary data downloaded from the GPU. More...

struct	NBAtomDataGpu
	Nonbonded atom data - both inputs and outputs. More...

struct	NBParamGpu
	Parameters required for the GPU nonbonded calculations. More...

struct	Nbnxm::GpuTimers
	GPU region timers used for timing GPU kernels and H2D/D2H transfers. More...

struct	Nbnxm::GpuTimers::XFTransfers
	Timers for local or non-local coordinate/force transfers. More...

struct	Nbnxm::GpuTimers::Interaction
	Timers for local or non-local interaction related operations. More...

struct	Nbnxm::gpuPlistSorting
	Sorted pair list on GPU and data required for performing the sorting. More...

struct	Nbnxm::gpu_plist
	GPU pair list structure. More...

Macros
#define	GMX_NBNXN_PRUNE_KERNEL_JPACKED_CONCURRENCY 4
	Macro defining the default for the prune kernel's jPacked processing concurrency. More...

Variables
static constexpr int	c_sciHistogramSize = 8192
	Number of separate bins used during sorting of plist on the GPU. More...

static constexpr int	c_sciSortingThreadsPerBlock = 256
	Number of threads per block used by the GPU sorting kernel. More...

static constexpr int	c_pruneKernelJPackedConcurrency = 4
	Default for the prune kernel's jPacked processing concurrency.

Macro Definition Documentation

#define GMX_NBNXN_PRUNE_KERNEL_JPACKED_CONCURRENCY 4

Macro defining the default for the prune kernel's jPacked processing concurrency.

The GMX_NBNXN_PRUNE_KERNEL_JPACKED_CONCURRENCY macro allows a compile-time override of this setting; its default value is 4.

Variable Documentation

constexpr int c_sciHistogramSize = 8192

static

Number of separate bins used during sorting of plist on the GPU.

Ideally this number would be increased for very large system sizes (the CPU version of sorting uses 2 x avg(num cjPacked)), but as sorting has negligible impact for very large system sizes, we use a constant here for simplicity. On H100, sorting begins to have negligible effect for system sizes greater than ~400k atoms.

constexpr int c_sciSortingThreadsPerBlock = 256

static

Number of threads per block used by the GPU sorting kernel.

TODO: this is a reasonable default, but the number has not been tuned.

Description

Classes

Macros

Variables

Macro Definition Documentation

Variable Documentation