Gromacs 2018.8
#include <string>
#include "gromacs/math/vectypes.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"

Description

This file contains function declarations necessary for computing energies and forces for the PME long-ranged part (Coulomb and LJ).

Author
Berk Hess hess@kth.se

Macros

#define GMX_PME_SPREAD   (1<<0)
 Flag values that control what gmx_pme_do() will calculate. More...
 
#define GMX_PME_SOLVE   (1<<1)
 
#define GMX_PME_CALC_F   (1<<2)
 
#define GMX_PME_CALC_ENER_VIR   (1<<3)
 
#define GMX_PME_CALC_POT   (1<<4)
 
#define GMX_PME_DO_ALL_F   (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F)
 

Enumerations

enum  { GMX_SUM_GRID_FORWARD, GMX_SUM_GRID_BACKWARD }
 
enum  PmeRunMode { None, CPU, GPU, Mixed }
 Possible PME codepaths on a rank. More...
 
enum  PmeForceOutputHandling { PmeForceOutputHandling::Set, PmeForceOutputHandling::ReduceWithInput }
 PME gathering output forces treatment. More...
 

Functions

int minimalPmeGridSize (int pmeOrder)
 Return the smallest allowed PME grid size for pmeOrder.
 
bool gmx_pme_check_restrictions (int pme_order, int nkx, int nky, int nkz, int nnodes_major, bool useThreads, bool errorsAreFatal)
 Check restrictions on pme_order and the PME grid nkx,nky,nkz. More...
 
gmx_pme_t * gmx_pme_init (const t_commrec *cr, int nnodes_major, int nnodes_minor, const t_inputrec *ir, int homenr, gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj, gmx_bool bReproducible, real ewaldcoeff_q, real ewaldcoeff_lj, int nthread, PmeRunMode runMode, PmeGpu *pmeGpu, gmx_device_info_t *gpuInfo, const gmx::MDLogger &mdlog)
 Construct PME data. More...
 
void gmx_pme_destroy (gmx_pme_t *pme)
 Destroys the PME data structure.
 
int gmx_pme_do (struct gmx_pme_t *pme, int start, int homenr, rvec x[], rvec f[], real chargeA[], real chargeB[], real c6A[], real c6B[], real sigmaA[], real sigmaB[], matrix box, t_commrec *cr, int maxshift_x, int maxshift_y, t_nrnb *nrnb, gmx_wallcycle_t wcycle, matrix vir_q, matrix vir_lj, real *energy_q, real *energy_lj, real lambda_q, real lambda_lj, real *dvdlambda_q, real *dvdlambda_lj, int flags)
 Do a PME calculation on a CPU for the long range electrostatics and/or LJ. More...
 
int gmx_pmeonly (struct gmx_pme_t *pme, struct t_commrec *cr, t_nrnb *mynrnb, gmx_wallcycle_t wcycle, gmx_walltime_accounting_t walltime_accounting, t_inputrec *ir, PmeRunMode runMode)
 Called on the nodes that do PME exclusively (as slaves).
 
void gmx_pme_calc_energy (struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V)
 Calculate the PME grid energy V for n charges. More...
 
void gmx_pme_send_parameters (struct t_commrec *cr, const interaction_const_t *ic, gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj, real *chargeA, real *chargeB, real *sqrt_c6A, real *sqrt_c6B, real *sigmaA, real *sigmaB, int maxshift_x, int maxshift_y)
 Send the charges and maxshift to our PME-only node.
 
void gmx_pme_send_coordinates (struct t_commrec *cr, matrix box, rvec *x, real lambda_q, real lambda_lj, gmx_bool bEnerVir, gmx_int64_t step)
 Send the coordinates to our PME-only node and request a PME calculation.
 
void gmx_pme_send_finish (struct t_commrec *cr)
 Tell our PME-only node to finish.
 
void gmx_pme_send_resetcounters (struct t_commrec *cr, gmx_int64_t step)
 Tell our PME-only node to reset all cycle and flop counters.
 
void gmx_pme_receive_f (struct t_commrec *cr, gmx::ForceWithVirial *forceWithVirial, real *energy_q, real *energy_lj, real *dvdlambda_q, real *dvdlambda_lj, float *pme_cycles)
 PP nodes receive the long range forces from the PME nodes.
 
void gmx_pme_reinit_atoms (const gmx_pme_t *pme, const int nAtoms, const real *charges)
 This function updates the local atom data on the GPU after DD (charges, coordinates, etc.). TODO: it should update the PME CPU atom data as well (currently the PME CPU path is passed the input pointers anew on each gmx_pme_do() call). More...
 
bool pme_gpu_supports_input (const t_inputrec *ir, std::string *error)
 Checks whether the input system allows running PME on a GPU. TODO: this mostly duplicates an internal PME assert function pme_gpu_check_restrictions(), except that it works with an already formed gmx_pme_t structure. Should that one go away/work with inputrec? More...
 
PmeRunMode pme_run_mode (const gmx_pme_t *pme)
 Returns the active PME codepath (CPU, GPU, mixed). More...
 
bool pme_gpu_task_enabled (const gmx_pme_t *pme)
 Tells if PME is enabled to run on GPU (not necessarily active at the moment). More...
 
void pme_gpu_reset_timings (const gmx_pme_t *pme)
 Resets the PME GPU timings. To be called at the reset step. More...
 
void pme_gpu_get_timings (const gmx_pme_t *pme, gmx_wallclock_gpu_pme_t *timings)
 Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end. More...
 
void pme_gpu_prepare_computation (gmx_pme_t *pme, bool needToUpdateBox, const matrix box, gmx_wallcycle_t wcycle, int flags)
 Prepares the PME GPU computation (updating the box if needed). More...
 
void pme_gpu_launch_spread (gmx_pme_t *pme, const rvec *x, gmx_wallcycle_t wcycle)
 Launches first stage of PME on GPU - H2D input transfers, spreading kernel, and D2H grid transfer if needed. More...
 
void pme_gpu_launch_complex_transforms (gmx_pme_t *pme, gmx_wallcycle_t wcycle)
 Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode. More...
 
void pme_gpu_launch_gather (const gmx_pme_t *pme, gmx_wallcycle_t wcycle, PmeForceOutputHandling forceTreatment)
 Launches last stage of PME on GPU - force gathering and D2H force transfer. More...
 
void pme_gpu_wait_finish_task (const gmx_pme_t *pme, gmx_wallcycle_t wcycle, gmx::ArrayRef< const gmx::RVec > *forces, matrix virial, real *energy)
 Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy (if they were to be computed). More...
 
bool pme_gpu_try_finish_task (const gmx_pme_t *pme, gmx_wallcycle_t wcycle, gmx::ArrayRef< const gmx::RVec > *forces, matrix virial, real *energy, GpuTaskCompletion completionKind)
 Attempts to complete PME GPU tasks. More...
 

Macro Definition Documentation

#define GMX_PME_SPREAD   (1<<0)

Flag values that control what gmx_pme_do() will calculate.

These can be combined with bitwise-OR if more than one thing is required.
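
For example, a caller that needs spreading, solving, and forces plus energies/virial could combine the flags as in the following minimal sketch (the variable name pmeFlags and the include path are ours; the macros are the ones defined above):

    #include "gromacs/ewald/pme.h"   // assumed location of this header in the source tree

    // Request spread + solve + forces + energy/virial in one gmx_pme_do() call.
    int pmeFlags = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F | GMX_PME_CALC_ENER_VIR;
    // Equivalent to (GMX_PME_DO_ALL_F | GMX_PME_CALC_ENER_VIR), since GMX_PME_DO_ALL_F
    // expands to (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F).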

Enumeration Type Documentation

enum PmeForceOutputHandling

PME gathering output forces treatment.

Enumerator
Set 

Gather simply writes into provided force buffer.

ReduceWithInput 

Gather adds its output to the buffer.

On a GPU, this means an additional H2D copy before the kernel launch.

enum PmeRunMode

Possible PME codepaths on a rank.

Todo:
make this an enum class with the gmx_pme_t C++ refactoring
Enumerator
None 

No PME task is done.

CPU 

Whole PME computation is done on CPU.

GPU 

Whole PME computation is done on GPU.

Mixed 

Mixed mode: only spread and gather run on GPU; FFT and solving are done on CPU.
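
As an illustration of the enumerators, a small helper mapping a run mode to a human-readable label might look as follows (a sketch; the function name describePmeRunMode is ours, and the qualified PmeRunMode::CPU syntax simply relies on C++11 enum scoping):

    // Hypothetical helper, not part of pme.h.
    static const char *describePmeRunMode(PmeRunMode mode)
    {
        switch (mode)
        {
            case PmeRunMode::None:  return "no PME task on this rank";
            case PmeRunMode::CPU:   return "all PME stages on the CPU";
            case PmeRunMode::GPU:   return "all PME stages on the GPU";
            case PmeRunMode::Mixed: return "spread/gather on the GPU, FFT and solve on the CPU";
            default:                return "unknown PME run mode";
        }
    }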

Function Documentation

void gmx_pme_calc_energy ( struct gmx_pme_t *  pme,
int  n,
rvec *  x,
real *  q,
real *  V 
)

Calculate the PME grid energy V for n charges.

The potential (found in pme) must have been found already with a call to gmx_pme_do() with at least GMX_PME_SPREAD and GMX_PME_SOLVE specified. Note that the charges are not spread on the grid in the pme struct. Currently does not work in parallel or with free energy.
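
A minimal sketch of the required call order, assuming pme already holds a potential solved by an earlier gmx_pme_do() call with at least GMX_PME_SPREAD | GMX_PME_SOLVE, and that n, x and q describe probe charges prepared by the caller:

    // Earlier in the step (not shown): gmx_pme_do(pme, ..., GMX_PME_SPREAD | GMX_PME_SOLVE | ...);
    real V = 0;
    gmx_pme_calc_energy(pme, n, x, q, &V);
    // V now holds the PME grid energy of the n probe charges; note that these
    // charges are not spread onto the grid stored in pme.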

bool gmx_pme_check_restrictions ( int  pme_order,
int  nkx,
int  nky,
int  nkz,
int  nnodes_major,
bool  useThreads,
bool  errorsAreFatal 
)

Check restrictions on pme_order and the PME grid nkx,nky,nkz.

With errorsAreFatal=true, an exception or fatal error is generated on violation of restrictions. With errorsAreFatal=false, false is returned on violation of restrictions. When all restrictions are obeyed, true is returned. The useThreads argument tells whether any MPI rank doing PME uses more than one thread. If useThreads is unknown at the time of calling, pass true for conservative checking.

The PME GPU restrictions are checked separately during pme_gpu_init().
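
A non-fatal validation sketch, assuming pmeOrder, the grid dimensions and nMajorRanks are provided by the caller (all variable names here are ours):

    bool gridOk = gmx_pme_check_restrictions(pmeOrder,
                                             gridX, gridY, gridZ,
                                             nMajorRanks,
                                             true,    // useThreads: pass true when unknown (conservative)
                                             false);  // errorsAreFatal: report via the return value
    if (!gridOk)
    {
        // Adjust pme_order or the grid dimensions (see minimalPmeGridSize()) before gmx_pme_init().
    }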

int gmx_pme_do ( struct gmx_pme_t *  pme,
int  start,
int  homenr,
rvec  x[],
rvec  f[],
real  chargeA[],
real  chargeB[],
real  c6A[],
real  c6B[],
real  sigmaA[],
real  sigmaB[],
matrix  box,
t_commrec *  cr,
int  maxshift_x,
int  maxshift_y,
t_nrnb *  nrnb,
gmx_wallcycle_t  wcycle,
matrix  vir_q,
matrix  vir_lj,
real *  energy_q,
real *  energy_lj,
real  lambda_q,
real  lambda_lj,
real *  dvdlambda_q,
real *  dvdlambda_lj,
int  flags 
)

Do a PME calculation on a CPU for the long range electrostatics and/or LJ.

The meaning of flags is defined above, and determines which parts of the calculation are performed.

Returns
0 indicates success; a nonzero value is an error code.
gmx_pme_t* gmx_pme_init ( const t_commrec *  cr,
int  nnodes_major,
int  nnodes_minor,
const t_inputrec *  ir,
int  homenr,
gmx_bool  bFreeEnergy_q,
gmx_bool  bFreeEnergy_lj,
gmx_bool  bReproducible,
real  ewaldcoeff_q,
real  ewaldcoeff_lj,
int  nthread,
PmeRunMode  runMode,
PmeGpu *  pmeGpu,
gmx_device_info_t *  gpuInfo,
const gmx::MDLogger &  mdlog 
)

Construct PME data.

Exceptions
gmx::InconsistentInputError  if input grid sizes/PME order are inconsistent.
Returns
Pointer to newly allocated and initialized PME data.
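
A minimal lifecycle sketch for a CPU-only run, assuming cr, ir, mdlog and the numeric arguments come from the normal mdrun setup (the values shown are placeholders, not recommendations):

    gmx_pme_t *pme = gmx_pme_init(cr, nnodes_major, nnodes_minor, ir, homenr,
                                  FALSE, FALSE,      // no free energy for q or LJ
                                  FALSE,             // bReproducible
                                  ewaldcoeff_q, ewaldcoeff_lj,
                                  nthread,
                                  PmeRunMode::CPU,
                                  nullptr,           // pmeGpu: none for a CPU run
                                  nullptr,           // gpuInfo: no GPU assigned
                                  mdlog);
    // ... use pme with gmx_pme_do() or the GPU launch API below ...
    gmx_pme_destroy(pme);
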
void gmx_pme_reinit_atoms ( const gmx_pme_t *  pme,
const int  nAtoms,
const real *  charges 
)

This function updates the local atom data on the GPU after DD (charges, coordinates, etc.). TODO: it should update the PME CPU atom data as well (currently the PME CPU path is passed the input pointers anew on each gmx_pme_do() call).

Parameters
[in]  pme      The PME structure.
[in]  nAtoms   The number of particles.
[in]  charges  The pointer to the array of particle charges.
void pme_gpu_get_timings ( const gmx_pme_t *  pme,
gmx_wallclock_gpu_pme_t *  timings 
)

Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.

Parameters
[in]  pme      The PME structure.
[in]  timings  The gmx_wallclock_gpu_pme_t structure.
void pme_gpu_launch_complex_transforms ( gmx_pme_t *  pme,
gmx_wallcycle_t  wcycle 
)

Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.

Parameters
[in]  pme     The PME data structure.
[in]  wcycle  The wallclock counter.
void pme_gpu_launch_gather ( const gmx_pme_t *  pme,
gmx_wallcycle_t  wcycle,
PmeForceOutputHandling  forceTreatment 
)

Launches last stage of PME on GPU - force gathering and D2H force transfer.

Parameters
[in]  pme             The PME data structure.
[in]  wcycle          The wallclock counter.
[in]  forceTreatment  Tells how data should be treated. The gathering kernel either stores the output reciprocal forces into the host array, or copies its contents to the GPU first and accumulates. The reduction is non-atomic.
void pme_gpu_launch_spread ( gmx_pme_t *  pme,
const rvec *  x,
gmx_wallcycle_t  wcycle 
)

Launches first stage of PME on GPU - H2D input transfers, spreading kernel, and D2H grid transfer if needed.

Parameters
[in]  pme     The PME data structure.
[in]  x       The array of local atoms' coordinates.
[in]  wcycle  The wallclock counter.
void pme_gpu_prepare_computation ( gmx_pme_t *  pme,
bool  needToUpdateBox,
const matrix  box,
gmx_wallcycle_t  wcycle,
int  flags 
)

Prepares the PME GPU computation (updating the box if needed).

Parameters
[in]  pme              The PME data structure.
[in]  needToUpdateBox  Tells if the stored unit cell parameters should be updated from box.
[in]  box              The unit cell box.
[in]  wcycle           The wallclock counter.
[in]  flags            The combination of flags to affect this PME computation. The flags are the GMX_PME_ flags from pme.h.
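
Taken together with the launch and wait functions documented on this page, one step of the GPU code path might be sequenced roughly as follows (a hedged sketch, assuming pme was initialized with a GPU-capable run mode and that x, box, haveNewBox and wcycle come from the caller):

    int flags = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F;
    pme_gpu_prepare_computation(pme, haveNewBox, box, wcycle, flags);
    pme_gpu_launch_spread(pme, x, wcycle);              // H2D transfers + spreading kernel
    pme_gpu_launch_complex_transforms(pme, wcycle);     // FFT R2C, solve, FFT C2R (GPU or CPU, per run mode)
    pme_gpu_launch_gather(pme, wcycle, PmeForceOutputHandling::Set);  // write forces into the output buffer

    gmx::ArrayRef<const gmx::RVec> pmeForces;
    matrix pmeVirial;
    real   pmeEnergy = 0;
    pme_gpu_wait_finish_task(pme, wcycle, &pmeForces, pmeVirial, &pmeEnergy);  // block and collect outputs
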
void pme_gpu_reset_timings ( const gmx_pme_t *  pme)

Resets the PME GPU timings. To be called at the reset step.

Parameters
[in]  pme  The PME structure.
bool pme_gpu_supports_input ( const t_inputrec *  ir,
std::string *  error 
)

Checks whether the input system allows running PME on a GPU. TODO: this mostly duplicates an internal PME assert function pme_gpu_check_restrictions(), except that it works with an already formed gmx_pme_t structure. Should that one go away/work with inputrec?

Parameters
[in]   ir     Input system.
[out]  error  The error message if the input is not supported on GPU.
Returns
true if PME can run on GPU with this input, false otherwise.
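
A small decision sketch, assuming ir comes from the caller; the fallback logic and the fprintf reporting are ours, only the pme.h call itself is from this header:

    #include <cstdio>

    std::string errorReason;
    PmeRunMode runMode = PmeRunMode::CPU;
    if (pme_gpu_supports_input(ir, &errorReason))
    {
        runMode = PmeRunMode::GPU;      // or PmeRunMode::Mixed, chosen elsewhere
    }
    else
    {
        std::fprintf(stderr, "PME stays on the CPU: %s\n", errorReason.c_str());
    }
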
bool pme_gpu_task_enabled ( const gmx_pme_t *  pme)
inline

Tells if PME is enabled to run on GPU (not necessarily active at the moment).

Todo:
This is rather static data that should be managed by the hardware assignment manager. For now, it is synonymous with the active PME codepath (in the absence of dynamic switching).
Parameters
[in]  pme  The PME data structure.
Returns
true if PME can run on GPU, false otherwise.
bool pme_gpu_try_finish_task ( const gmx_pme_t *  pme,
gmx_wallcycle_t  wcycle,
gmx::ArrayRef< const gmx::RVec > *  forces,
matrix  virial,
real *  energy,
GpuTaskCompletion  completionKind 
)

Attempts to complete PME GPU tasks.

The completionKind argument controls whether the function blocks until all enqueued PME GPU tasks have completed (as pme_gpu_wait_finish_task() does) or only checks their status and returns immediately if they have not. When blocking, or when the tasks have completed, it also gets the output forces by assigning the ArrayRef to the forces pointer passed in. Virial/energy are also outputs if they were to be computed.

Note: also launches the reinitialization of the PME output buffers. TODO: this should be moved out to avoid miscounting its wall-time (as wait instead of launch).

Parameters
[in]   pme             The PME data structure.
[in]   wcycle          The wallclock counter.
[out]  forces          The output forces.
[out]  virial          The output virial matrix.
[out]  energy          The output energy.
[in]   completionKind  Indicates whether PME task completion should only be checked rather than waited for.
Returns
True if the PME GPU tasks have completed.
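
A hedged overlap sketch: poll for completion and do other CPU work in the meantime. GpuTaskCompletion::Check is assumed to be the non-blocking enumerator declared in the GPU task headers, and doSomeOtherCpuWork() is purely illustrative:

    gmx::ArrayRef<const gmx::RVec> pmeForces;
    matrix pmeVirial;
    real   pmeEnergy = 0;
    while (!pme_gpu_try_finish_task(pme, wcycle, &pmeForces, pmeVirial, &pmeEnergy,
                                    GpuTaskCompletion::Check))
    {
        doSomeOtherCpuWork();   // e.g. progress other force work while PME finishes on the GPU
    }
    // pmeForces/pmeVirial/pmeEnergy now hold the PME outputs.
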
void pme_gpu_wait_finish_task ( const gmx_pme_t *  pme,
gmx_wallcycle_t  wcycle,
gmx::ArrayRef< const gmx::RVec > *  forces,
matrix  virial,
real *  energy 
)

Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy (if they were to be computed).

Parameters
[in]   pme     The PME data structure.
[out]  wcycle  The wallclock counter.
[out]  forces  The output forces.
[out]  virial  The output virial matrix.
[out]  energy  The output energy.
PmeRunMode pme_run_mode ( const gmx_pme_t *  pme)

Returns the active PME codepath (CPU, GPU, mixed).

Todo:
This is rather static data that should be managed by the higher-level task scheduler.
Parameters
[in]  pme  The PME data structure.
Returns
active PME codepath.