#include "gmxpre.h"
#include "pme.h"
#include "config.h"
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <list>
#include "gromacs/domdec/domdec.h"
#include "gromacs/ewald/ewald_utils.h"
#include "gromacs/fft/parallel_3dfft.h"
#include "gromacs/fileio/pdbio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/math/gmxcomplex.h"
#include "gromacs/math/invertmatrix.h"
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/cyclecounter.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/gmxomp.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/utility/unique_cptr.h"
#include "calculate_spline_moduli.h"
#include "pme_gather.h"
#include "pme_gpu_internal.h"
#include "pme_grid.h"
#include "pme_internal.h"
#include "pme_redistribute.h"
#include "pme_solve.h"
#include "pme_spline_work.h"
#include "pme_spread.h"

Include dependency graph for pme.cpp:

Description

This file contains function definitions necessary for computing energies and forces for the PME long-ranged part (Coulomb and LJ).

Author: Erik Lindahl erik@kth.se; Berk Hess hess@kth.se

Functions
static bool	addMessageIfNotSupported (const std::list< std::string > &errorReasons, std::string *error)
	Help build a descriptive message in `error` if there are `errorReasons` why PME on GPU is not supported. More...

bool	pme_gpu_supports_build (std::string *error)
	Checks whether the GROMACS build allows running PME on a GPU. TODO: this partly duplicates an internal PME assert function pme_gpu_check_restrictions(), except that the latter works with a formed gmx_pme_t structure. Should that one go away/work with inputrec? More...

bool	pme_gpu_supports_hardware (const gmx_hw_info_t &hwinfo, std::string *error)
	Checks whether the detected (GPU) hardware allows running PME on a GPU. More...

bool	pme_gpu_supports_input (const t_inputrec &ir, std::string *error)
	Checks whether the input system allows running PME on a GPU. TODO: this partly duplicates an internal PME assert function pme_gpu_check_restrictions(), except that the latter works with a formed gmx_pme_t structure. Should that one go away/work with inputrec? More...

static bool	pme_gpu_check_restrictions (const gmx_pme_t *pme, std::string *error)
	Finds out if PME with given inputs is possible to run on GPU. This function is an internal final check, validating the whole PME structure on creation, but it still duplicates the preliminary checks from the above (externally exposed) pme_gpu_supports_input() - just in case. More...

PmeRunMode	pme_run_mode (const gmx_pme_t *pme)
	Returns the active PME codepath (CPU, GPU, mixed). More...

gmx::PinningPolicy	pme_get_pinning_policy ()
	Return the pinning policy appropriate for this build configuration for relevant buffers used for PME task on this rank (e.g. running on a GPU).

static void	setup_coordinate_communication (PmeAtomComm *atc)
	Set up coordinate communication.

static int	mult_up (int n, int f)
	Round `n` up to the next multiple of `f`.

static double	estimate_pme_load_imbalance (struct gmx_pme_t *pme)
	Return estimate of the load imbalance from the PME grid not being a good match for the number of PME ranks.

static void	init_overlap_comm (pme_overlap_t *ol, int norder, MPI_Comm comm, int nnodes, int nodeid, int ndata, int commplainsize)
	Initialize data structure for communication.

int	minimalPmeGridSize (int pmeOrder)
	Return the smallest allowed PME grid size for `pmeOrder`.

bool	gmx_pme_check_restrictions (int pme_order, int nkx, int nky, int nkz, int numPmeDomainsAlongX, bool useThreads, bool errorsAreFatal)
	Check restrictions on pme_order and the PME grid nkx,nky,nkz. More...

static int	div_round_up (int enumerator, int denominator)
	Divide `enumerator` by `denominator`, rounding the result up.

gmx_pme_t *	gmx_pme_init (const t_commrec *cr, const NumPmeDomains &numPmeDomains, const t_inputrec *ir, gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj, gmx_bool bReproducible, real ewaldcoeff_q, real ewaldcoeff_lj, int nthread, PmeRunMode runMode, PmeGpu *pmeGpu, const DeviceContext *deviceContext, const DeviceStream *deviceStream, const PmeGpuProgram *pmeGpuProgram, const gmx::MDLogger &mdlog)
	Construct PME data. More...

void	gmx_pme_reinit (struct gmx_pme_t **pmedata, const t_commrec *cr, struct gmx_pme_t *pme_src, const t_inputrec *ir, const ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj)
	As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from pme_src. This is only called when the PME cut-off/grid size changes.

void	gmx_pme_calc_energy (gmx_pme_t *pme, gmx::ArrayRef< const gmx::RVec > x, gmx::ArrayRef< const real > q, real *V)
	Calculate the PME grid energy V for n charges. More...

static void	calc_initial_lb_coeffs (gmx::ArrayRef< real > coefficient, const real *local_c6, const real *local_sigma)
	Calculate initial Lorentz-Berthelot coefficients for LJ-PME.

static void	calc_next_lb_coeffs (gmx::ArrayRef< real > coefficient, const real *local_sigma)
	Calculate next Lorentz-Berthelot coefficients for LJ-PME.

int	gmx_pme_do (struct gmx_pme_t *pme, gmx::ArrayRef< const gmx::RVec > coordinates, gmx::ArrayRef< gmx::RVec > forces, real chargeA[], real chargeB[], real c6A[], real c6B[], real sigmaA[], real sigmaB[], const matrix box, const t_commrec *cr, int maxshift_x, int maxshift_y, t_nrnb *nrnb, gmx_wallcycle *wcycle, matrix vir_q, matrix vir_lj, real *energy_q, real *energy_lj, real lambda_q, real lambda_lj, real *dvdlambda_q, real *dvdlambda_lj, const gmx::StepWorkload &stepWork)
	Do a PME calculation on a CPU for the long range electrostatics and/or LJ. More...

void	gmx_pme_destroy (gmx_pme_t *pme)
	Destroys the PME data structure.

void	gmx_pme_reinit_atoms (gmx_pme_t *pme, const int numAtoms, const real *chargesA, const real *chargesB)
	This function updates the local atom data on GPU after DD (charges, coordinates, etc.). TODO: it should update the PME CPU atom data as well. (currently PME CPU call gmx_pme_do() gets passed the input pointers for each computation). More...

bool	gmx_pme_grid_matches (const gmx_pme_t &pme, const ivec grid_size)
	Return whether the grid of `pme` is identical to `grid_size`.

Variables
const int	gmxCacheLineSize = 64
	Number of bytes in a cache line. More...

Function Documentation

static bool addMessageIfNotSupported	(	const std::list< std::string > &	errorReasons,
		std::string *	error
	)

static

Help build a descriptive message in error if there are errorReasons why PME on GPU is not supported.

Returns: Whether the lack of errorReasons indicates there is support.

void gmx_pme_calc_energy	(	gmx_pme_t *	pme,
		gmx::ArrayRef< const gmx::RVec >	x,
		gmx::ArrayRef< const real >	q,
		real *	V
	)

Calculate the PME grid energy V for n charges.

The potential (found in pme) must have been found already with a call to gmx_pme_do(). Note that the charges are not spread on the grid in the pme struct. Currently does not work in parallel or with free energy.

bool gmx_pme_check_restrictions	(	int	pme_order,
		int	nkx,
		int	nky,
		int	nkz,
		int	numPmeDomainsAlongX,
		bool	useThreads,
		bool	errorsAreFatal
	)

Check restrictions on pme_order and the PME grid nkx,nky,nkz.

With errorsAreFatal=true, an exception or fatal error is generated on violation of restrictions. With errorsAreFatal=false, false is returned on violation of restrictions. When all restrictions are obeyed, true is returned. Argument useThreads tells if any MPI rank doing PME uses more than 1 thread. If the value of useThreads is unknown at the time of the call, pass true for conservative checking.

The PME GPU restrictions are checked separately during pme_gpu_init().

int gmx_pme_do	(	struct gmx_pme_t *	pme,
		gmx::ArrayRef< const gmx::RVec >	coordinates,
		gmx::ArrayRef< gmx::RVec >	forces,
		real	chargeA[],
		real	chargeB[],
		real	c6A[],
		real	c6B[],
		real	sigmaA[],
		real	sigmaB[],
		const matrix	box,
		const t_commrec *	cr,
		int	maxshift_x,
		int	maxshift_y,
		t_nrnb *	nrnb,
		gmx_wallcycle *	wcycle,
		matrix	vir_q,
		matrix	vir_lj,
		real *	energy_q,
		real *	energy_lj,
		real	lambda_q,
		real	lambda_lj,
		real *	dvdlambda_q,
		real *	dvdlambda_lj,
		const gmx::StepWorkload &	stepWork
	)

Do a PME calculation on a CPU for the long range electrostatics and/or LJ.

Computes the PME forces and the energy and virial, when requested, for all atoms in coordinates. Forces, when requested, are added to the buffer forces, which is allowed to contain more elements than the number of elements in coordinates. The meaning of flags is defined above, and determines which parts of the calculation are performed.

Returns: 0 indicates all well, non zero is an error code.

gmx_pme_t* gmx_pme_init	(	const t_commrec *	cr,
		const NumPmeDomains &	numPmeDomains,
		const t_inputrec *	ir,
		gmx_bool	bFreeEnergy_q,
		gmx_bool	bFreeEnergy_lj,
		gmx_bool	bReproducible,
		real	ewaldcoeff_q,
		real	ewaldcoeff_lj,
		int	nthread,
		PmeRunMode	runMode,
		PmeGpu *	pmeGpu,
		const DeviceContext *	deviceContext,
		const DeviceStream *	deviceStream,
		const PmeGpuProgram *	pmeGpuProgram,
		const gmx::MDLogger &	mdlog
	)

Construct PME data.

Exceptions

gmx::InconsistentInputError if input grid sizes/PME order are inconsistent.

Returns: Pointer to newly allocated and initialized PME data.

Todo: We should evolve something like a GpuManager that holds DeviceInformation* and PmeGpuProgram* and perhaps other related things whose lifetime can/should exceed that of a task (or perhaps task manager). See Issue #2522.

void gmx_pme_reinit_atoms	(	gmx_pme_t *	pme,
		int	numAtoms,
		const real *	chargesA,
		const real *	chargesB
	)

This function updates the local atom data on GPU after DD (charges, coordinates, etc.). TODO: it should update the PME CPU atom data as well (currently the PME CPU call gmx_pme_do() gets passed the input pointers for each computation).

Parameters

[in,out]	pme	The PME structure.
[in]	numAtoms	The number of particles.
[in]	chargesA	The pointer to the array of particle charges in the normal state or FEP state A. Can be nullptr if PME is not performed on the GPU.
[in]	chargesB	The pointer to the array of particle charges in state B. Only used if charges are perturbed and can otherwise be nullptr.

static bool pme_gpu_check_restrictions	(	const gmx_pme_t *	pme,
		std::string *	error
	)

static

Finds out if PME with given inputs is possible to run on GPU. This function is an internal final check, validating the whole PME structure on creation, but it still duplicates the preliminary checks from the above (externally exposed) pme_gpu_supports_input() - just in case.

Parameters

[in]	pme	The PME structure.
[out]	error	The error message if the input is not supported on GPU.

Returns: True if this PME input is possible to run on GPU, false otherwise.

bool pme_gpu_supports_build ( std::string * error )

Checks whether the GROMACS build allows running PME on a GPU. TODO: this partly duplicates an internal PME assert function pme_gpu_check_restrictions(), except that the latter works with a formed gmx_pme_t structure. Should that one go away/work with inputrec?

Parameters

[out] error If non-null, the error message when PME is not supported on GPU.

Returns: true if PME can run on GPU on this build, false otherwise.

bool pme_gpu_supports_hardware	(	const gmx_hw_info_t &	hwinfo,
		std::string *	error
	)

Checks whether the detected (GPU) hardware allows running PME on a GPU.

Parameters

[in]	hwinfo	Information about the detected hardware
[out]	error	If non-null, the error message when PME is not supported on GPU.

Returns: true if PME can run on GPU on this hardware, false otherwise.

bool pme_gpu_supports_input	(	const t_inputrec &	ir,
		std::string *	error
	)

Checks whether the input system allows running PME on a GPU. TODO: this partly duplicates an internal PME assert function pme_gpu_check_restrictions(), except that the latter works with a formed gmx_pme_t structure. Should that one go away/work with inputrec?

Parameters

[in]	ir	Input system.
[out]	error	If non-null, the error message if the input is not supported on GPU.

Returns: true if PME can run on GPU with this input, false otherwise.

PmeRunMode pme_run_mode ( const gmx_pme_t * pme )

Returns the active PME codepath (CPU, GPU, mixed).

Todo: This is rather static data that should be managed by the higher level task scheduler.

Parameters

[in] pme The PME data structure.

Returns: active PME codepath.

Variable Documentation

const int gmxCacheLineSize = 64

Number of bytes in a cache line.

Must also be a multiple of the SIMD and SIMD4 register size, to preserve alignment.

Description

Functions

Variables

Function Documentation

Variable Documentation