Gromacs
2026.0-dev-20241204-d69d709
|
#include "gromacs/fft/fft.h"
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpu_macros.h"
#include "pme_gpu_types_host.h"
#include "pme_output.h"
This file contains internal function definitions for performing the PME calculations on GPU. These are not meant to be exposed outside of the PME GPU code. As of now, their bodies are still in the common pme_gpu.cpp files.
Classes | |
class | gmx::ArrayRef< typename > |
STL-like interface to a C array of T (or part of a std container of T). More... | |
Macros | |
#define | FEP_STATE_A 0 |
Grid index of FEP state A (or unperturbed system) | |
#define | FEP_STATE_B 1 |
Grid index of FEP state B. | |
Typedefs | |
typedef struct gmx_parallel_3dfft * | gmx_parallel_3dfft_t |
Enumerations | |
enum | PmeSplineDataType { Values, Derivatives } |
Type of spline data. | |
enum | GridOrdering { YZX, XYZ, Count } |
PME grid dimension ordering (from major to minor) | |
Functions | |
int | pme_gpu_get_atom_data_block_size () |
Returns the size of the block size requirement. More... | |
int | pme_gpu_get_atoms_per_warp (const PmeGpu *pmeGpu) |
Return the number of atoms per warp. | |
void | pme_gpu_synchronize (const PmeGpu *pmeGpu) |
Synchronizes the current computation, waiting for the GPU kernels/transfers to finish. More... | |
void | pme_gpu_alloc_energy_virial (PmeGpu *pmeGpu) |
Allocates the fixed size energy and virial buffer both on GPU and CPU. More... | |
void | pme_gpu_free_energy_virial (PmeGpu *pmeGpu) |
Frees the energy and virial memory both on GPU and CPU. More... | |
void | pme_gpu_clear_energy_virial (const PmeGpu *pmeGpu, bool gpuGraphWithSeparatePmeRank) |
Clears the energy and virial memory on GPU with 0. Should be called at the end of PME computation which returned energy/virial. More... | |
void | pme_gpu_realloc_and_copy_bspline_values (PmeGpu *pmeGpu, int gridIndex=0) |
Reallocates and copies the pre-computed B-spline values to the GPU. More... | |
void | pme_gpu_free_bspline_values (PmeGpu *pmeGpu) |
Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers). More... | |
void | pme_gpu_realloc_forces (PmeGpu *pmeGpu) |
Reallocates the GPU buffer for the PME forces. More... | |
void | pme_gpu_free_forces (const PmeGpu *pmeGpu) |
Frees the GPU buffer for the PME forces. More... | |
void | pme_gpu_copy_input_forces (PmeGpu *pmeGpu) |
Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces). To be called e.g. after the bonded calculations. More... | |
void | pme_gpu_copy_output_forces (PmeGpu *pmeGpu) |
Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage. More... | |
bool | pme_gpu_stream_query (const PmeGpu *pmeGpu) |
Checks whether work in the PME GPU stream has completed. More... | |
void | pme_gpu_realloc_and_copy_input_coefficients (const PmeGpu *pmeGpu, const float *h_coefficients, int gridIndex=0) |
Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer. Clears the padded part if needed. More... | |
void | pme_gpu_free_coefficients (const PmeGpu *pmeGpu) |
Frees the charges/coefficients on the GPU. More... | |
void | pme_gpu_realloc_spline_data (PmeGpu *pmeGpu) |
Reallocates the buffers on the GPU and the host for the atoms spline data. More... | |
void | pme_gpu_free_spline_data (PmeGpu *pmeGpu) |
Frees the buffers on the GPU for the atoms spline data. More... | |
void | pme_gpu_realloc_grid_indices (PmeGpu *pmeGpu) |
Reallocates the buffers on the GPU and the host for the particle gridline indices. More... | |
void | pme_gpu_free_grid_indices (PmeGpu *pmeGpu) |
Frees the buffer on the GPU for the particle gridline indices. More... | |
void | pme_gpu_realloc_grids (PmeGpu *pmeGpu) |
Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU. More... | |
void | pme_gpu_free_grids (const PmeGpu *pmeGpu) |
Frees the real space grid and the complex reciprocal grid (if needed) on the GPU. More... | |
void | pme_gpu_reinit_haloexchange (PmeGpu *pmeGpu) |
Reinitialize PME halo exchange parameters and staging device buffers for MPI communication. More... | |
void | pme_gpu_free_haloexchange (const PmeGpu *pmeGpu) |
Frees device staging buffers used for PME halo exchange. More... | |
void | pme_gpu_clear_grids (const PmeGpu *pmeGpu) |
Clears the real space grid on the GPU. Should be called at the end of each computation. More... | |
void | pme_gpu_realloc_and_copy_fract_shifts (PmeGpu *pmeGpu) |
Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU. More... | |
void | pme_gpu_free_fract_shifts (const PmeGpu *pmeGpu) |
Frees the pre-computed fractional coordinates' shifts on the GPU. More... | |
void | pme_gpu_copy_input_gather_grid (const PmeGpu *pmeGpu, const float *h_grid, int gridIndex=0) |
Copies the input real-space grid from the host to the GPU. More... | |
void | pme_gpu_copy_output_spread_grid (const PmeGpu *pmeGpu, float *h_grid, int gridIndex=0) |
Copies the output real-space grid from the GPU to the host. More... | |
void | pme_gpu_copy_output_spread_atom_data (PmeGpu *pmeGpu) |
Copies the spread output spline data and gridline indices from the GPU to the host. More... | |
void | pme_gpu_copy_input_gather_atom_data (const PmeGpu *pmeGpu) |
Copies the gather input spline data and gridline indices from the host to the GPU. More... | |
void | pme_gpu_sync_spread_grid (const PmeGpu *pmeGpu) |
Waits for the grid copying to the host-side buffer after spreading to finish. More... | |
void | pme_gpu_reinit_3dfft (const PmeGpu *pmeGpu) |
Initializes the CUDA FFT structures. More... | |
void | pme_gpu_destroy_3dfft (const PmeGpu *pmeGpu) |
Destroys the CUDA FFT structures. More... | |
void | pme_gpu_spread (PmeGpu *pmeGpu, GpuEventSynchronizer *xReadyOnDevice, gmx::ArrayRef< PmeAndFftGrids > h_grids, bool computeSplines, bool spreadCharges, real lambda, bool useGpuDirectComm, gmx::PmeCoordinateReceiverGpu *pmeCoordinateReceiverGpu, bool useMdGpuGraph, gmx_wallcycle *wcycle) |
A GPU spline computation and charge spreading function. More... | |
void | pme_gpu_3dfft (const PmeGpu *pmeGpu, enum gmx_fft_direction direction, int gridIndex=0) |
3D FFT R2C/C2R routine. More... | |
void | pme_gpu_solve (PmeGpu *pmeGpu, int gridIndex, t_complex *h_grid, GridOrdering gridOrdering, bool computeEnergyAndVirial) |
A GPU Fourier space solving function. More... | |
void | pme_gpu_gather (PmeGpu *pmeGpu, gmx::ArrayRef< PmeAndFftGrids > h_grids, float lambda, gmx_wallcycle *wcycle, bool computeVirial) |
A GPU force gathering function. More... | |
void | pme_gpu_set_kernelparam_coordinates (const PmeGpu *pmeGpu, DeviceBuffer< gmx::RVec > d_x) |
Sets the device pointer to coordinate data. More... | |
DeviceBuffer< gmx::RVec > | pme_gpu_get_kernelparam_forces (const PmeGpu *pmeGpu) |
Return pointer to device copy of force data. More... | |
void | pme_gpu_set_kernelparam_useNvshmem (const PmeGpu *pmeGpu, bool useNvshmem) |
GpuEventSynchronizer * | pme_gpu_get_forces_ready_synchronizer (const PmeGpu *pmeGpu) |
Return pointer to the sync object triggered after the PME force calculation completion. More... | |
const PmeGpuSettings & | pme_gpu_settings (const PmeGpu *pmeGpu) |
Returns the PME GPU settings. More... | |
PmeGpuStaging & | pme_gpu_staging (PmeGpu *pmeGpu) |
Returns the PME GPU staging object. More... | |
void | pme_gpu_set_testing (PmeGpu *pmeGpu, bool testing) |
Sets whether the PME module is running in testing mode. More... | |
void | pme_gpu_getEnergyAndVirial (const gmx_pme_t &pme, float lambda, PmeOutput *output) |
Returns the energy and virial GPU outputs, useful for testing. More... | |
PmeOutput | pme_gpu_getOutput (gmx_pme_t *pme, bool computeEnergyAndVirial, real lambdaQ) |
Returns the GPU outputs (forces, energy and virial) More... | |
void | pme_gpu_update_input_box (PmeGpu *pmeGpu, const matrix box) |
Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation(). More... | |
void | pme_gpu_get_real_grid_sizes (const PmeGpu *pmeGpu, gmx::IVec *gridSize, gmx::IVec *paddedGridSize) |
Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests. More... | |
void | pme_gpu_reinit (gmx_pme_t *pme, const DeviceContext *deviceContext, const DeviceStream *deviceStream, const PmeGpuProgram *pmeGpuProgram, bool useMdGpuGraph) |
(Re-)initializes the PME GPU data at the beginning of the run or on DLB. More... | |
void | pme_gpu_destroy (PmeGpu *pmeGpu) |
Destroys the PME GPU data at the end of the run. More... | |
void | pme_gpu_reinit_atoms (PmeGpu *pmeGpu, int nAtoms, const real *chargesA, const real *chargesB=nullptr) |
Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU. More... | |
void | pme_gpu_reinit_computation (const PmeGpu *pmeGpu) |
The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing. More... | |
PmeOutput | pme_gpu_wait_finish_task (gmx_pme_t *pme, bool computeEnergyAndVirial, real lambdaQ, gmx_wallcycle *wcycle) |
Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy (if they were to be computed). More... | |
void pme_gpu_3dfft | ( | const PmeGpu * | pmeGpu, |
enum gmx_fft_direction | direction, | ||
int | gridIndex = 0 |
||
) |
3D FFT R2C/C2R routine.
[in] | pmeGpu | The PME GPU structure. |
[in] | direction | Transform direction (real-to-complex or complex-to-real) |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
void pme_gpu_alloc_energy_virial | ( | PmeGpu * | pmeGpu | ) |
Allocates the fixed size energy and virial buffer both on GPU and CPU.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_clear_energy_virial | ( | const PmeGpu * | pmeGpu, |
bool | gpuGraphWithSeparatePmeRank | ||
) |
Clears the energy and virial memory on GPU with 0. Should be called at the end of PME computation which returned energy/virial.
[in] | pmeGpu | The PME GPU structure. |
[in] | gpuGraphWithSeparatePmeRank | Whether MD GPU Graph with separate PME rank is in use. |
void pme_gpu_clear_grids | ( | const PmeGpu * | pmeGpu | ) |
Clears the real space grid on the GPU. Should be called at the end of each computation.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_input_forces | ( | PmeGpu * | pmeGpu | ) |
Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces). To be called e.g. after the bonded calculations.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_input_gather_atom_data | ( | const PmeGpu * | pmeGpu | ) |
Copies the gather input spline data and gridline indices from the host to the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_input_gather_grid | ( | const PmeGpu * | pmeGpu, |
const float * | h_grid, | ||
int | gridIndex = 0 |
||
) |
Copies the input real-space grid from the host to the GPU.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_grid | The host-side grid buffer. |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
void pme_gpu_copy_output_forces | ( | PmeGpu * | pmeGpu | ) |
Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_output_spread_atom_data | ( | PmeGpu * | pmeGpu | ) |
Copies the spread output spline data and gridline indices from the GPU to the host.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_output_spread_grid | ( | const PmeGpu * | pmeGpu, |
float * | h_grid, | ||
int | gridIndex = 0 |
||
) |
Copies the output real-space grid from the GPU to the host.
[in] | pmeGpu | The PME GPU structure. |
[out] | h_grid | The host-side grid buffer. |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
void pme_gpu_destroy | ( | PmeGpu * | pmeGpu | ) |
Destroys the PME GPU data at the end of the run.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_destroy_3dfft | ( | const PmeGpu * | pmeGpu | ) |
Destroys the CUDA FFT structures.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_bspline_values | ( | PmeGpu * | pmeGpu | ) |
Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_coefficients | ( | const PmeGpu * | pmeGpu | ) |
Frees the charges/coefficients on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_energy_virial | ( | PmeGpu * | pmeGpu | ) |
Frees the energy and virial memory both on GPU and CPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_forces | ( | const PmeGpu * | pmeGpu | ) |
Frees the GPU buffer for the PME forces.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_fract_shifts | ( | const PmeGpu * | pmeGpu | ) |
Frees the pre-computed fractional coordinates' shifts on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_grid_indices | ( | PmeGpu * | pmeGpu | ) |
Frees the buffer on the GPU for the particle gridline indices.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_grids | ( | const PmeGpu * | pmeGpu | ) |
Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_haloexchange | ( | const PmeGpu * | pmeGpu | ) |
Frees device staging buffers used for PME halo exchange.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_spline_data | ( | PmeGpu * | pmeGpu | ) |
Frees the buffers on the GPU for the atoms spline data.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_gather | ( | PmeGpu * | pmeGpu, |
gmx::ArrayRef< PmeAndFftGrids > | h_grids, | ||
float | lambda, | ||
gmx_wallcycle * | wcycle, | ||
bool | computeVirial | ||
) |
A GPU force gathering function.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_grids | The host-side grid buffers and FFT setup (used only in testing mode). |
[in] | lambda | The lambda value to use. |
[in] | wcycle | The wallclock counter. |
[in] | computeVirial | Whether this is a virial step. |
int pme_gpu_get_atom_data_block_size | ( | ) |
Returns the size of the block size requirement.
The GPU version of PME requires that the coordinates array have a size divisible by the returned number.
GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer | ( | const PmeGpu * | pmeGpu | ) |
Return pointer to the sync object triggered after the PME force calculation completion.
[in] | pmeGpu | The PME GPU structure. |
DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces | ( | const PmeGpu * | pmeGpu | ) |
Return pointer to device copy of force data.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_get_real_grid_sizes | ( | const PmeGpu * | pmeGpu, |
gmx::IVec * | gridSize, | ||
gmx::IVec * | paddedGridSize | ||
) |
Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
[in] | pmeGpu | The PME GPU structure. |
[out] | gridSize | Pointer to the grid dimensions to fill in. |
[out] | paddedGridSize | Pointer to the padded grid dimensions to fill in. |
void pme_gpu_getEnergyAndVirial | ( | const gmx_pme_t & | pme, |
float | lambda, | ||
PmeOutput * | output | ||
) |
Returns the energy and virial GPU outputs, useful for testing.
It is the caller's responsibility to be aware of whether the GPU handled the solve stage.
[in] | pme | The PME structure. |
[in] | lambda | The lambda value to use when calculating the results. |
[out] | output | Pointer to output where energy and virial should be stored. |
PmeOutput pme_gpu_getOutput | ( | gmx_pme_t * | pme, |
bool | computeEnergyAndVirial, | ||
real | lambdaQ | ||
) |
Returns the GPU outputs (forces, energy and virial)
[in] | pme | The PME structure. |
[in] | computeEnergyAndVirial | Whether the energy and virial are being computed |
[in] | lambdaQ | The Coulomb lambda to use when finalizing the output. |
void pme_gpu_realloc_and_copy_bspline_values | ( | PmeGpu * | pmeGpu, |
int | gridIndex = 0 |
||
) |
Reallocates and copies the pre-computed B-spline values to the GPU.
[in,out] | pmeGpu | The PME GPU structure. |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
void pme_gpu_realloc_and_copy_fract_shifts | ( | PmeGpu * | pmeGpu | ) |
Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_and_copy_input_coefficients | ( | const PmeGpu * | pmeGpu, |
const float * | h_coefficients, | ||
int | gridIndex = 0 |
||
) |
Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer. Clears the padded part if needed.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_coefficients | The input atom charges/coefficients. |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
Does not need to be done for every PME computation, only whenever the local charges change. (So, in the beginning of the run, or on DD step).
void pme_gpu_realloc_forces | ( | PmeGpu * | pmeGpu | ) |
Reallocates the GPU buffer for the PME forces.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_grid_indices | ( | PmeGpu * | pmeGpu | ) |
Reallocates the buffers on the GPU and the host for the particle gridline indices.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_grids | ( | PmeGpu * | pmeGpu | ) |
Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_spline_data | ( | PmeGpu * | pmeGpu | ) |
Reallocates the buffers on the GPU and the host for the atoms spline data.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_reinit | ( | gmx_pme_t * | pme, |
const DeviceContext * | deviceContext, | ||
const DeviceStream * | deviceStream, | ||
const PmeGpuProgram * | pmeGpuProgram, | ||
bool | useMdGpuGraph | ||
) |
(Re-)initializes the PME GPU data at the beginning of the run or on DLB.
[in,out] | pme | The PME structure. |
[in] | deviceContext | The GPU context. |
[in] | deviceStream | The GPU stream. |
[in,out] | pmeGpuProgram | The handle to the program/kernel data created outside (e.g. in unit tests/runner) |
[in] | useMdGpuGraph | Whether MD GPU Graph is in use |
gmx::NotImplementedError | if this generally valid PME structure is not valid for GPU runs. |
void pme_gpu_reinit_3dfft | ( | const PmeGpu * | pmeGpu | ) |
Initializes the CUDA FFT structures.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_reinit_atoms | ( | PmeGpu * | pmeGpu, |
int | nAtoms, | ||
const real * | chargesA, | ||
const real * | chargesB = nullptr |
||
) |
Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
[in] | pmeGpu | The PME GPU structure. |
[in] | nAtoms | The number of particles. |
[in] | chargesA | The pointer to the host-side array of particle charges in the unperturbed state or FEP state A. |
[in] | chargesB | The pointer to the host-side array of particle charges in FEP state B. |
This is a function that should only be called in the beginning of the run and on domain decomposition. Should be called before the pme_gpu_set_io_ranges.
void pme_gpu_reinit_computation | ( | const PmeGpu * | pmeGpu | ) |
The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
This clears the device-side working buffers in preparation for new computation.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_reinit_haloexchange | ( | PmeGpu * | pmeGpu | ) |
Reinitialize PME halo exchange parameters and staging device buffers for MPI communication.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_set_kernelparam_coordinates | ( | const PmeGpu * | pmeGpu, |
DeviceBuffer< gmx::RVec > | d_x | ||
) |
Sets the device pointer to coordinate data.
[in] | pmeGpu | The PME GPU structure. |
[in] | d_x | Pointer to coordinate data |
|
inline |
Sets whether the PME module is running in testing mode.
[in] | pmeGpu | The PME GPU structure. |
[in] | testing | Whether testing mode is on. |
|
inline |
Returns the PME GPU settings.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_solve | ( | PmeGpu * | pmeGpu, |
int | gridIndex, | ||
t_complex * | h_grid, | ||
GridOrdering | gridOrdering, | ||
bool | computeEnergyAndVirial | ||
) |
A GPU Fourier space solving function.
[in] | pmeGpu | The PME GPU structure. |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
[in,out] | h_grid | The host-side input and output Fourier grid buffer (used only with testing or host-side FFT) |
[in] | gridOrdering | Specifies the dimenion ordering of the complex grid. TODO: store this information? |
[in] | computeEnergyAndVirial | Tells if the energy and virial computation should be performed. |
void pme_gpu_spread | ( | PmeGpu * | pmeGpu, |
GpuEventSynchronizer * | xReadyOnDevice, | ||
gmx::ArrayRef< PmeAndFftGrids > | h_grids, | ||
bool | computeSplines, | ||
bool | spreadCharges, | ||
real | lambda, | ||
bool | useGpuDirectComm, | ||
gmx::PmeCoordinateReceiverGpu * | pmeCoordinateReceiverGpu, | ||
bool | useMdGpuGraph, | ||
gmx_wallcycle * | wcycle | ||
) |
A GPU spline computation and charge spreading function.
[in] | pmeGpu | The PME GPU structure. |
[in] | xReadyOnDevice | Event synchronizer indicating that the coordinates are ready in the device memory; can be nullptr when invoked on a separate PME rank or from PME tests. |
[out] | h_grids | The host-side grid buffers and FFT setup (used only if the result of the spread is expected on the host, e.g. testing or host-side FFT) |
[in] | computeSplines | Should the computation of spline parameters and gridline indices be performed. |
[in] | spreadCharges | Should the charges/coefficients be spread on the grid. |
[in] | lambda | The lambda value of the current system state. |
[in] | useGpuDirectComm | Whether direct GPU PME-PP communication is active |
[in] | pmeCoordinateReceiverGpu | Coordinate receiver object, which must be valid when direct GPU PME-PP communication is active |
[in] | useMdGpuGraph | Whether MD GPU Graph is in use. |
[in] | wcycle | The wallclock counter. |
|
inline |
Returns the PME GPU staging object.
[in] | pmeGpu | The PME GPU structure. |
bool pme_gpu_stream_query | ( | const PmeGpu * | pmeGpu | ) |
Checks whether work in the PME GPU stream has completed.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_sync_spread_grid | ( | const PmeGpu * | pmeGpu | ) |
Waits for the grid copying to the host-side buffer after spreading to finish.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_synchronize | ( | const PmeGpu * | pmeGpu | ) |
Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_update_input_box | ( | PmeGpu * | pmeGpu, |
const matrix | box | ||
) |
Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
[in] | pmeGpu | The PME GPU structure. |
[in] | box | The unit cell box. |
PmeOutput pme_gpu_wait_finish_task | ( | gmx_pme_t * | pme, |
bool | computeEnergyAndVirial, | ||
real | lambdaQ, | ||
gmx_wallcycle * | wcycle | ||
) |
Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy (if they were to be computed).
[in] | pme | The PME data structure. |
[in] | computeEnergyAndVirial | Tells if the energy and virial computation should be performed. |
[in] | lambdaQ | The Coulomb lambda to use when calculating the results. |
[out] | wcycle | The wallclock counter. |