Gromacs
2026.0-dev-20241204-d69d709
|
#include "gmxpre.h"
#include "config.h"
#include "gromacs/utility/gmxassert.h"
#include "pme_gpu_internal.h"
This file contains internal implementation stubs for performing the PME calculations on GPU.
Functions | |
int | pme_gpu_get_atoms_per_warp (const PmeGpu *) |
Return the number of atoms per warp. | |
void | pme_gpu_synchronize (const PmeGpu *) |
Synchronizes the current computation, waiting for the GPU kernels/transfers to finish. More... | |
void | pme_gpu_spread (PmeGpu *, GpuEventSynchronizer *, gmx::ArrayRef< PmeAndFftGrids >, bool, bool, real, bool, gmx::PmeCoordinateReceiverGpu *, bool, gmx_wallcycle *) |
A GPU spline computation and charge spreading function. More... | |
void | pme_gpu_solve (PmeGpu *, int, t_complex *, GridOrdering, bool) |
A GPU Fourier space solving function. More... | |
void | pme_gpu_gather (PmeGpu *, gmx::ArrayRef< PmeAndFftGrids >, float, gmx_wallcycle *, bool) |
A GPU force gathering function. More... | |
void | pme_gpu_set_kernelparam_coordinates (const PmeGpu *, DeviceBuffer< gmx::RVec >) |
Sets the device pointer to coordinate data. More... | |
DeviceBuffer< gmx::RVec > | pme_gpu_get_kernelparam_forces (const PmeGpu *) |
Return pointer to device copy of force data. More... | |
void | pme_gpu_set_kernelparam_useNvshmem (const PmeGpu *, bool) |
GpuEventSynchronizer * | pme_gpu_get_forces_ready_synchronizer (const PmeGpu *) |
Return pointer to the sync object triggered after the PME force calculation completion. More... | |
void | pme_gpu_getEnergyAndVirial (const gmx_pme_t &, float, PmeOutput *) |
Returns the energy and virial GPU outputs, useful for testing. More... | |
PmeOutput | pme_gpu_getOutput (gmx_pme_t *, bool, real) |
Returns the GPU outputs (forces, energy and virial). More... | |
void | pme_gpu_update_input_box (PmeGpu *, const matrix) |
Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation(). More... | |
void | pme_gpu_get_real_grid_sizes (const PmeGpu *, gmx::IVec *, gmx::IVec *) |
Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests. More... | |
void | pme_gpu_reinit (gmx_pme_t *, const DeviceContext *, const DeviceStream *, const PmeGpuProgram *, bool) |
(Re-)initializes the PME GPU data at the beginning of the run or on DLB. More... | |
void | pme_gpu_destroy (PmeGpu *) |
Destroys the PME GPU data at the end of the run. More... | |
void | pme_gpu_reinit_atoms (PmeGpu *, int, const real *, const real *) |
Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU. More... | |
void | pme_gpu_sync_spread_grid (const PmeGpu *) |
Waits for the grid copying to the host-side buffer after spreading to finish. More... | |
void | pme_gpu_3dfft (const PmeGpu *, enum gmx_fft_direction, int) |
3D FFT R2C/C2R routine. More... | |
void | pme_gpu_clear_grids (const PmeGpu *) |
Clears the real space grid on the GPU. Should be called at the end of each computation. More... | |
void | pme_gpu_clear_energy_virial (const PmeGpu *, bool) |
Clears the energy and virial memory on GPU with 0. Should be called at the end of PME computation which returned energy/virial. More... | |
int | pme_gpu_get_atom_data_block_size () |
Returns the block size requirement for the atom data. More... | |
void | pme_gpu_update_timings (const PmeGpu *) |
Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation. More... | |
void | pme_gpu_reinit_timings (const PmeGpu *) |
Updates the internal list of active PME GPU stages (if timings are enabled). More... | |
void | pme_gpu_reset_timings (const PmeGpu *) |
Resets the PME GPU timings. To be called at the reset MD step. More... | |
void | pme_gpu_get_timings (const PmeGpu *, gmx_wallclock_gpu_pme_t *) |
Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end. More... | |
bool | pme_gpu_stream_query (const PmeGpu *) |
Checks whether work in the PME GPU stream has completed. More... | |
void pme_gpu_3dfft | ( | const PmeGpu * | pmeGpu, |
enum gmx_fft_direction | direction, | ||
int | gridIndex = 0 |
||
) |
3D FFT R2C/C2R routine.
[in] | pmeGpu | The PME GPU structure. |
[in] | direction | Transform direction (real-to-complex or complex-to-real) |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
void pme_gpu_clear_energy_virial | ( | const PmeGpu * | pmeGpu, |
bool | gpuGraphWithSeparatePmeRank | ||
) |
Clears the energy and virial memory on GPU with 0. Should be called at the end of PME computation which returned energy/virial.
[in] | pmeGpu | The PME GPU structure. |
[in] | gpuGraphWithSeparatePmeRank | Whether MD GPU Graph with separate PME rank is in use. |
void pme_gpu_clear_grids | ( | const PmeGpu * | pmeGpu | ) |
Clears the real space grid on the GPU. Should be called at the end of each computation.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_destroy | ( | PmeGpu * | pmeGpu | ) |
Destroys the PME GPU data at the end of the run.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_gather | ( | PmeGpu * | pmeGpu, |
gmx::ArrayRef< PmeAndFftGrids > | h_grids, | ||
float | lambda, | ||
gmx_wallcycle * | wcycle, | ||
bool | computeVirial | ||
) |
A GPU force gathering function.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_grids | The host-side grid buffers and FFT setup (used only in testing mode). |
[in] | lambda | The lambda value to use. |
[in] | wcycle | The wallclock counter. |
[in] | computeVirial | Whether this is a virial step. |
int pme_gpu_get_atom_data_block_size | ( | ) |
Returns the block size requirement for the atom data.
The GPU version of PME requires that the coordinates array have a size divisible by the returned number.
GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer | ( | const PmeGpu * | pmeGpu | ) |
Return pointer to the sync object triggered after the PME force calculation completion.
[in] | pmeGpu | The PME GPU structure. |
DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces | ( | const PmeGpu * | pmeGpu | ) |
Return pointer to device copy of force data.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_get_real_grid_sizes | ( | const PmeGpu * | pmeGpu, |
gmx::IVec * | gridSize, | ||
gmx::IVec * | paddedGridSize | ||
) |
Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
[in] | pmeGpu | The PME GPU structure. |
[out] | gridSize | Pointer to the grid dimensions to fill in. |
[out] | paddedGridSize | Pointer to the padded grid dimensions to fill in. |
void pme_gpu_get_timings | ( | const PmeGpu * | pmeGpu, |
gmx_wallclock_gpu_pme_t * | timings | ||
) |
Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.
[in] | pmeGpu | The PME GPU structure. |
[out] | timings | The gmx_wallclock_gpu_pme_t structure. |
void pme_gpu_getEnergyAndVirial | ( | const gmx_pme_t & | pme, |
float | lambda, | ||
PmeOutput * | output | ||
) |
Returns the energy and virial GPU outputs, useful for testing.
It is the caller's responsibility to be aware of whether the GPU handled the solve stage.
[in] | pme | The PME structure. |
[in] | lambda | The lambda value to use when calculating the results. |
[out] | output | Pointer to output where energy and virial should be stored. |
PmeOutput pme_gpu_getOutput | ( | gmx_pme_t * | pme, |
bool | computeEnergyAndVirial, | ||
real | lambdaQ | ||
) |
Returns the GPU outputs (forces, energy and virial).
[in] | pme | The PME structure. |
[in] | computeEnergyAndVirial | Whether the energy and virial are being computed. |
[in] | lambdaQ | The Coulomb lambda to use when finalizing the output. |
void pme_gpu_reinit | ( | gmx_pme_t * | pme, |
const DeviceContext * | deviceContext, | ||
const DeviceStream * | deviceStream, | ||
const PmeGpuProgram * | pmeGpuProgram, | ||
bool | useMdGpuGraph | ||
) |
(Re-)initializes the PME GPU data at the beginning of the run or on DLB.
[in,out] | pme | The PME structure. |
[in] | deviceContext | The GPU context. |
[in] | deviceStream | The GPU stream. |
[in] | pmeGpuProgram | The handle to the program/kernel data created outside (e.g. in unit tests/runner) |
[in] | useMdGpuGraph | Whether MD GPU Graph is in use. |
gmx::NotImplementedError | if this generally valid PME structure is not valid for GPU runs. |
void pme_gpu_reinit_atoms | ( | PmeGpu * | pmeGpu, |
int | nAtoms, | ||
const real * | chargesA, | ||
const real * | chargesB = nullptr |
||
) |
Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
[in] | pmeGpu | The PME GPU structure. |
[in] | nAtoms | The number of particles. |
[in] | chargesA | The pointer to the host-side array of particle charges in the unperturbed state or FEP state A. |
[in] | chargesB | The pointer to the host-side array of particle charges in FEP state B. |
This function should only be called at the beginning of the run and on domain decomposition. It should be called before pme_gpu_set_io_ranges().
void pme_gpu_reinit_timings | ( | const PmeGpu * | pmeGpu | ) |
Updates the internal list of active PME GPU stages (if timings are enabled).
[in] | pmeGpu | The PME GPU data structure. |
void pme_gpu_reset_timings | ( | const PmeGpu * | pmeGpu | ) |
Resets the PME GPU timings. To be called at the reset MD step.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_set_kernelparam_coordinates | ( | const PmeGpu * | pmeGpu, |
DeviceBuffer< gmx::RVec > | d_x | ||
) |
Sets the device pointer to coordinate data.
[in] | pmeGpu | The PME GPU structure. |
[in] | d_x | Pointer to coordinate data. |
void pme_gpu_solve | ( | PmeGpu * | pmeGpu, |
int | gridIndex, | ||
t_complex * | h_grid, | ||
GridOrdering | gridOrdering, | ||
bool | computeEnergyAndVirial | ||
) |
A GPU Fourier space solving function.
[in] | pmeGpu | The PME GPU structure. |
[in] | gridIndex | The index of the grid to use. 0 is Coulomb in the normal state or FEP state A and 1 is Coulomb in FEP state B. |
[in,out] | h_grid | The host-side input and output Fourier grid buffer (used only with testing or host-side FFT) |
[in] | gridOrdering | Specifies the dimension ordering of the complex grid. TODO: store this information? |
[in] | computeEnergyAndVirial | Tells if the energy and virial computation should be performed. |
void pme_gpu_spread | ( | PmeGpu * | pmeGpu, |
GpuEventSynchronizer * | xReadyOnDevice, | ||
gmx::ArrayRef< PmeAndFftGrids > | h_grids, | ||
bool | computeSplines, | ||
bool | spreadCharges, | ||
real | lambda, | ||
bool | useGpuDirectComm, | ||
gmx::PmeCoordinateReceiverGpu * | pmeCoordinateReceiverGpu, | ||
bool | useMdGpuGraph, | ||
gmx_wallcycle * | wcycle | ||
) |
A GPU spline computation and charge spreading function.
[in] | pmeGpu | The PME GPU structure. |
[in] | xReadyOnDevice | Event synchronizer indicating that the coordinates are ready in the device memory; can be nullptr when invoked on a separate PME rank or from PME tests. |
[out] | h_grids | The host-side grid buffers and FFT setup (used only if the result of the spread is expected on the host, e.g. testing or host-side FFT) |
[in] | computeSplines | Should the computation of spline parameters and gridline indices be performed. |
[in] | spreadCharges | Should the charges/coefficients be spread on the grid. |
[in] | lambda | The lambda value of the current system state. |
[in] | useGpuDirectComm | Whether direct GPU PME-PP communication is active. |
[in] | pmeCoordinateReceiverGpu | Coordinate receiver object, which must be valid when direct GPU PME-PP communication is active. |
[in] | useMdGpuGraph | Whether MD GPU Graph is in use. |
[in] | wcycle | The wallclock counter. |
bool pme_gpu_stream_query | ( | const PmeGpu * | pmeGpu | ) |
Checks whether work in the PME GPU stream has completed.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_sync_spread_grid | ( | const PmeGpu * | pmeGpu | ) |
Waits for the grid copying to the host-side buffer after spreading to finish.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_synchronize | ( | const PmeGpu * | pmeGpu | ) |
Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_update_input_box | ( | PmeGpu * | pmeGpu, |
const matrix | box | ||
) |
Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
[in] | pmeGpu | The PME GPU structure. |
[in] | box | The unit cell box. |
void pme_gpu_update_timings | ( | const PmeGpu * | pmeGpu | ) |
Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
[in] | pmeGpu | The PME GPU structure. |