Gromacs
2019-beta2
|
#include "gmxpre.h"
#include "pme-gpu-internal.h"
#include "config.h"
#include <list>
#include <string>
#include "gromacs/compat/make_unique.h"
#include "gromacs/ewald/ewald-utils.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/math/invertmatrix.h"
#include "gromacs/math/units.h"
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/gpu_utils/pmalloc_cuda.h"
#include "pme.cuh"
#include "gromacs/ewald/pme.h"
#include "pme-gpu-3dfft.h"
#include "pme-gpu-program-impl.h"
#include "pme-gpu-timings.h"
#include "pme-gpu-types-host.h"
#include "pme-gpu-types-host-impl.h"
#include "pme-gpu-utils.h"
#include "pme-grid.h"
#include "pme-internal.h"
This file contains internal function implementations for performing the PME calculations on GPU.
Note that this file is compiled as regular C++ source in OpenCL builds, but it is treated as CUDA source in CUDA-enabled GPU builds.
Functions | |
static PmeGpuKernelParamsBase * | pme_gpu_get_kernel_params_base_ptr (const PmeGpu *pmeGpu) |
Wrapper for getting a pointer to the plain C++ part of the GPU kernel parameters structure. More... | |
int | pme_gpu_get_atom_data_alignment (const PmeGpu *) |
Returns the number of atoms per chunk in the atom charges/coordinates data layout. Depends on CUDA-specific block sizes, needed for the atom data padding. More... | |
int | pme_gpu_get_atoms_per_warp (const PmeGpu *pmeGpu) |
Returns the number of atoms per chunk in the atom spline theta/dtheta data layout. More... | |
void | pme_gpu_synchronize (const PmeGpu *pmeGpu) |
Synchronizes the current computation, waiting for the GPU kernels/transfers to finish. More... | |
void | pme_gpu_alloc_energy_virial (PmeGpu *pmeGpu) |
Allocates the fixed size energy and virial buffer both on GPU and CPU. More... | |
void | pme_gpu_free_energy_virial (PmeGpu *pmeGpu) |
Frees the energy and virial memory both on GPU and CPU. More... | |
void | pme_gpu_clear_energy_virial (const PmeGpu *pmeGpu) |
Clears the energy and virial memory on GPU with 0. Should be called at the end of PME computation which returned energy/virial. More... | |
void | pme_gpu_realloc_and_copy_bspline_values (PmeGpu *pmeGpu) |
Reallocates and copies the pre-computed B-spline values to the GPU. More... | |
void | pme_gpu_free_bspline_values (const PmeGpu *pmeGpu) |
Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers). More... | |
void | pme_gpu_realloc_forces (PmeGpu *pmeGpu) |
Reallocates the GPU buffer for the PME forces. More... | |
void | pme_gpu_free_forces (const PmeGpu *pmeGpu) |
Frees the GPU buffer for the PME forces. More... | |
void | pme_gpu_copy_input_forces (PmeGpu *pmeGpu) |
Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces). To be called e.g. after the bonded calculations. More... | |
void | pme_gpu_copy_output_forces (PmeGpu *pmeGpu) |
Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage. More... | |
void | pme_gpu_realloc_coordinates (const PmeGpu *pmeGpu) |
Reallocates the input coordinates buffer on the GPU (and clears the padded part if needed). More... | |
void | pme_gpu_copy_input_coordinates (const PmeGpu *pmeGpu, const rvec *h_coordinates) |
Copies the input coordinates from the CPU buffer onto the GPU. More... | |
void | pme_gpu_free_coordinates (const PmeGpu *pmeGpu) |
Frees the coordinates on the GPU. More... | |
void | pme_gpu_realloc_and_copy_input_coefficients (const PmeGpu *pmeGpu, const float *h_coefficients) |
Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer. Clears the padded part if needed. More... | |
void | pme_gpu_free_coefficients (const PmeGpu *pmeGpu) |
Frees the charges/coefficients on the GPU. More... | |
void | pme_gpu_realloc_spline_data (PmeGpu *pmeGpu) |
Reallocates the buffers on the GPU and the host for the atoms spline data. More... | |
void | pme_gpu_free_spline_data (const PmeGpu *pmeGpu) |
Frees the buffers on the GPU for the atoms spline data. More... | |
void | pme_gpu_realloc_grid_indices (PmeGpu *pmeGpu) |
Reallocates the buffers on the GPU and the host for the particle gridline indices. More... | |
void | pme_gpu_free_grid_indices (const PmeGpu *pmeGpu) |
Frees the buffer on the GPU for the particle gridline indices. More... | |
void | pme_gpu_realloc_grids (PmeGpu *pmeGpu) |
Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU. More... | |
void | pme_gpu_free_grids (const PmeGpu *pmeGpu) |
Frees the real space grid and the complex reciprocal grid (if needed) on the GPU. More... | |
void | pme_gpu_clear_grids (const PmeGpu *pmeGpu) |
Clears the real space grid on the GPU. Should be called at the end of each computation. More... | |
void | pme_gpu_realloc_and_copy_fract_shifts (PmeGpu *pmeGpu) |
Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU. More... | |
void | pme_gpu_free_fract_shifts (const PmeGpu *pmeGpu) |
Frees the pre-computed fractional coordinates' shifts on the GPU. More... | |
bool | pme_gpu_stream_query (const PmeGpu *pmeGpu) |
Checks whether work in the PME GPU stream has completed. More... | |
void | pme_gpu_copy_input_gather_grid (const PmeGpu *pmeGpu, float *h_grid) |
Copies the input real-space grid from the host to the GPU. More... | |
void | pme_gpu_copy_output_spread_grid (const PmeGpu *pmeGpu, float *h_grid) |
Copies the output real-space grid from the GPU to the host. More... | |
void | pme_gpu_copy_output_spread_atom_data (const PmeGpu *pmeGpu) |
Copies the spread output spline data and gridline indices from the GPU to the host. More... | |
void | pme_gpu_copy_input_gather_atom_data (const PmeGpu *pmeGpu) |
Copies the gather input spline data and gridline indices from the host to the GPU. More... | |
void | pme_gpu_sync_spread_grid (const PmeGpu *pmeGpu) |
Waits for the grid copying to the host-side buffer after spreading to finish. More... | |
void | pme_gpu_init_internal (PmeGpu *pmeGpu) |
Does the one-time GPU-framework specific PME initialization. For CUDA, the PME stream is created with the highest priority. More... | |
void | pme_gpu_destroy_specific (const PmeGpu *pmeGpu) |
Destroys the PME GPU-framework specific data. Should be called last in the PME GPU destructor. More... | |
void | pme_gpu_reinit_3dfft (const PmeGpu *pmeGpu) |
Initializes the CUDA FFT structures. More... | |
void | pme_gpu_destroy_3dfft (const PmeGpu *pmeGpu) |
Destroys the CUDA FFT structures. More... | |
int | getSplineParamFullIndex (int order, int splineIndex, int dimIndex, int atomIndex, int atomsPerWarp) |
Gets a unique index to an element in a spline parameter buffer (theta/dtheta), which is laid out for GPU spread/gather kernels. The index is wrt the execution block, in range(0, atomsPerBlock * order * DIM). This is a wrapper, only used in unit tests. More... | |
gmx::ArrayRef< gmx::RVec > | pme_gpu_get_forces (PmeGpu *pmeGpu) |
Returns the GPU gathering staging forces buffer. More... | |
void | pme_gpu_get_energy_virial (const PmeGpu *pmeGpu, real *energy, matrix virial) |
Returns the output virial and energy of the PME solving. More... | |
void | pme_gpu_update_input_box (PmeGpu *pmeGpu, const matrix box) |
Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation(). More... | |
static void | pme_gpu_reinit_grids (PmeGpu *pmeGpu) |
(Re-)initializes all the PME GPU data related to the grid size and cut-off. More... | |
static void | pme_gpu_copy_common_data_from (const gmx_pme_t *pme) |
Copies everything useful from the PME CPU to the PME GPU structure. The goal is to minimize interaction with the PME CPU structure in the GPU code. More... | |
static void | pme_gpu_init (gmx_pme_t *pme, const gmx_device_info_t *gpuInfo, PmeGpuProgramHandle pmeGpuProgram) |
Initializes the PME GPU data at the beginning of the run. TODO: this should become PmeGpu::PmeGpu() More... | |
void | pme_gpu_transform_spline_atom_data (const PmeGpu *pmeGpu, const pme_atomcomm_t *atc, PmeSplineDataType type, int dimIndex, PmeLayoutTransform transform) |
Rearranges the atom spline data between the GPU and host layouts. Only used for test purposes so far, likely to be horribly slow. More... | |
void | pme_gpu_get_real_grid_sizes (const PmeGpu *pmeGpu, gmx::IVec *gridSize, gmx::IVec *paddedGridSize) |
Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests. More... | |
void | pme_gpu_reinit (gmx_pme_t *pme, const gmx_device_info_t *gpuInfo, PmeGpuProgramHandle pmeGpuProgram) |
(Re-)initializes the PME GPU data at the beginning of the run or on DLB. More... | |
void | pme_gpu_destroy (PmeGpu *pmeGpu) |
Destroys the PME GPU data at the end of the run. More... | |
void | pme_gpu_reinit_atoms (PmeGpu *pmeGpu, const int nAtoms, const real *charges) |
Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU. More... | |
void | pme_gpu_3dfft (const PmeGpu *pmeGpu, gmx_fft_direction dir, int grid_index) |
3D FFT R2C/C2R routine. More... | |
std::pair< int, int > | pmeGpuCreateGrid (const PmeGpu *pmeGpu, int blockCount) |
Given a possibly large blockCount, returns a compact 1D or 2D grid for kernel scheduling, to minimize the number of unused blocks. | |
void | pme_gpu_spread (const PmeGpu *pmeGpu, int gridIndex, real *h_grid, bool computeSplines, bool spreadCharges) |
A GPU spline computation and charge spreading function. More... | |
void | pme_gpu_solve (const PmeGpu *pmeGpu, t_complex *h_grid, GridOrdering gridOrdering, bool computeEnergyAndVirial) |
A GPU Fourier space solving function. More... | |
void | pme_gpu_gather (PmeGpu *pmeGpu, PmeForceOutputHandling forceTreatment, const float *h_grid) |
A GPU force gathering function. More... | |
int getSplineParamFullIndex | ( | int | order, |
int | splineIndex, | ||
int | dimIndex, | ||
int | atomIndex, | ||
int | atomsPerWarp | ||
) |
Gets a unique index to an element in a spline parameter buffer (theta/dtheta), which is laid out for GPU spread/gather kernels. The index is wrt the execution block, in range(0, atomsPerBlock * order * DIM). This is a wrapper, only used in unit tests.
[in] | order | PME order |
[in] | splineIndex | Spline contribution index (from 0 to order - 1) |
[in] | dimIndex | Dimension index (from 0 to 2) |
[in] | atomIndex | Atom index wrt the block. |
[in] | atomsPerWarp | Number of atoms processed by a warp. |
void pme_gpu_3dfft | ( | const PmeGpu * | pmeGpu, |
enum gmx_fft_direction | direction, | ||
int | gridIndex | ||
) |
3D FFT R2C/C2R routine.
[in] | pmeGpu | The PME GPU structure. |
[in] | direction | Transform direction (real-to-complex or complex-to-real) |
[in] | gridIndex | Index of the PME grid - unused, assumed to be 0. |
void pme_gpu_alloc_energy_virial | ( | PmeGpu * | pmeGpu | ) |
Allocates the fixed size energy and virial buffer both on GPU and CPU.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_clear_energy_virial | ( | const PmeGpu * | pmeGpu | ) |
Clears the energy and virial memory on GPU with 0. Should be called at the end of PME computation which returned energy/virial.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_clear_grids | ( | const PmeGpu * | pmeGpu | ) |
Clears the real space grid on the GPU. Should be called at the end of each computation.
[in] | pmeGpu | The PME GPU structure. |
static void pme_gpu_copy_common_data_from | ( | const gmx_pme_t * | pme | ) |
Copies everything useful from the PME CPU to the PME GPU structure. The goal is to minimize interaction with the PME CPU structure in the GPU code.
[in] | pme | The PME structure. |
void pme_gpu_copy_input_coordinates | ( | const PmeGpu * | pmeGpu, |
const rvec * | h_coordinates | ||
) |
Copies the input coordinates from the CPU buffer onto the GPU.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_coordinates | Input coordinates (XYZ rvec array). |
Needs to be called for every PME computation. The coordinates are then used in the spline calculation.
void pme_gpu_copy_input_forces | ( | PmeGpu * | pmeGpu | ) |
Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces). To be called e.g. after the bonded calculations.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_input_gather_atom_data | ( | const PmeGpu * | pmeGpu | ) |
Copies the gather input spline data and gridline indices from the host to the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_input_gather_grid | ( | const PmeGpu * | pmeGpu, |
float * | h_grid | ||
) |
Copies the input real-space grid from the host to the GPU.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_grid | The host-side grid buffer. |
void pme_gpu_copy_output_forces | ( | PmeGpu * | pmeGpu | ) |
Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_output_spread_atom_data | ( | const PmeGpu * | pmeGpu | ) |
Copies the spread output spline data and gridline indices from the GPU to the host.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_copy_output_spread_grid | ( | const PmeGpu * | pmeGpu, |
float * | h_grid | ||
) |
Copies the output real-space grid from the GPU to the host.
[in] | pmeGpu | The PME GPU structure. |
[out] | h_grid | The host-side grid buffer. |
void pme_gpu_destroy | ( | PmeGpu * | pmeGpu | ) |
Destroys the PME GPU data at the end of the run.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_destroy_3dfft | ( | const PmeGpu * | pmeGpu | ) |
Destroys the CUDA FFT structures.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_destroy_specific | ( | const PmeGpu * | pmeGpu | ) |
Destroys the PME GPU-framework specific data. Should be called last in the PME GPU destructor.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_bspline_values | ( | const PmeGpu * | pmeGpu | ) |
Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_coefficients | ( | const PmeGpu * | pmeGpu | ) |
Frees the charges/coefficients on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_coordinates | ( | const PmeGpu * | pmeGpu | ) |
Frees the coordinates on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_energy_virial | ( | PmeGpu * | pmeGpu | ) |
Frees the energy and virial memory both on GPU and CPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_forces | ( | const PmeGpu * | pmeGpu | ) |
Frees the GPU buffer for the PME forces.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_fract_shifts | ( | const PmeGpu * | pmeGpu | ) |
Frees the pre-computed fractional coordinates' shifts on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_grid_indices | ( | const PmeGpu * | pmeGpu | ) |
Frees the buffer on the GPU for the particle gridline indices.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_grids | ( | const PmeGpu * | pmeGpu | ) |
Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_free_spline_data | ( | const PmeGpu * | pmeGpu | ) |
Frees the buffers on the GPU for the atoms spline data.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_gather | ( | PmeGpu * | pmeGpu, |
PmeForceOutputHandling | forceTreatment, | ||
const float * | h_grid | ||
) |
A GPU force gathering function.
[in] | pmeGpu | The PME GPU structure. |
[in] | forceTreatment | Tells how data in h_forces should be treated. TODO: determine efficiency/balance of host/device-side reductions. |
[in] | h_grid | The host-side grid buffer (used only in testing mode) |
int pme_gpu_get_atom_data_alignment | ( | const PmeGpu * | pmeGpu | ) |
Returns the number of atoms per chunk in the atom charges/coordinates data layout. Depends on CUDA-specific block sizes, needed for the atom data padding.
[in] | pmeGpu | The PME GPU structure. |
int pme_gpu_get_atoms_per_warp | ( | const PmeGpu * | pmeGpu | ) |
Returns the number of atoms per chunk in the atom spline theta/dtheta data layout.
[in] | pmeGpu | The PME GPU structure. |
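void pme_gpu_get_energy_virial | ( | const PmeGpu * | pmeGpu, |
real * | energy, | ||
matrix | virial | ||
) |
Returns the output virial and energy of the PME solving.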
[in] | pmeGpu | The PME GPU structure. |
[out] | energy | The output energy. |
[out] | virial | The output virial matrix. |
gmx::ArrayRef<gmx::RVec> pme_gpu_get_forces | ( | PmeGpu * | pmeGpu | ) |
Returns the GPU gathering staging forces buffer.
[in] | pmeGpu | The PME GPU structure. |
static PmeGpuKernelParamsBase * pme_gpu_get_kernel_params_base_ptr | ( | const PmeGpu * | pmeGpu | ) |
Wrapper for getting a pointer to the plain C++ part of the GPU kernel parameters structure.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_get_real_grid_sizes | ( | const PmeGpu * | pmeGpu, |
gmx::IVec * | gridSize, | ||
gmx::IVec * | paddedGridSize | ||
) |
Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
[in] | pmeGpu | The PME GPU structure. |
[out] | gridSize | Pointer to the grid dimensions to fill in. |
[out] | paddedGridSize | Pointer to the padded grid dimensions to fill in. |
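static void pme_gpu_init | ( | gmx_pme_t * | pme, |
const gmx_device_info_t * | gpuInfo, | ||
PmeGpuProgramHandle | pmeGpuProgram | ||
) |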
Initializes the PME GPU data at the beginning of the run. TODO: this should become PmeGpu::PmeGpu()
[in,out] | pme | The PME structure. |
[in] | gpuInfo | The GPU information structure. |
[in] | pmeGpuProgram | The handle to the program/kernel data created outside (e.g. in unit tests/runner) |
void pme_gpu_init_internal | ( | PmeGpu * | pmeGpu | ) |
Does the one-time GPU-framework specific PME initialization. For CUDA, the PME stream is created with the highest priority.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_and_copy_bspline_values | ( | PmeGpu * | pmeGpu | ) |
Reallocates and copies the pre-computed B-spline values to the GPU.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_and_copy_fract_shifts | ( | PmeGpu * | pmeGpu | ) |
Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_and_copy_input_coefficients | ( | const PmeGpu * | pmeGpu, |
const float * | h_coefficients | ||
) |
Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer. Clears the padded part if needed.
[in] | pmeGpu | The PME GPU structure. |
[in] | h_coefficients | The input atom charges/coefficients. |
Does not need to be done for every PME computation, only whenever the local charges change (i.e., at the beginning of the run or on a DD step).
void pme_gpu_realloc_coordinates | ( | const PmeGpu * | pmeGpu | ) |
Reallocates the input coordinates buffer on the GPU (and clears the padded part if needed).
[in] | pmeGpu | The PME GPU structure. |
Needs to be called at the beginning of the run and on every DD step.
void pme_gpu_realloc_forces | ( | PmeGpu * | pmeGpu | ) |
Reallocates the GPU buffer for the PME forces.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_grid_indices | ( | PmeGpu * | pmeGpu | ) |
Reallocates the buffers on the GPU and the host for the particle gridline indices.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_grids | ( | PmeGpu * | pmeGpu | ) |
Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_realloc_spline_data | ( | PmeGpu * | pmeGpu | ) |
Reallocates the buffers on the GPU and the host for the atoms spline data.
[in,out] | pmeGpu | The PME GPU structure. |
void pme_gpu_reinit | ( | gmx_pme_t * | pme, |
const gmx_device_info_t * | gpuInfo, | ||
PmeGpuProgramHandle | pmeGpuProgram | ||
) |
(Re-)initializes the PME GPU data at the beginning of the run or on DLB.
[in,out] | pme | The PME structure. |
[in] | gpuInfo | The GPU information structure. |
[in] | pmeGpuProgram | The PME GPU program data |
Exceptions: gmx::NotImplementedError | if this generally valid PME structure is not valid for GPU runs. |
void pme_gpu_reinit_3dfft | ( | const PmeGpu * | pmeGpu | ) |
Initializes the CUDA FFT structures.
[in] | pmeGpu | The PME GPU structure. |
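void pme_gpu_reinit_atoms | ( | PmeGpu * | pmeGpu, |
const int | nAtoms, | ||
const real * | charges | ||
) |
Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.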
[in] | pmeGpu | The PME GPU structure. |
[in] | nAtoms | The number of particles. |
[in] | charges | The pointer to the host-side array of particle charges. |
This function should only be called at the beginning of the run and on domain decomposition. It should be called before pme_gpu_set_io_ranges().
static void pme_gpu_reinit_grids | ( | PmeGpu * | pmeGpu | ) |
(Re-)initializes all the PME GPU data related to the grid size and cut-off.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_solve | ( | const PmeGpu * | pmeGpu, |
t_complex * | h_grid, | ||
GridOrdering | gridOrdering, | ||
bool | computeEnergyAndVirial | ||
) |
A GPU Fourier space solving function.
[in] | pmeGpu | The PME GPU structure. |
[in,out] | h_grid | The host-side input and output Fourier grid buffer (used only with testing or host-side FFT) |
[in] | gridOrdering | Specifies the dimension ordering of the complex grid. TODO: store this information? |
[in] | computeEnergyAndVirial | Tells if the energy and virial computation should also be performed. |
void pme_gpu_spread | ( | const PmeGpu * | pmeGpu, |
int | gridIndex, | ||
real * | h_grid, | ||
bool | computeSplines, | ||
bool | spreadCharges | ||
) |
A GPU spline computation and charge spreading function.
[in] | pmeGpu | The PME GPU structure. |
[in] | gridIndex | Index of the PME grid - unused, assumed to be 0. |
[out] | h_grid | The host-side grid buffer (used only if the result of the spread is expected on the host, e.g. testing or host-side FFT) |
[in] | computeSplines | Should the computation of spline parameters and gridline indices be performed. |
[in] | spreadCharges | Should the charges/coefficients be spread on the grid. |
bool pme_gpu_stream_query | ( | const PmeGpu * | pmeGpu | ) |
Checks whether work in the PME GPU stream has completed.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_sync_spread_grid | ( | const PmeGpu * | pmeGpu | ) |
Waits for the grid copying to the host-side buffer after spreading to finish.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_synchronize | ( | const PmeGpu * | pmeGpu | ) |
Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
[in] | pmeGpu | The PME GPU structure. |
void pme_gpu_transform_spline_atom_data | ( | const PmeGpu * | pmeGpu, |
const pme_atomcomm_t * | atc, | ||
PmeSplineDataType | type, | ||
int | dimIndex, | ||
PmeLayoutTransform | transform | ||
) |
Rearranges the atom spline data between the GPU and host layouts. Only used for test purposes so far, likely to be horribly slow.
[in] | pmeGpu | The PME GPU structure. |
[out] | atc | The PME CPU atom data structure (with a single-threaded layout). |
[in] | type | The spline data type (values or derivatives). |
[in] | dimIndex | Dimension index. |
[in] | transform | Layout transform type |
void pme_gpu_update_input_box | ( | PmeGpu * | pmeGpu, |
const matrix | box | ||
) |
Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
[in] | pmeGpu | The PME GPU structure. |
[in] | box | The unit cell box. |