#include "gmxpre.h"
#include "gromacs/fft/parallel_3dfft.h"
#include "pme_gpu_grid.h"
#include "pme_gpu_types.h"
#include "pme_gpu_types_host.h"
#include "pme_gpu_types_host_impl.h"
Implements stubs of high-level PME GPU functions for OpenCL.
- Author
- Gaurav Garg <gaugarg@nvidia.com>
|
void | pmeGpuGridHaloExchange (const PmeGpu *, gmx_wallcycle *) |
| Grid halo exchange after PME spread. ToDo: The current implementation transfers the halo region from/to only immediate neighbours and expects that overlapSize <= local grid width; implement exchange with multiple neighbours to remove this limitation. ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after spread; consider using events for this synchronization. More...
|
|
void | pmeGpuGridHaloExchangeReverse (const PmeGpu *, gmx_wallcycle *) |
| Grid reverse halo exchange before PME gather. ToDo: The current implementation transfers the halo region from/to only immediate neighbours and expects that overlapSize <= local grid width; implement exchange with multiple neighbours to remove this limitation. ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after the FFT to PME grid conversion; consider using events for this synchronization. More...
|
|
template<bool forward> |
void | convertPmeGridToFftGrid (const PmeGpu *, float *, gmx_parallel_3dfft_t *, const int) |
| Copy PME Grid with overlap region to host FFT grid and vice-versa. Used in mixed mode PME decomposition. More...
|
|
template<bool forward> |
void | convertPmeGridToFftGrid (const PmeGpu *, DeviceBuffer< float > *, const int) |
| Copy PME Grid with overlap region to device FFT grid and vice-versa. Used in full GPU PME decomposition. More...
|
|
template void | convertPmeGridToFftGrid< true > (const PmeGpu *, float *, gmx_parallel_3dfft_t *, const int) |
|
template void | convertPmeGridToFftGrid< false > (const PmeGpu *, float *, gmx_parallel_3dfft_t *, const int) |
|
template void | convertPmeGridToFftGrid< true > (const PmeGpu *, DeviceBuffer< float > *, const int) |
|
template void | convertPmeGridToFftGrid< false > (const PmeGpu *, DeviceBuffer< float > *, const int) |
|
Copy PME Grid with overlap region to host FFT grid and vice-versa. Used in mixed mode PME decomposition.
- Parameters
-
[in] | pmeGpu | The PME GPU structure. |
[in] | h_fftRealGrid | FFT grid on host |
[in] | fftSetup | Host FFT setup structure |
[in] | gridIndex | Grid index which is to be converted |
- Template Parameters
-
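pmeToFft | A boolean which tells whether this is a conversion from the PME grid to the FFT grid or the reverse. Note: this parameter is declared as `forward` in this file's `template<bool forward>` signatures. |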
template<bool forward>
void convertPmeGridToFftGrid |
( |
const PmeGpu * |
pmeGpu, |
|
|
DeviceBuffer< float > * |
d_fftRealGrid, |
|
|
int |
gridIndex |
|
) |
| |
Copy PME Grid with overlap region to device FFT grid and vice-versa. Used in full GPU PME decomposition.
- Parameters
-
[in] | pmeGpu | The PME GPU structure. |
[in] | d_fftRealGrid | FFT grid on device |
[in] | gridIndex | Grid index which is to be converted |
- Template Parameters
-
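pmeToFft | A boolean which tells whether this is a conversion from the PME grid to the FFT grid or the reverse. Note: this parameter is declared as `forward` in this file's `template<bool forward>` signatures. |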
void pmeGpuGridHaloExchange |
( |
const PmeGpu * |
pmeGpu, |
|
|
gmx_wallcycle * |
wcycle |
|
) |
| |
Grid halo exchange after PME spread. ToDo: The current implementation transfers the halo region from/to only immediate neighbours and expects that overlapSize <= local grid width; implement exchange with multiple neighbours to remove this limitation. ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after spread; consider using events for this synchronization.
- Parameters
-
[in] | pmeGpu | The PME GPU structure. |
[in] | wcycle | The wallclock counter. |
void pmeGpuGridHaloExchangeReverse |
( |
const PmeGpu * |
pmeGpu, |
|
|
gmx_wallcycle * |
wcycle |
|
) |
| |
Grid reverse halo exchange before PME gather. ToDo: The current implementation transfers the halo region from/to only immediate neighbours and expects that overlapSize <= local grid width; implement exchange with multiple neighbours to remove this limitation. ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after the FFT to PME grid conversion; consider using events for this synchronization.
- Parameters
-
[in] | pmeGpu | The PME GPU structure. |
[in] | wcycle | The wallclock counter. |