#include "gmxpre.h"
#include "config.h"
#include <cstdlib>
#include "gromacs/fft/parallel_3dfft.h"
#include "gromacs/gpu_utils/gmxsycl.h"
#include "gromacs/gpu_utils/syclutils.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/vec.h"
#include "pme_gpu_grid.h"
#include "pme_gpu_types.h"
#include "pme_gpu_types_host.h"
#include "pme_gpu_types_host_impl.h"
Implements PME GPU halo exchange and PME GPU - Host FFT grid conversion functions. These functions are used for PME decomposition in mixed-mode.
- Author
- Mark Abraham mark..nosp@m.j.ab.nosp@m.raham.nosp@m.@gma.nosp@m.il.co.nosp@m.m
|
|
using | mode = sycl::access_mode |
| |
|
| void | pmeGpuGridHaloExchange (const PmeGpu *pmeGpu, gmx_wallcycle *wcycle) |
| | Grid Halo exchange after PME spread ToDo: Current implementation transfers halo region from/to only immediate neighbours And, expects that overlapSize <= local grid width. Implement exchange with multiple neighbors to remove this limitation ToDo: Current implementation synchronizes pmeStream to make sure data is ready on GPU after spread. Consider using events for this synchnozation. More...
|
| |
| void | pmeGpuGridHaloExchangeReverse (const PmeGpu *pmeGpu, gmx_wallcycle *wcycle) |
| | Grid reverse Halo exchange before PME gather ToDo: Current implementation transfers halo region from/to only immediate neighbours And, expects that overlapSize <= local grid width. Implement exchange with multiple neighbors to remove this limitation ToDo: Current implementation synchronizes pmeStream to make sure data is ready on GPU after FFT to PME grid conversion. Consider using events for this synchnozation. More...
|
| |
| template<bool pmeToFft> |
| void | convertPmeGridToFftGrid (const PmeGpu *pmeGpu, float *h_fftRealGrid, gmx_parallel_3dfft *fftSetup, const int gridIndex) |
| | Copy PME Grid with overlap region to host FFT grid and vice-versa. Used in mixed mode PME decomposition. More...
|
| |
| template<bool pmeToFft> |
| void | convertPmeGridToFftGrid (const PmeGpu *pmeGpu, DeviceBuffer< float > *d_fftRealGrid, const int gridIndex) |
| | Copy PME Grid with overlap region to device FFT grid and vice-versa. Used in full GPU PME decomposition. More...
|
| |
|
template void | convertPmeGridToFftGrid< true > (const PmeGpu *pmeGpu, float *h_fftRealGrid, gmx_parallel_3dfft *fftSetup, const int gridIndex) |
| |
|
template void | convertPmeGridToFftGrid< false > (const PmeGpu *pmeGpu, float *h_fftRealGrid, gmx_parallel_3dfft *fftSetup, const int gridIndex) |
| |
|
template void | convertPmeGridToFftGrid< true > (const PmeGpu *pmeGpu, DeviceBuffer< float > *d_fftRealGrid, const int gridIndex) |
| |
|
template void | convertPmeGridToFftGrid< false > (const PmeGpu *pmeGpu, DeviceBuffer< float > *d_fftRealGrid, const int gridIndex) |
| |
template<bool pmeToFft>
| void convertPmeGridToFftGrid |
( |
const PmeGpu * |
pmeGpu, |
|
|
float * |
h_fftRealGrid, |
|
|
gmx_parallel_3dfft * |
fftSetup, |
|
|
int |
gridIndex |
|
) |
| |
Copy PME Grid with overlap region to host FFT grid and vice-versa. Used in mixed mode PME decomposition.
- Parameters
-
| [in] | pmeGpu | The PME GPU structure. |
| [in] | h_fftRealGrid | FFT grid on host |
| [in] | fftSetup | Host FFT setup structure |
| [in] | gridIndex | Grid index which is to be converted |
- Template Parameters
-
| pmeToFft | A boolean which tells if this is conversion from PME grid to FFT grid or reverse |
template<bool pmeToFft>
| void convertPmeGridToFftGrid |
( |
const PmeGpu * |
pmeGpu, |
|
|
DeviceBuffer< float > * |
d_fftRealGrid, |
|
|
int |
gridIndex |
|
) |
| |
Copy PME Grid with overlap region to device FFT grid and vice-versa. Used in full GPU PME decomposition.
- Parameters
-
| [in] | pmeGpu | The PME GPU structure. |
| [in] | d_fftRealGrid | FFT grid on device |
| [in] | gridIndex | Grid index which is to be converted |
- Template Parameters
-
| pmeToFft | A boolean which tells if this is conversion from PME grid to FFT grid or reverse |
| void pmeGpuGridHaloExchange |
( |
const PmeGpu * |
pmeGpu, |
|
|
gmx_wallcycle * |
wcycle |
|
) |
| |
Grid Halo exchange after PME spread ToDo: Current implementation transfers halo region from/to only immediate neighbours And, expects that overlapSize <= local grid width. Implement exchange with multiple neighbors to remove this limitation ToDo: Current implementation synchronizes pmeStream to make sure data is ready on GPU after spread. Consider using events for this synchnozation.
- Parameters
-
| [in] | pmeGpu | The PME GPU structure. |
| [in] | wcycle | The wallclock counter. |
| void pmeGpuGridHaloExchangeReverse |
( |
const PmeGpu * |
pmeGpu, |
|
|
gmx_wallcycle * |
wcycle |
|
) |
| |
Grid reverse Halo exchange before PME gather ToDo: Current implementation transfers halo region from/to only immediate neighbours And, expects that overlapSize <= local grid width. Implement exchange with multiple neighbors to remove this limitation ToDo: Current implementation synchronizes pmeStream to make sure data is ready on GPU after FFT to PME grid conversion. Consider using events for this synchnozation.
- Parameters
-
| [in] | pmeGpu | The PME GPU structure. |
| [in] | wcycle | The wallclock counter. |
| constexpr int sc_subGroupSize = 32 |
|
static |
Sub-group size for conversion kernels.
Chosen to match relevant hardware widths on supported hardware. In particular, PonteVecchio is 32-wide.