#include "gmxpre.h"
#include "gromacs/fft/parallel_3dfft.h"
#include "pme_gpu_grid.h"
#include "pme_gpu_types.h"
#include "pme_gpu_types_host.h"
#include "pme_gpu_types_host_impl.h"

Include dependency graph for pme_gpu_ocl_stubs.cpp:

Description

Implements stubs of high-level PME GPU functions for OpenCL.

Author: Gaurav Garg gaugarg@nvidia.com

Functions
void	pmeGpuGridHaloExchange (const PmeGpu *, gmx_wallcycle *)
	Grid halo exchange after PME spread. ToDo: The current implementation transfers the halo region from/to immediate neighbours only and expects that overlapSize <= local grid width; implement exchange with multiple neighbours to remove this limitation. ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after spread; consider using events for this synchronization. More...

void	pmeGpuGridHaloExchangeReverse (const PmeGpu *, gmx_wallcycle *)
	Grid reverse halo exchange before PME gather. ToDo: The current implementation transfers the halo region from/to immediate neighbours only and expects that overlapSize <= local grid width; implement exchange with multiple neighbours to remove this limitation. ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after the FFT-to-PME grid conversion; consider using events for this synchronization. More...

template<bool forward>
void	convertPmeGridToFftGrid (const PmeGpu *, float *, gmx_parallel_3dfft_t *, const int)
	Copy PME Grid with overlap region to host FFT grid and vice-versa. Used in mixed mode PME decomposition. More...

template<bool forward>
void	convertPmeGridToFftGrid (const PmeGpu *, DeviceBuffer< float > *, const int)
	Copy PME Grid with overlap region to device FFT grid and vice-versa. Used in full GPU PME decomposition. More...

template void	convertPmeGridToFftGrid< true > (const PmeGpu *, float *, gmx_parallel_3dfft_t *, const int)

template void	convertPmeGridToFftGrid< false > (const PmeGpu *, float *, gmx_parallel_3dfft_t *, const int)

template void	convertPmeGridToFftGrid< true > (const PmeGpu *, DeviceBuffer< float > *, const int)

template void	convertPmeGridToFftGrid< false > (const PmeGpu *, DeviceBuffer< float > *, const int)

Function Documentation

template<bool forward>

void convertPmeGridToFftGrid	(	const PmeGpu *	pmeGpu,
		float *	h_fftRealGrid,
		gmx_parallel_3dfft_t *	fftSetup,
		int	gridIndex
	)

Copy PME Grid with overlap region to host FFT grid and vice-versa. Used in mixed mode PME decomposition.

Parameters

[in]	pmeGpu	The PME GPU structure.
[in]	h_fftRealGrid	FFT grid on host
[in]	fftSetup	Host FFT setup structure
[in]	gridIndex	Grid index which is to be converted

Template Parameters

forward	(documented elsewhere as pmeToFft) A boolean that selects whether this is a conversion from the PME grid to the FFT grid or the reverse.

template<bool forward>

void convertPmeGridToFftGrid	(	const PmeGpu *	pmeGpu,
		DeviceBuffer< float > *	d_fftRealGrid,
		int	gridIndex
	)

Copy PME Grid with overlap region to device FFT grid and vice-versa. Used in full GPU PME decomposition.

Parameters

[in]	pmeGpu	The PME GPU structure.
[in]	d_fftRealGrid	FFT grid on device
[in]	gridIndex	Grid index which is to be converted

Template Parameters

forward	(documented elsewhere as pmeToFft) A boolean that selects whether this is a conversion from the PME grid to the FFT grid or the reverse.

void pmeGpuGridHaloExchange	(	const PmeGpu *	pmeGpu,
		gmx_wallcycle *	wcycle
	)

Grid halo exchange after PME spread.

ToDo: The current implementation transfers the halo region from/to immediate neighbours only and expects that overlapSize <= local grid width. Implement exchange with multiple neighbours to remove this limitation.

ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after spread. Consider using events for this synchronization.

Parameters

[in]	pmeGpu	The PME GPU structure.
[in]	wcycle	The wallclock counter.

void pmeGpuGridHaloExchangeReverse	(	const PmeGpu *	pmeGpu,
		gmx_wallcycle *	wcycle
	)

Grid reverse halo exchange before PME gather.

ToDo: The current implementation transfers the halo region from/to immediate neighbours only and expects that overlapSize <= local grid width. Implement exchange with multiple neighbours to remove this limitation.

ToDo: The current implementation synchronizes pmeStream to make sure data is ready on the GPU after the FFT-to-PME grid conversion. Consider using events for this synchronization.

Parameters

[in]	pmeGpu	The PME GPU structure.
[in]	wcycle	The wallclock counter.

Description

Functions

Function Documentation