#include "gmxpre.h"
#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_consts.h"
#include "gromacs/mdlib/nbnxn_gpu.h"
#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
#include "gromacs/mdlib/nbnxn_gpu_jit_support.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
#include "nbnxn_ocl_internal.h"
#include "nbnxn_ocl_types.h"

Include dependency graph for nbnxn_ocl_data_mgmt.cpp:

Description

Define OpenCL implementation of nbnxn_gpu_data_mgmt.h.

Authors: Anca Hamuraru anca@streamcomputing.eu; Dimitrios Karkoulis dimitris.karkoulis@gmail.com; Teemu Virolainen teemu@streamcomputing.eu; Szilárd Páll pall.szilard@gmail.com

Functions
bool	useLjCombRule (int vdwType)
	Returns true if LJ combination rules are used in the non-bonded kernels. More...

void	ocl_free_buffered (cl_mem d_ptr, int *n, int *nalloc)
	Free device buffers. More...

void	ocl_realloc_buffered (cl_mem *d_dest, void *h_src, size_t type_size, int *curr_size, int *curr_alloc_size, int req_size, cl_context context, cl_command_queue s, bool bAsync=true, cl_event *copy_event=NULL)
	Reallocates device buffers. More...

static void	free_ocl_buffer (cl_mem *buffer)
	Releases the input OpenCL buffer.

static void	init_ewald_coulomb_force_table (const interaction_const_t *ic, cl_nbparam_t *nbp, const gmx_device_runtime_data_t *runData)
	Tabulates the Ewald Coulomb force and initializes the size/scale and the table GPU array. More...

static void	init_atomdata_first (cl_atomdata_t ad, int ntypes, gmx_device_runtime_data_t runData)
	Initializes the atomdata structure for the first time; it only gets filled at pair-search.

static void	set_cutoff_parameters (cl_nbparam_t nbp, const interaction_const_t ic)
	Copies all parameters related to the cut-off from ic to nbp.

static void	map_interaction_types_to_gpu_kernel_flavors (const interaction_const_t *ic, int combRule, int *gpu_eeltype, int *gpu_vdwtype)
	Returns the kinds of electrostatics and Vdw OpenCL kernels that will be used. More...

static void	init_nbparam (cl_nbparam_t nbp, const interaction_const_t ic, const nbnxn_atomdata_t nbat, const gmx_device_runtime_data_t runData)
	Initializes the nonbonded parameter data structure.

void	nbnxn_gpu_pme_loadbal_update_param (const nonbonded_verlet_t nbv, const interaction_const_t ic)
	This function is documented in the header file.

static void	init_plist (cl_plist_t *pl)
	Initializes the pair list data structure.

static void	init_timers (cl_timers_t *t, bool bUseTwoStreams)
	Initializes the timer data structure.

static void	init_timings (gmx_wallclock_gpu_t *t)
	Initializes the timings data structure.

static void	nbnxn_gpu_create_context (gmx_device_runtime_data_t *runtimeData, const gmx_device_info_t *devInfo, int rank)
	Creates context for OpenCL GPU given by `mygpu`. More...

static cl_kernel	nbnxn_gpu_create_kernel (gmx_nbnxn_ocl_t nb, const char kernel_name)
	Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure.

static void	nbnxn_ocl_clear_e_fshift (gmx_nbnxn_ocl_t *nb)
	Clears nonbonded shift force output array and energy outputs on the GPU.

static void	nbnxn_gpu_init_kernels (gmx_nbnxn_ocl_t *nb)
	Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure.

static void	nbnxn_ocl_init_const (gmx_nbnxn_ocl_t *nb, const interaction_const_t *ic, const nonbonded_verlet_group_t *nbv_group)
	Initializes simulation constant data. More...

void	nbnxn_gpu_init (gmx_nbnxn_ocl_t *p_nb, const gmx_gpu_info_t gpu_info, const gmx_gpu_opt_t gpu_opt, const interaction_const_t ic, nonbonded_verlet_group_t *nbv_grp, int my_gpu_index, int rank, gmx_bool bLocalAndNonlocal)
	This function is documented in the header file.

static void	nbnxn_ocl_clear_f (gmx_nbnxn_ocl_t *nb, int natoms_clear)
	Clears the first natoms_clear elements of the GPU nonbonded force output array.

void	nbnxn_gpu_clear_outputs (gmx_nbnxn_ocl_t *nb, int flags)
	This function is documented in the header file.

void	nbnxn_gpu_init_pairlist (gmx_nbnxn_ocl_t nb, const nbnxn_pairlist_t h_plist, int iloc)
	This function is documented in the header file.

void	nbnxn_gpu_upload_shiftvec (gmx_nbnxn_ocl_t nb, const nbnxn_atomdata_t nbatom)
	This function is documented in the header file.

void	nbnxn_gpu_init_atomdata (gmx_nbnxn_ocl_t nb, const struct nbnxn_atomdata_t nbat)
	This function is documented in the header file.

void	free_kernel (cl_kernel *kernel_ptr)
	Releases an OpenCL kernel pointer.

void	free_kernels (cl_kernel *kernels, int count)
	Releases a list of OpenCL kernel pointers.

static void	free_gpu_device_runtime_data (gmx_device_runtime_data_t *runData)
	Free the OpenCL runtime data (context and program). More...

void	nbnxn_gpu_free (gmx_nbnxn_ocl_t *nb)
	This function is documented in the header file.

gmx_wallclock_gpu_t *	nbnxn_gpu_get_timings (gmx_nbnxn_ocl_t *nb)
	This function is documented in the header file.

void	nbnxn_gpu_reset_timings (nonbonded_verlet_t *nbv)
	This function is documented in the header file.

int	nbnxn_gpu_min_ci_balanced (gmx_nbnxn_ocl_t *nb)
	This function is documented in the header file.

gmx_bool	nbnxn_gpu_is_kernel_ewald_analytical (const gmx_nbnxn_ocl_t *nb)
	This function is documented in the header file.

Variables
static unsigned int	gpu_min_ci_balanced_factor = 50
	This parameter should be determined heuristically from the kernel execution times. More...

Function Documentation

static void free_gpu_device_runtime_data ( gmx_device_runtime_data_t * runData )

static

Free the OpenCL runtime data (context and program).

The function releases the OpenCL context and program associated with the device that the calling PP rank is running on.

Parameters

runData [in] pointer to the structure with runtime data.

static void init_ewald_coulomb_force_table	(	const interaction_const_t *	ic,
		cl_nbparam_t *	nbp,
		const gmx_device_runtime_data_t *	runData
	)

static

Tabulates the Ewald Coulomb force and initializes the size/scale and the table GPU array.

If called with an already allocated table, it just re-uploads the table.

static void map_interaction_types_to_gpu_kernel_flavors	(	const interaction_const_t *	ic,
		int	combRule,
		int *	gpu_eeltype,
		int *	gpu_vdwtype
	)

static

Returns the kinds of electrostatics and Vdw OpenCL kernels that will be used.

Respectively, these values are from enum eelOcl and enum evdwOcl.

static void nbnxn_gpu_create_context	(	gmx_device_runtime_data_t *	runtimeData,
		const gmx_device_info_t *	devInfo,
		int	rank
	)

static

Creates context for OpenCL GPU given by mygpu.

A fatal error results if creation fails.

Parameters

[in,out]	runtimeData	runtime data including program and context
[in]	devInfo	device info struct
[in]	rank	MPI rank (for error reporting)

static void nbnxn_ocl_init_const	(	gmx_nbnxn_ocl_t *	nb,
		const interaction_const_t *	ic,
		const nonbonded_verlet_group_t *	nbv_group
	)

static

Initializes simulation constant data.

Initializes members of the atomdata and nbparam structs and clears e/fshift output buffers.

void ocl_free_buffered	(	cl_mem	d_ptr,
		int *	n,
		int *	nalloc
	)

Free device buffers.

If the pointers to the size variables are NULL, no resetting happens.

void ocl_realloc_buffered	(	cl_mem *	d_dest,
		void *	h_src,
		size_t	type_size,
		int *	curr_size,
		int *	curr_alloc_size,
		int	req_size,
		cl_context	context,
		cl_command_queue	s,
		bool	bAsync = `true`,
		cl_event *	copy_event = `NULL`
	)

Reallocates device buffers.

Reallocates the memory pointed to by d_dest and copies the data from the location pointed to by the host-side pointer h_src. Allocation is buffered and therefore freeing is only needed if the previously allocated space is not enough. The H2D copy is launched in command queue s and can be done synchronously or asynchronously (the default is the latter). If copy_event is not NULL, on return it will contain an event object identifying the H2D copy. The event can further be used to queue a wait for this operation or to query profiling information. OpenCL equivalent of cu_realloc_buffered.

bool useLjCombRule ( int vdwType )

Returns true if LJ combination rules are used in the non-bonded kernels.

Full doc in nbnxn_ocl_internal.h

Variable Documentation

unsigned int gpu_min_ci_balanced_factor = 50

static

This parameter should be determined heuristically from the kernel execution times.

This value is best for small systems on a single AMD Radeon R9 290X (and about 5% faster than 40, which is the default for CUDA devices). Larger simulation systems were quite insensitive to the value of this parameter.

Description

Functions

Variables

Function Documentation

Variable Documentation