Gromacs
2020.4
|
#include "gmxpre.h"
#include <assert.h>
#include <stdlib.h>
#include "thread_mpi/atomic.h"
#include "gromacs/gpu_utils/gputraits_ocl.h"
#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_common.h"
#include "gromacs/nbnxm/gpu_common_utils.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_gpu.h"
#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "nbnxm_ocl_internal.h"
#include "nbnxm_ocl_types.h"
Defines the OpenCL implementation of nbnxm_gpu.h.
TODO (psz):
Functions | |
static void | Nbnxm::validate_global_work_size (const KernelLaunchConfig &config, int work_dim, const gmx_device_info_t *dinfo) |
Validates the input global work size parameter. | |
static cl_kernel | Nbnxm::selectPruneKernel (cl_kernel kernel_pruneonly[], bool firstPrunePass) |
Return a pointer to the prune kernel version to be executed at the current invocation. More... | |
static cl_kernel | Nbnxm::select_nbnxn_kernel (gmx_nbnxn_ocl_t *nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune) |
Return a pointer to the kernel version to be executed at the current step. OpenCL kernel objects are cached in nb. If the requested kernel is not found in the cache, it will be created and the cache will be updated. | |
static int | Nbnxm::calc_shmem_required_nonbonded (int vdwType, bool bPrefetchLjParam) |
Calculates the amount of shared memory required by the nonbonded kernel in use. | |
static void | Nbnxm::fillin_ocl_structures (cl_nbparam_t *nbp, cl_nbparam_params_t *nbparams_params) |
Initializes data structures that are going to be sent to the OpenCL device. More... | |
static void | Nbnxm::sync_ocl_event (cl_command_queue stream, cl_event *ocl_event) |
Enqueues a wait for event completion. More... | |
void | Nbnxm::gpu_copy_xq_to_gpu (gmx_nbnxn_ocl_t *nb, const nbnxn_atomdata_t *nbatom, const AtomLocality atomLocality) |
Asynchronously launch the host-to-device copy of the xq buffer. | |
void | Nbnxm::gpu_launch_kernel (gmx_nbnxn_ocl_t *nb, const gmx::StepWorkload &stepWork, const Nbnxm::InteractionLocality iloc) |
Launch GPU kernel. More... | |
static int | Nbnxm::calc_shmem_required_prune (const int num_threads_z) |
Calculates the amount of shared memory required by the prune kernel. More... | |
void | Nbnxm::gpu_launch_kernel_pruneonly (gmx_nbnxn_gpu_t *nb, const InteractionLocality iloc, const int numParts) |
Launch the pairlist prune-only kernel for the given locality. numParts specifies in how many parts (i.e. separate calls) the list will be pruned. | |
void | Nbnxm::gpu_launch_cpyback (gmx_nbnxn_ocl_t *nb, struct nbnxn_atomdata_t *nbatom, const gmx::StepWorkload &stepWork, const AtomLocality aloc) |
Asynchronously launch the download of nonbonded forces from the GPU (and of energies/shift forces if required). | |
int | Nbnxm::nbnxn_gpu_pick_ewald_kernel_type (const interaction_const_t &ic) |
Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. | |
Variables | |
static const char * | Nbnxm::nb_kfunc_noener_noprune_ptr [eelOclNR][evdwOclNR] |
Force-only kernel function names. | |
static const char * | Nbnxm::nb_kfunc_ener_noprune_ptr [eelOclNR][evdwOclNR] |
Force + energy kernel function names. | |
static const char * | Nbnxm::nb_kfunc_noener_prune_ptr [eelOclNR][evdwOclNR] |
Force + pruning kernel function names. | |
static const char * | Nbnxm::nb_kfunc_ener_prune_ptr [eelOclNR][evdwOclNR] |
Force + energy + pruning kernel function names. | |
static const int | Nbnxm::c_numClPerSupercl = 8 |
Convenience constants. | |
static const int | Nbnxm::c_clSize = c_nbnxnGpuClusterSize |