Gromacs  2025.0-dev-20241009-5c23d5f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Classes | Macros | Typedefs | Functions
nbnxm_sycl_kernel_body.h File Reference
#include "gromacs/gpu_utils/device_utils_hip_sycl.h"
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/gmxsycl.h"
#include "gromacs/math/functions.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/nbnxm/nbnxm_enums.h"
#include "gromacs/nbnxm/nbnxm_kernel_utils.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/template_mp.h"
#include "nbnxm_sycl_kernel.h"
#include "nbnxm_sycl_kernel_utils.h"
#include "nbnxm_sycl_types.h"
+ Include dependency graph for nbnxm_sycl_kernel_body.h:
+ This graph shows which files directly or indirectly include this file:

Description

NBNXM SYCL kernels.

Classes

class  gmx::NbnxmKernel< doPruneNBL, doCalcEnergies, elecType, vdwType, subGroupSize >
 Class name for NBNXM kernel. More...
 

Macros

#define GMX_NBNXM_ENABLE_PACKED_FLOAT3   1
 Macro to control the enablement of manually-packed Float3 structure. More...
 

Typedefs

using gmx::FCiFloat3 = Float3
 

Functions

constexpr bool gmx::c_avoidFloatingPointAtomics (PairlistType layoutType)
 Should we avoid FP atomics to the same location from the same work-group? More...
 
static void gmx::reduceForceJShuffle (Float3 f, const sycl::nd_item< 3 > &itemIdx, const int tidxi, const int aidx, sycl::global_ptr< Float3 > a_f)
 Reduce c_clSize j-force components using shifts and atomically accumulate into a_f. More...
 
template<int subGroupSize, int groupSize>
static float gmx::groupReduce (const sycl::nd_item< 3 > itemIdx, const unsigned int tidxi, sycl::local_ptr< float > sm_buf, float valueToReduce)
 Do workgroup-level reduction of a single float. More...
 
static void gmx::reduceForceJGeneric (sycl::local_ptr< float > sm_buf, Float3 f, const sycl::nd_item< 3 > &itemIdx, const int tidxi, const int tidxj, const int aidx, sycl::global_ptr< Float3 > a_f)
 Reduce c_clSize j-force components using local memory and atomically accumulate into a_f. More...
 
template<bool useShuffleReduction>
static void gmx::reduceForceJ (sycl::local_ptr< float > sm_buf, Float3 f, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int aidx, sycl::global_ptr< Float3 > a_f)
 Reduce c_clSize j-force components using either shifts or local memory and atomically accumulate into a_f.
 
template<typename FCiBufferWrapperX , typename FCiBufferWrapperY , typename FCiBufferWrapperZ >
static void gmx::reduceForceIAndFShiftGeneric (sycl::local_ptr< float > sm_buf, const FCiBufferWrapperX &fCiBufX, const FCiBufferWrapperY &fCiBufY, const FCiBufferWrapperZ &fCiBufZ, const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift)
 Local memory-based i-force reduction. More...
 
template<int numShuffleReductionSteps, typename FCiBufferWrapperX , typename FCiBufferWrapperY , typename FCiBufferWrapperZ >
static std::enable_if_t
< numShuffleReductionSteps!=1,
void > 
gmx::reduceForceIAndFShiftShuffles (const FCiBufferWrapperX &fCiBufX, const FCiBufferWrapperY &fCiBufY, const FCiBufferWrapperZ &fCiBufZ, const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift)
 Shuffle-based i-force reduction. More...
 
template<int numShuffleReductionSteps, typename FCiBufferWrapperX , typename FCiBufferWrapperY , typename FCiBufferWrapperZ >
static std::enable_if_t
< numShuffleReductionSteps==1,
void > 
gmx::reduceForceIAndFShiftShuffles (const FCiBufferWrapperX &fCiBufX, const FCiBufferWrapperY &fCiBufY, const FCiBufferWrapperZ &fCiBufZ, const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift)
 reduceForceIAndFShiftShuffles specialization for single-step reduction (e.g., Intel iGPUs). More...
 
template<bool useShuffleReduction, int subGroupSize, typename FCiBufferWrapperX , typename FCiBufferWrapperY , typename FCiBufferWrapperZ >
static void gmx::reduceForceIAndFShift (sycl::local_ptr< float > sm_buf, const FCiBufferWrapperX &fCiBufX, const FCiBufferWrapperY &fCiBufY, const FCiBufferWrapperZ &fCiBufZ, const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift)
 Final i-force reduction. More...
 
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType>
static auto gmx::nbnxmKernel (sycl::handler &cgh, const Float4 *__restrict__ gm_xq, Float3 *__restrict__ gm_f, const Float3 *__restrict__ gm_shiftVec, Float3 *__restrict__ gm_fShift, float *__restrict__ gm_energyElec, float *__restrict__ gm_energyVdw, nbnxn_cj_packed_t *__restrict__ gm_plistCJPacked, const nbnxn_sci_t *__restrict__ gm_plistSci, const nbnxn_excl_t *__restrict__ gm_plistExcl, const Float2 *__restrict__ gm_ljComb, const int *__restrict__ gm_atomTypes, const Float2 *__restrict__ gm_nbfp, const Float2 *__restrict__ gm_nbfpComb, const float *__restrict__ gm_coulombTab, int *__restrict__ gm_sciHistogram, int *__restrict__ gm_sciCount, const int numTypes, const float rCoulombSq, const float rVdwSq, const float twoKRf, const float ewaldBeta, const float rlistOuterSq, const float ewaldShift, const float epsFac, const float ewaldCoeffLJ_2, const float cRF, const shift_consts_t dispersionShift, const shift_consts_t repulsionShift, const switch_consts_t vdwSwitch, const float rVdwSwitch, const float ljEwaldShift, const float coulombTabScale, const bool calcShift)
 Main kernel for NBNXM. More...
 
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType, class... Args>
static void gmx::launchNbnxmKernel (const DeviceStream &deviceStream, const int numSci, Args &&...args)
 NBNXM kernel launch code.
 
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies, class... Args>
void gmx::chooseAndLaunchNbnxmKernel (enum ElecType elecType, enum VdwType vdwType, Args &&...args)
 Select templated kernel and launch it.
 
template<bool isMultiGpuBoard, bool doPruneNBL, bool doCalcEnergies>
void gmx::launchNbnxmKernelHelper (NbnxmGpu *nb, const StepWorkload &stepWork, InteractionLocality iloc)
 

Macro Definition Documentation

#define GMX_NBNXM_ENABLE_PACKED_FLOAT3   1

Macro to control the enablement of manually-packed Float3 structure.

If enabled (default), the explicit packed math will be used on devices where it is known to be beneficial (currently, AMD MI250X / gfx90a). If disabled, packed math will never be explicitly used.

This only controls the use of AmdPackedFloat3 datastructure, not the layout of fCi buffer.

See issue #4854