|
static Float2 | Nbnxm::convertSigmaEpsilonToC6C12 (const float sigma, const float epsilon) |
| Convert sigma and epsilon VdW parameters to c6 ,c12 pair.
|
|
template<bool doCalcEnergies> |
static void | Nbnxm::ljForceSwitch (const shift_consts_t dispersionShift, const shift_consts_t repulsionShift, const float rVdwSwitch, const float c6, const float c12, const float rInv, const float r2, sycl::private_ptr< float > fInvR, sycl::private_ptr< float > eLJ) |
| Calculate force and energy for a pair of atoms, VdW force-switch flavor.
|
|
template<enum VdwType vdwType> |
static float | Nbnxm::calculateLJEwaldC6Grid (const sycl::global_ptr< const Float2 > a_nbfpComb, const int typeI, const int typeJ) |
| Fetch C6 grid contribution coefficients and return the product of these.
|
|
template<bool doCalcEnergies, enum VdwType vdwType> |
static void | Nbnxm::ljEwaldComb (const sycl::global_ptr< const Float2 > a_nbfpComb, const float sh_lj_ewald, const int typeI, const int typeJ, const float r2, const float r2Inv, const float lje_coeff2, const float lje_coeff6_6, const float int_bit, sycl::private_ptr< float > fInvR, sycl::private_ptr< float > eLJ) |
| Calculate LJ-PME grid force contribution with geometric or LB combination rule.
|
|
template<bool doCalcEnergies> |
static void | Nbnxm::ljPotentialSwitch (const switch_consts_t vdwSwitch, const float rVdwSwitch, const float rInv, const float r2, sycl::private_ptr< float > fInvR, sycl::private_ptr< float > eLJ) |
| Apply potential switch.
|
|
static float | Nbnxm::pmeCorrF (const float z2) |
| Calculate analytical Ewald correction term.
|
|
template<typename T > |
static T | Nbnxm::lerp (T d0, T d1, T t) |
| Linear interpolation using exactly two FMA operations. More...
|
|
static float | Nbnxm::interpolateCoulombForceR (const sycl::global_ptr< const float > a_coulombTab, const float coulombTabScale, const float r) |
| Interpolate Ewald coulomb force correction using the F*r table.
|
|
static void | Nbnxm::reduceForceJShuffle (Float3 f, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int aidx, sycl::global_ptr< Float3 > a_f) |
| Reduce c_clSize j-force components using shifts and atomically accumulate into a_f. More...
|
|
template<int subGroupSize, int groupSize> |
static float | Nbnxm::groupReduce (const sycl::nd_item< 3 > itemIdx, const unsigned int tidxi, sycl::local_ptr< float > sm_buf, float valueToReduce) |
| Do workgroup-level reduction of a single float . More...
|
|
static void | Nbnxm::reduceForceJGeneric (sycl::local_ptr< float > sm_buf, Float3 f, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int aidx, sycl::global_ptr< Float3 > a_f) |
| Reduce c_clSize j-force components using local memory and atomically accumulate into a_f. More...
|
|
template<bool useShuffleReduction> |
static void | Nbnxm::reduceForceJ (sycl::local_ptr< float > sm_buf, Float3 f, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int aidx, sycl::global_ptr< Float3 > a_f) |
| Reduce c_clSize j-force components using either shifts or local memory and atomically accumulate into a_f.
|
|
static void | Nbnxm::reduceForceIAndFShiftGeneric (sycl::local_ptr< float > sm_buf, const float fCiBufX[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufY[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufZ[c_nbnxnGpuNumClusterPerSupercluster], const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift) |
| Local memory-based i-force reduction. More...
|
|
template<int numShuffleReductionSteps> |
static void | Nbnxm::reduceForceIAndFShiftShuffles (const float fCiBufX[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufY[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufZ[c_nbnxnGpuNumClusterPerSupercluster], const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift) |
| Shuffle-based i-force reduction. More...
|
|
template<> |
void | Nbnxm::reduceForceIAndFShiftShuffles< 1 > (const float fCiBufX[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufY[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufZ[c_nbnxnGpuNumClusterPerSupercluster], const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift) |
| reduceForceIAndFShiftShuffles specialization for single-step reduction (e.g., Intel iGPUs). More...
|
|
template<bool useShuffleReduction, int subGroupSize> |
static void | Nbnxm::reduceForceIAndFShift (sycl::local_ptr< float > sm_buf, const float fCiBufX[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufY[c_nbnxnGpuNumClusterPerSupercluster], const float fCiBufZ[c_nbnxnGpuNumClusterPerSupercluster], const bool calcFShift, const sycl::nd_item< 3 > itemIdx, const int tidxi, const int tidxj, const int sci, const int shift, sycl::global_ptr< Float3 > a_f, sycl::global_ptr< Float3 > a_fShift) |
| Final i-force reduction. More...
|
|
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType> |
static auto | Nbnxm::nbnxmKernel (sycl::handler &cgh, DeviceAccessor< Float4, mode::read > a_xq, DeviceAccessor< Float3, mode::read_write > a_f, DeviceAccessor< Float3, mode::read > a_shiftVec, DeviceAccessor< Float3, mode::read_write > a_fShift, OptionalAccessor< float, mode::read_write, doCalcEnergies > a_energyElec, OptionalAccessor< float, mode::read_write, doCalcEnergies > a_energyVdw, DeviceAccessor< nbnxn_cj_packed_t, doPruneNBL?mode::read_write:mode::read > a_plistCJPacked, DeviceAccessor< nbnxn_sci_t, mode::read > a_plistSci, DeviceAccessor< nbnxn_excl_t, mode::read > a_plistExcl, OptionalAccessor< Float2, mode::read, ljComb< vdwType >> a_ljComb, OptionalAccessor< int, mode::read,!ljComb< vdwType >> a_atomTypes, OptionalAccessor< Float2, mode::read,!ljComb< vdwType >> a_nbfp, OptionalAccessor< Float2, mode::read, ljEwald< vdwType >> a_nbfpComb, OptionalAccessor< float, mode::read, elecEwaldTab< elecType >> a_coulombTab, const int numTypes, const float rCoulombSq, const float rVdwSq, const float twoKRf, const float ewaldBeta, const float rlistOuterSq, const float ewaldShift, const float epsFac, const float ewaldCoeffLJ_2, const float cRF, const shift_consts_t dispersionShift, const shift_consts_t repulsionShift, const switch_consts_t vdwSwitch, const float rVdwSwitch, const float ljEwaldShift, const float coulombTabScale, const bool calcShift) |
| Main kernel for NBNXM. More...
|
|
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType, class... Args> |
static void | Nbnxm::launchNbnxmKernel (const DeviceStream &deviceStream, const int numSci, Args &&...args) |
| NBNXM kernel launch code.
|
|
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies, class... Args> |
void | Nbnxm::chooseAndLaunchNbnxmKernel (enum ElecType elecType, enum VdwType vdwType, Args &&...args) |
| Select templated kernel and launch it.
|
|
template<int subGroupSize, bool doPruneNBL, bool doCalcEnergies> |
void | Nbnxm::launchNbnxmKernelHelper (NbnxmGpu *nb, const gmx::StepWorkload &stepWork, const InteractionLocality iloc) |
|