Implements PME force gathering in SYCL.
- Author
- Andrey Alekseenko al42and@gmail.com
|
#define | INSTANTIATE_3(order, numGrids, readGlobal, threadsPerAtom, subGroupSize) template class PmeGatherKernel<order, true, true, numGrids, readGlobal, threadsPerAtom, subGroupSize>; |
| Kernel instantiations.
|
|
#define | INSTANTIATE_2(order, numGrids, threadsPerAtom, subGroupSize) |
|
#define | INSTANTIATE(order, subGroupSize) |
|
|
float | readGridSize (const float *realGridSizeFP, const int dimIndex) |
| Use loads from constant address space indexed by constant offsets rather than dynamic index-based accesses to the grid size data to avoid local memory operations and related large overhead. More...
|
|
template<int order, int atomDataSize, int workGroupSize, int subGroupSize> |
void | reduceAtomForces (sycl::nd_item< 3 > itemIdx, sycl::local_ptr< Float3 > sm_forces, const int atomIndexLocal, const int splineIndex, const int gmx_unused lineIndex, const float realGridSizeFP[3], float &fx, float &fy, float &fz) |
| Reduce the partial force contributions. More...
|
|
template<int order, int atomsPerWarp, bool wrapX, bool wrapY> |
void | sumForceComponents (sycl::private_ptr< float > fx, sycl::private_ptr< float > fy, sycl::private_ptr< float > fz, const int ithyMin, const int ithyMax, const int ixBase, const int iz, const int nx, const int ny, const int pny, const int pnz, const int atomIndexLocal, const int splineIndexBase, const sycl::float2 tdz, const sycl::local_ptr< int > sm_gridlineIndices, const sycl::local_ptr< float > sm_theta, const sycl::local_ptr< float > sm_dtheta, const sycl::global_ptr< const float > gm_grid) |
| Calculate the sum of the force partial components (in X, Y and Z). More...
|
|
void | calculateAndStoreGridForces (sycl::local_ptr< Float3 > sm_forces, const int forceIndexLocal, const int forceIndexGlobal, const Float3 &recipBox0, const Float3 &recipBox1, const Float3 &recipBox2, const float scale, const sycl::global_ptr< const float > gm_coefficients) |
| Calculate the grid forces and store them in shared memory. More...
|
|
template<int order, bool wrapX, bool wrapY, int numGrids, bool readGlobal, ThreadsPerAtom threadsPerAtom, int subGroupSize> |
auto | pmeGatherKernel (sycl::handler &cgh, const int nAtoms, const float *__restrict__ gm_gridA, const float *__restrict__ gm_gridB, const float *__restrict__ gm_coefficientsA, const float *__restrict__ gm_coefficientsB, const Float3 *__restrict__ gm_coordinates, Float3 *__restrict__ gm_forces, const float *__restrict__ gm_theta, const float *__restrict__ gm_dtheta, const int *__restrict__ gm_gridlineIndices, const float *__restrict__ gm_fractShiftsTable, const int *__restrict__ gm_gridlineIndicesTable, const gmx::IVec tablesOffsets, const gmx::IVec realGridSize, const gmx::RVec realGridSizeFP, const gmx::IVec realGridSizePadded, const gmx::RVec currentRecipBox0, const gmx::RVec currentRecipBox1, const gmx::RVec currentRecipBox2, const float scale) |
| A SYCL kernel which gathers the atom forces from the grid. The grid is assumed to be wrapped in dimension Z. More...
|
|
template<int order, bool wrapX, bool wrapY, int numGrids, bool readGlobal, ThreadsPerAtom threadsPerAtom, int subGroupSize>
auto pmeGatherKernel |
( |
sycl::handler & |
cgh, |
|
|
const int |
nAtoms, |
|
|
const float *__restrict__ |
gm_gridA, |
|
|
const float *__restrict__ |
gm_gridB, |
|
|
const float *__restrict__ |
gm_coefficientsA, |
|
|
const float *__restrict__ |
gm_coefficientsB, |
|
|
const Float3 *__restrict__ |
gm_coordinates, |
|
|
Float3 *__restrict__ |
gm_forces, |
|
|
const float *__restrict__ |
gm_theta, |
|
|
const float *__restrict__ |
gm_dtheta, |
|
|
const int *__restrict__ |
gm_gridlineIndices, |
|
|
const float *__restrict__ |
gm_fractShiftsTable, |
|
|
const int *__restrict__ |
gm_gridlineIndicesTable, |
|
|
const gmx::IVec |
tablesOffsets, |
|
|
const gmx::IVec |
realGridSize, |
|
|
const gmx::RVec |
realGridSizeFP, |
|
|
const gmx::IVec |
realGridSizePadded, |
|
|
const gmx::RVec |
currentRecipBox0, |
|
|
const gmx::RVec |
currentRecipBox1, |
|
|
const gmx::RVec |
currentRecipBox2, |
|
|
const float |
scale |
|
) |
| |
A SYCL kernel which gathers the atom forces from the grid. The grid is assumed to be wrapped in dimension Z.
- Template Parameters
-
order | PME interpolation order. |
wrapX | A boolean which tells if the grid overlap in dimension X should be wrapped. |
wrapY | A boolean which tells if the grid overlap in dimension Y should be wrapped. |
numGrids | The number of grids to use in the kernel. Can be 1 or 2. |
readGlobal | Tells if we should read spline values from global memory. |
threadsPerAtom | How many threads work on each atom. |
subGroupSize | Size of the sub-group. |