Implements helper routines for PME gather and spline routines.
- Author
- Andrey Alekseenko al42a.nosp@m.nd@g.nosp@m.mail..nosp@m.com
|
template<typename T > |
void | anonymous_namespace{pme_gpu_calculate_splines_sycl.h}::assertIsFinite (T arg) |
| Asserts if the argument is finite. More...
|
|
template<> |
void | anonymous_namespace{pme_gpu_calculate_splines_sycl.h}::assertIsFinite (Float3 gmx_unused arg) |
|
template<typename T > |
void | anonymous_namespace{pme_gpu_calculate_splines_sycl.h}::assertIsFinite (T gmx_unused arg) |
|
template<int order, int atomsPerSubGroup> |
static int | getSplineParamIndexBase (int subGroupIndex, int atomSubGroupIndex) |
| Gets a base of the unique index to an element in a spline parameter buffer (theta/dtheta), which is laid out for GPU spread/gather kernels. The base only corresponds to the atom index within the execution block. Feed the result into getSplineParamIndex() to get a full index. TODO: it's likely that both parameters can be just replaced with a single atom index, as they are derived from it. Do that, verifying that the generated code is not bloated, and/or revise the spline indexing scheme. Removing warp dependency would also be nice (and would probably coincide with removing c_pmeSpreadGatherAtomsPerWarp). More...
|
|
template<int order, int atomsPerSubGroup> |
static int | getSplineParamIndex (int paramIndexBase, int dimIndex, int splineIndex) |
| Gets a unique index to an element in a spline parameter buffer (theta/dtheta), which is laid out for GPU spread/gather kernels. The index is wrt to the execution block, in range(0, atomsPerBlock * order * DIM). This function consumes result of getSplineParamIndexBase() and adjusts it for dimIndex and splineIndex . More...
|
|
static bool | pmeGpuCheckAtomCharge (const float charge) |
| An inline function for skipping the zero-charge atoms when we have c_skipNeutralAtoms set to true . More...
|
|
template<typename T , int atomsPerWorkGroup, int dataCountPerAtom> |
static void | pmeGpuStageAtomData (sycl::local_ptr< T > sm_destination, const sycl::global_ptr< const T > gm_source, sycl::nd_item< 3 > itemIdx) |
| General purpose function for loading atom-related data from global to shared memory. More...
|
|
template<int order, int atomsPerBlock, int atomsPerWarp, bool writeSmDtheta, bool writeGlobal, int numGrids, int subGroupSize> |
static void | calculateSplines (const int atomIndexOffset, const Float3 atomX, const float atomCharge, const gmx::IVec tablesOffsets, const gmx::RVec realGridSizeFP, const gmx::RVec currentRecipBox0, const gmx::RVec currentRecipBox1, const gmx::RVec currentRecipBox2, sycl::global_ptr< float > gm_theta, sycl::global_ptr< float > gm_dtheta, sycl::global_ptr< int > gm_gridlineIndices, const sycl::global_ptr< const float > gm_fractShiftsTable, const sycl::global_ptr< const int > gm_gridlineIndicesTable, sycl::local_ptr< float > sm_theta, sycl::local_ptr< float > sm_dtheta, sycl::local_ptr< int > sm_gridlineIndices, sycl::local_ptr< float > sm_fractCoords, sycl::nd_item< 3 > itemIdx) |
| PME GPU spline parameter and gridline indices calculation. This corresponds to the CPU functions calc_interpolation_idx() and make_bsplines(). First stage of the whole kernel. More...
|
|
template<int order, int atomsPerBlock, int atomsPerWarp, bool writeSmDtheta, bool writeGlobal, int numGrids, int subGroupSize>
static void calculateSplines |
( |
const int |
atomIndexOffset, |
|
|
const Float3 |
atomX, |
|
|
const float |
atomCharge, |
|
|
const gmx::IVec |
tablesOffsets, |
|
|
const gmx::RVec |
realGridSizeFP, |
|
|
const gmx::RVec |
currentRecipBox0, |
|
|
const gmx::RVec |
currentRecipBox1, |
|
|
const gmx::RVec |
currentRecipBox2, |
|
|
sycl::global_ptr< float > |
gm_theta, |
|
|
sycl::global_ptr< float > |
gm_dtheta, |
|
|
sycl::global_ptr< int > |
gm_gridlineIndices, |
|
|
const sycl::global_ptr< const float > |
gm_fractShiftsTable, |
|
|
const sycl::global_ptr< const int > |
gm_gridlineIndicesTable, |
|
|
sycl::local_ptr< float > |
sm_theta, |
|
|
sycl::local_ptr< float > |
sm_dtheta, |
|
|
sycl::local_ptr< int > |
sm_gridlineIndices, |
|
|
sycl::local_ptr< float > |
sm_fractCoords, |
|
|
sycl::nd_item< 3 > |
itemIdx |
|
) |
| |
|
inlinestatic |
PME GPU spline parameter and gridline indices calculation. This corresponds to the CPU functions calc_interpolation_idx() and make_bsplines(). First stage of the whole kernel.
- Template Parameters
-
order | PME interpolation order. |
atomsPerBlock | Number of atoms processed by a block - should be accounted for in the sizes of the shared memory arrays. |
atomsPerWarp | Number of atoms processed by a warp |
writeSmDtheta | Bool controlling if the theta derivative should be written to shared memory. Enables calculation of dtheta if set. |
writeGlobal | A boolean which tells if the theta values and gridlines should be written to global memory. Enables calculation of dtheta if set. |
numGrids | The number of grids using the splines. |
subGroupSize | The size of a sub-group (warp). |
- Parameters
-
[in] | atomIndexOffset | Starting atom index for the execution block in the global memory. |
[in] | atomX | Coordinates of atom processed by thread. |
[in] | atomCharge | Charge/coefficient of atom processed by thread. |
[in] | tablesOffsets | Offsets for X/Y/Z components of gm_fractShiftsTable and gm_gridlineIndicesTable . |
[in] | realGridSizeFP | Real-space grid dimensions, converted to floating point. |
[in] | currentRecipBox0 | Current reciprocal (inverted unit cell) box, vector 1. |
[in] | currentRecipBox1 | Current reciprocal (inverted unit cell) box, vector 2. |
[in] | currentRecipBox2 | Current reciprocal (inverted unit cell) box, vector 3. |
[out] | gm_theta | Atom spline values in the global memory. Used only if writeGlobal is true . |
[out] | gm_dtheta | Derivatives of atom spline values in the global memory. Used only if writeGlobal is true . |
[out] | gm_gridlineIndices | Atom gridline indices in the global memory. Used only if writeGlobal is true . |
[in] | gm_fractShiftsTable | Fractional shifts lookup table in the global memory. |
[in] | gm_gridlineIndicesTable | Gridline indices lookup table in the global memory. |
[out] | sm_theta | Atom spline values in the local memory. |
[out] | sm_dtheta | Derivatives of atom spline values in the local memory. |
[out] | sm_gridlineIndices | Atom gridline indices in the local memory. |
[out] | sm_fractCoords | Fractional coordinates in the local memory. |
[in] | itemIdx | SYCL thread ID. |
template<int order, int atomsPerSubGroup>
static int getSplineParamIndex |
( |
int |
paramIndexBase, |
|
|
int |
dimIndex, |
|
|
int |
splineIndex |
|
) |
| |
|
inlinestatic |
Gets a unique index to an element in a spline parameter buffer (theta/dtheta), which is laid out for GPU spread/gather kernels. The index is wrt to the execution block, in range(0, atomsPerBlock * order * DIM). This function consumes result of getSplineParamIndexBase() and adjusts it for dimIndex
and splineIndex
.
- Template Parameters
-
order | PME order |
atomsPerSubGroup | Number of atoms processed by a sub group |
- Parameters
-
[in] | paramIndexBase | Must be result of getSplineParamIndexBase(). |
[in] | dimIndex | Dimension index (from 0 to 2) |
[in] | splineIndex | Spline contribution index (from 0 to order - 1) |
- Returns
- Index into theta or dtheta array using GPU layout.
template<int order, int atomsPerSubGroup>
static int getSplineParamIndexBase |
( |
int |
subGroupIndex, |
|
|
int |
atomSubGroupIndex |
|
) |
| |
|
inlinestatic |
Gets a base of the unique index to an element in a spline parameter buffer (theta/dtheta), which is laid out for GPU spread/gather kernels. The base only corresponds to the atom index within the execution block. Feed the result into getSplineParamIndex() to get a full index. TODO: it's likely that both parameters can be just replaced with a single atom index, as they are derived from it. Do that, verifying that the generated code is not bloated, and/or revise the spline indexing scheme. Removing warp dependency would also be nice (and would probably coincide with removing c_pmeSpreadGatherAtomsPerWarp).
- Template Parameters
-
order | PME order |
atomsPerSubGroup | Number of atoms processed by a sub group |
- Parameters
-
[in] | subGroupIndex | Sub group index in the work group. |
[in] | atomSubGroupIndex | Atom index in the sub group (from 0 to atomsPerSubGroup - 1). |
- Returns
- Index into theta or dtheta array using GPU layout.