#include "config.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <algorithm>
#include "impl_reference_definitions.h"
#include "impl_reference_simd_double.h"

Include dependency graph for impl_reference_util_double.h:

This graph shows which files directly or indirectly include this file:

Description

Reference impl., higher-level double prec. SIMD utility functions.

Author: Erik Lindahl erik.lindahl@scilifelab.se

Higher-level SIMD utility functions, double precision.
These include generic functions to work with triplets of data, typically coordinates, and a few utility functions to load and update data in the nonbonded kernels. These functions should be available on all implementations.
static const int	gmx::c_simdBestPairAlignmentDouble = 2
	Best alignment to use for aligned pairs of double data. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadTranspose (const double *base, const std::int32_t offset[], SimdDouble *v0, SimdDouble *v1, SimdDouble *v2, SimdDouble *v3)
	Load 4 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH offsets, and transpose into 4 SIMD double variables. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadTranspose (const double *base, const std::int32_t offset[], SimdDouble *v0, SimdDouble *v1)
	Load 2 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH offsets, and transpose into 2 SIMD double variables. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadUTranspose (const double *base, const std::int32_t offset[], SimdDouble *v0, SimdDouble *v1, SimdDouble *v2)
	Load 3 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH offsets, and transpose into 3 SIMD double variables. More...

template<int align>
static void gmx_simdcall	gmx::transposeScatterStoreU (double *base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
	Transpose and store 3 SIMD doubles to 3 consecutive addresses at GMX_SIMD_DOUBLE_WIDTH offsets. More...

template<int align>
static void gmx_simdcall	gmx::transposeScatterIncrU (double *base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
	Transpose and add 3 SIMD doubles to 3 consecutive addresses at GMX_SIMD_DOUBLE_WIDTH offsets. More...

template<int align>
static void gmx_simdcall	gmx::transposeScatterDecrU (double *base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
	Transpose and subtract 3 SIMD doubles from 3 consecutive addresses at GMX_SIMD_DOUBLE_WIDTH offsets. More...

static void gmx_simdcall	gmx::expandScalarsToTriplets (SimdDouble scalar, SimdDouble *triplets0, SimdDouble *triplets1, SimdDouble *triplets2)
	Expand each element of double SIMD variable into three identical consecutive elements in three SIMD outputs. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadBySimdIntTranspose (const double *base, SimdDInt32 offset, SimdDouble *v0, SimdDouble *v1, SimdDouble *v2, SimdDouble *v3)
	Load 4 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH offsets specified by a SIMD integer, transpose into 4 SIMD double variables. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadUBySimdIntTranspose (const double *base, SimdDInt32 offset, SimdDouble *v0, SimdDouble *v1)
	Load 2 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH offsets (unaligned) specified by SIMD integer, transpose into 2 SIMD doubles. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadBySimdIntTranspose (const double *base, SimdDInt32 offset, SimdDouble *v0, SimdDouble *v1)
	Load 2 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH offsets specified by a SIMD integer, transpose into 2 SIMD double variables. More...

static double gmx_simdcall	gmx::reduceIncr4ReturnSum (double *m, SimdDouble v0, SimdDouble v1, SimdDouble v2, SimdDouble v3)
	Reduce each of four SIMD doubles, add those values to four consecutive doubles in memory, return sum. More...

Functions
Higher-level SIMD utilities accessing partial (half-width) SIMD doubles.
See the single-precision versions for documentation. Since double precision is typically half the width of single, this double version is likely only useful with 512-bit and larger implementations.
static SimdDouble gmx_simdcall	gmx::loadDualHsimd (const double *m0, const double *m1)
	Load low & high parts of SIMD double from different locations. More...

static SimdDouble gmx_simdcall	gmx::loadDuplicateHsimd (const double *m)
	Load half-SIMD-width double data, spread to both halves. More...

static SimdDouble gmx_simdcall	gmx::loadU1DualHsimd (const double *m)
	Load two doubles, spread 1st in low half, 2nd in high half. More...

static void gmx_simdcall	gmx::storeDualHsimd (double *m0, double *m1, SimdDouble a)
	Store low & high parts of SIMD double to different locations. More...

static void gmx_simdcall	gmx::incrDualHsimd (double *m0, double *m1, SimdDouble a)
	Add each half of SIMD variable to separate memory addresses. More...

static void gmx_simdcall	gmx::decr3Hsimd (double *m, SimdDouble a0, SimdDouble a1, SimdDouble a2)
	Add the two halves of three SIMD doubles, subtract the sum from three half-SIMD-width consecutive doubles in memory. More...

template<int align>
static void gmx_simdcall	gmx::gatherLoadTransposeHsimd (const double *base0, const double *base1, std::int32_t offset[], SimdDouble *v0, SimdDouble *v1)
	Load 2 consecutive doubles from each of GMX_SIMD_DOUBLE_WIDTH/2 offsets, transpose into SIMD double (low half from base0, high from base1). More...

static double gmx_simdcall	gmx::reduceIncr4ReturnSumHsimd (double *m, SimdDouble v0, SimdDouble v1)
	Reduce the 4 half-SIMD-width doubles in 2 SIMD variables (sum halves), increment four consecutive doubles in memory, return sum. More...

static SimdDouble gmx_simdcall	gmx::loadUNDuplicate4 (const double *m)
	Load N doubles and duplicate them 4 times each. More...

static SimdDouble gmx_simdcall	gmx::load4DuplicateN (const double *m)
	Load 4 doubles and duplicate them N times each. More...

static SimdDouble gmx_simdcall	gmx::loadU4NOffset (const double *m, int offset)
	Load doubles in blocks of 4 at fixed offsets. More...

Description

Higher-level SIMD utility functions, double precision.

Functions