Collaboration diagram for SIMD intrinsics interface (simd):

Description

Provides an architecture-independent way of doing SIMD coding.

An overview of the SIMD implementation is provided on the page "Single-instruction Multiple-data (SIMD) coding". The details are documented in simd.h and in the reference implementation, impl_reference.h.

Author: Erik Lindahl <erik.lindahl@scilifelab.se>

SIMD implementation capability definitions
#define	GMX_SIMD_HAVE_FLOAT
	Defined when SIMD float support is present. More...

#define	GMX_SIMD_HAVE_DOUBLE
	Defined if SIMD double support is present.

#define	GMX_SIMD_HAVE_HARDWARE /* For Doxygen */
	Defined if SIMD is implemented with real hardware instructions.

#define	GMX_SIMD_HAVE_LOADU
	Defined if the SIMD implementation supports unaligned loads.

#define	GMX_SIMD_HAVE_STOREU
	Defined if the SIMD implementation supports unaligned stores.

#define	GMX_SIMD_HAVE_LOGICAL
	Defined if SIMD implementation has logical operations on floating-point data.

#define	GMX_SIMD_HAVE_FMA /* For Doxygen */
	Defined if SIMD fused multiply-add uses hardware instructions.

#define	GMX_SIMD_HAVE_FRACTION /* For Doxygen */
	Defined if the SIMD fraction has a direct hardware instruction.

#define	GMX_SIMD_HAVE_FINT32
	Defined if the SIMD implementation has gmx_simd_fint32_t.

#define	GMX_SIMD_HAVE_FINT32_EXTRACT
	Support for extracting integers from gmx_simd_fint32_t.

#define	GMX_SIMD_HAVE_FINT32_LOGICAL
	Defined if SIMD logical operations are supported for gmx_simd_fint32_t.

#define	GMX_SIMD_HAVE_FINT32_ARITHMETICS
	Defined if SIMD arithmetic operations are supported for gmx_simd_fint32_t.

#define	GMX_SIMD_HAVE_DINT32
	Defined if the SIMD implementation has gmx_simd_dint32_t. More...

#define	GMX_SIMD_HAVE_DINT32_EXTRACT
	Support for extracting integers from gmx_simd_dint32_t.

#define	GMX_SIMD_HAVE_DINT32_LOGICAL
	Defined if logical operations are supported for gmx_simd_dint32_t.

#define	GMX_SIMD_HAVE_DINT32_ARITHMETICS
	Defined if SIMD arithmetic operations are supported for gmx_simd_dint32_t.

#define	GMX_SIMD4_HAVE_FLOAT
	Defined if the implementation provides gmx_simd4_float_t.

#define	GMX_SIMD4_HAVE_DOUBLE
	Defined if the implementation provides gmx_simd4_double_t.

#define	GMX_SIMD_FLOAT_WIDTH 4
	Width of the gmx_simd_float_t datatype.

#define	GMX_SIMD_DOUBLE_WIDTH 4
	Width of the gmx_simd_double_t datatype.

#define	GMX_SIMD_FINT32_WIDTH GMX_SIMD_FLOAT_WIDTH
	Width of the gmx_simd_fint32_t datatype.

#define	GMX_SIMD_DINT32_WIDTH GMX_SIMD_DOUBLE_WIDTH
	Width of the gmx_simd_dint32_t datatype.

#define	GMX_SIMD_RSQRT_BITS 23
	Accuracy of SIMD 1/sqrt(x) lookup. Used to determine number of iterations.

#define	GMX_SIMD_RCP_BITS 23
	Accuracy of SIMD 1/x lookup. Used to determine number of iterations.

SIMD implementation load/store operations for single precision floating point
static gmx_simd_float_t	gmx_simd_load_f (const float *m)
	Load GMX_SIMD_FLOAT_WIDTH numbers from aligned memory. More...

static gmx_simd_float_t	gmx_simd_load1_f (const float *m)
	Set all SIMD variable elements to the float pointed to by m (unaligned). More...

static gmx_simd_float_t	gmx_simd_set1_f (float r)
	Set all SIMD float variable elements to the value r. More...

static gmx_simd_float_t	gmx_simd_setzero_f ()
	Set all SIMD float variable elements to 0.0f. More...

static void	gmx_simd_store_f (float *m, gmx_simd_float_t a)
	Store the contents of the SIMD float variable a to aligned memory m. More...

#define	gmx_simd_loadu_f gmx_simd_load_f
	Load SIMD float from unaligned memory. More...

#define	gmx_simd_storeu_f gmx_simd_store_f
	Store SIMD float to unaligned memory. More...

SIMD implementation load/store operations for double precision floating point
static gmx_simd_double_t	gmx_simd_load_d (const double *m)
	Load GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory. More...

static gmx_simd_double_t	gmx_simd_load1_d (const double *m)
	Set all SIMD variable elements to the double pointed to by m (unaligned). More...

static gmx_simd_double_t	gmx_simd_set1_d (double r)
	Set all SIMD double variable elements to the value r. More...

static gmx_simd_double_t	gmx_simd_setzero_d ()
	Set all SIMD double variable elements to 0.0. More...

static void	gmx_simd_store_d (double *m, gmx_simd_double_t a)
	Store the contents of the SIMD double variable a to aligned memory m. More...

#define	gmx_simd_loadu_d gmx_simd_load_d
	Load SIMD double from unaligned memory. More...

#define	gmx_simd_storeu_d gmx_simd_store_d
	Store SIMD double to unaligned memory. More...

SIMD implementation load/store operations for integers (corresponding to float)
static gmx_simd_fint32_t	gmx_simd_load_fi (const gmx_int32_t *m)
	Load aligned SIMD integer data, width corresponds to gmx_simd_float_t. More...

static gmx_simd_fint32_t	gmx_simd_set1_fi (gmx_int32_t b)
	Set SIMD from integer, width corresponds to gmx_simd_float_t. More...

static gmx_simd_fint32_t	gmx_simd_setzero_fi ()
	Set all SIMD variable elements to 0, width corresponds to gmx_simd_float_t. More...

static gmx_simd_fint32_t	gmx_simd_store_fi (int *m, gmx_simd_fint32_t a)
	Store aligned SIMD integer data, width corresponds to gmx_simd_float_t. More...

static gmx_int32_t	gmx_simd_extract_fi (gmx_simd_fint32_t a, int index)
	Extract the element at position index from gmx_simd_fint32_t. More...

#define	gmx_simd_loadu_fi gmx_simd_load_fi
	Load unaligned integer SIMD data, width corresponds to gmx_simd_float_t. More...

#define	gmx_simd_storeu_fi gmx_simd_store_fi
	Store unaligned SIMD integer data, width corresponds to gmx_simd_float_t. More...

SIMD implementation load/store operations for integers (corresponding to double)
static gmx_simd_dint32_t	gmx_simd_load_di (const gmx_int32_t *m)
	Load aligned SIMD integer data, width corresponds to gmx_simd_double_t. More...

static gmx_simd_dint32_t	gmx_simd_set1_di (gmx_int32_t b)
	Set SIMD from integer, width corresponds to gmx_simd_double_t. More...

static gmx_simd_dint32_t	gmx_simd_setzero_di ()
	Set all SIMD variable elements to 0, width corresponds to gmx_simd_double_t. More...

static gmx_simd_dint32_t	gmx_simd_store_di (gmx_int32_t *m, gmx_simd_dint32_t a)
	Store aligned SIMD integer data, width corresponds to gmx_simd_double_t. More...

static gmx_int32_t	gmx_simd_extract_di (gmx_simd_dint32_t a, int index)
	Extract the element at position index from gmx_simd_dint32_t. More...

#define	gmx_simd_loadu_di gmx_simd_load_di
	Load unaligned integer SIMD data, width corresponds to gmx_simd_double_t. More...

#define	gmx_simd_storeu_di gmx_simd_store_di
	Store unaligned SIMD integer data, width corresponds to gmx_simd_double_t. More...

SIMD implementation single precision floating-point bitwise logical operations
static gmx_simd_float_t	gmx_simd_and_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Bitwise and for two SIMD float variables. Supported with GMX_SIMD_HAVE_LOGICAL. More...

static gmx_simd_float_t	gmx_simd_andnot_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Bitwise andnot for SIMD float. c=(~a) & b. Supported with GMX_SIMD_HAVE_LOGICAL. More...

static gmx_simd_float_t	gmx_simd_or_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Bitwise or for SIMD float. Supported with GMX_SIMD_HAVE_LOGICAL. More...

static gmx_simd_float_t	gmx_simd_xor_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Bitwise xor for SIMD float. Supported with GMX_SIMD_HAVE_LOGICAL. More...

SIMD implementation single precision floating-point arithmetics
static gmx_simd_float_t	gmx_simd_add_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Add two float SIMD variables. More...

static gmx_simd_float_t	gmx_simd_sub_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Subtract two SIMD variables. More...

static gmx_simd_float_t	gmx_simd_mul_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Multiply two SIMD variables. More...

static gmx_simd_float_t	gmx_simd_rsqrt_f (gmx_simd_float_t x)
	SIMD 1.0/sqrt(x) lookup. More...

static gmx_simd_float_t	gmx_simd_rcp_f (gmx_simd_float_t x)
	SIMD 1.0/x lookup. More...

static gmx_simd_float_t	gmx_simd_fabs_f (gmx_simd_float_t a)
	SIMD Floating-point fabs(). More...

static gmx_simd_float_t	gmx_simd_fneg_f (gmx_simd_float_t a)
	SIMD floating-point negate. More...

static gmx_simd_float_t	gmx_simd_max_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Set each SIMD element to the largest from two variables. More...

static gmx_simd_float_t	gmx_simd_min_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Set each SIMD element to the smallest from two variables. More...

static gmx_simd_float_t	gmx_simd_round_f (gmx_simd_float_t a)
	Round to nearest integer value (in floating-point format). More...

static gmx_simd_float_t	gmx_simd_trunc_f (gmx_simd_float_t a)
	Truncate SIMD, i.e. round towards zero - common hardware instruction. More...

static gmx_simd_float_t	gmx_simd_fraction_f (gmx_simd_float_t a)
	Fraction of the SIMD floating point number. More...

static gmx_simd_float_t	gmx_simd_get_exponent_f (gmx_simd_float_t a)
	Extract (integer) exponent from single precision SIMD. More...

static gmx_simd_float_t	gmx_simd_get_mantissa_f (gmx_simd_float_t a)
	Get SIMD mantissa. More...

static gmx_simd_float_t	gmx_simd_set_exponent_f (gmx_simd_float_t a)
	Set (integer) exponent from single precision floating-point SIMD. More...

#define	gmx_simd_fmadd_f(a, b, c) gmx_simd_add_f(gmx_simd_mul_f(a, b), c)
	Fused-multiply-add. Result is a*b+c. More...

#define	gmx_simd_fmsub_f(a, b, c) gmx_simd_sub_f(gmx_simd_mul_f(a, b), c)
	Fused-multiply-subtract. Result is a*b-c. More...

#define	gmx_simd_fnmadd_f(a, b, c) gmx_simd_sub_f(c, gmx_simd_mul_f(a, b))
	Fused-negated-multiply-add. Result is -a*b+c. More...

#define	gmx_simd_fnmsub_f(a, b, c) gmx_simd_sub_f(gmx_simd_setzero_f(), gmx_simd_fmadd_f(a, b, c))
	Fused-negated-multiply-sub. Result is -a*b-c. More...

SIMD implementation single precision floating-point comparisons, boolean, selection.
static gmx_simd_fbool_t	gmx_simd_cmpeq_f (gmx_simd_float_t a, gmx_simd_float_t b)
	SIMD a==b for single SIMD. More...

static gmx_simd_fbool_t	gmx_simd_cmplt_f (gmx_simd_float_t a, gmx_simd_float_t b)
	SIMD a<b for single SIMD. More...

static gmx_simd_fbool_t	gmx_simd_cmple_f (gmx_simd_float_t a, gmx_simd_float_t b)
	SIMD a<=b for single SIMD. More...

static gmx_simd_fbool_t	gmx_simd_and_fb (gmx_simd_fbool_t a, gmx_simd_fbool_t b)
	Logical and on single precision SIMD booleans. More...

static gmx_simd_fbool_t	gmx_simd_or_fb (gmx_simd_fbool_t a, gmx_simd_fbool_t b)
	Logical or on single precision SIMD booleans. More...

static int	gmx_simd_anytrue_fb (gmx_simd_fbool_t a)
	Returns non-zero if any of the booleans in a is True, otherwise 0. More...

static gmx_simd_float_t	gmx_simd_blendzero_f (gmx_simd_float_t a, gmx_simd_fbool_t sel)
	Select from single precision SIMD variable where boolean is true. More...

static gmx_simd_float_t	gmx_simd_blendnotzero_f (gmx_simd_float_t a, gmx_simd_fbool_t sel)
	Select from single precision SIMD variable where boolean is false. More...

static gmx_simd_float_t	gmx_simd_blendv_f (gmx_simd_float_t a, gmx_simd_float_t b, gmx_simd_fbool_t sel)
	Vector-blend SIMD selection. More...

static float	gmx_simd_reduce_f (gmx_simd_float_t a)
	Return sum of all elements in SIMD float variable. More...

SIMD implementation double precision floating-point bitwise logical operations
static gmx_simd_double_t	gmx_simd_and_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Bitwise and for two SIMD double variables. Supported with GMX_SIMD_HAVE_LOGICAL. More...

static gmx_simd_double_t	gmx_simd_andnot_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Bitwise andnot for SIMD double. c=(~a) & b. Supported with GMX_SIMD_HAVE_LOGICAL. More...

static gmx_simd_double_t	gmx_simd_or_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Bitwise or for SIMD double. Supported with GMX_SIMD_HAVE_LOGICAL. More...

static gmx_simd_double_t	gmx_simd_xor_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Bitwise xor for SIMD double. Supported with GMX_SIMD_HAVE_LOGICAL. More...

SIMD implementation double precision floating-point arithmetics
static gmx_simd_double_t	gmx_simd_add_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Add two double SIMD variables. More...

static gmx_simd_double_t	gmx_simd_sub_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Subtract two double SIMD variables. More...

static gmx_simd_double_t	gmx_simd_mul_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Multiply two SIMD variables. More...

static gmx_simd_double_t	gmx_simd_rsqrt_d (gmx_simd_double_t x)
	SIMD 1.0/sqrt(x) lookup. More...

static gmx_simd_double_t	gmx_simd_rcp_d (gmx_simd_double_t x)
	1.0/x lookup. More...

static gmx_simd_double_t	gmx_simd_fabs_d (gmx_simd_double_t a)
	SIMD Floating-point fabs(). More...

static gmx_simd_double_t	gmx_simd_fneg_d (gmx_simd_double_t a)
	SIMD floating-point negate. More...

static gmx_simd_double_t	gmx_simd_max_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Set each SIMD element to the largest from two variables. More...

static gmx_simd_double_t	gmx_simd_min_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Set each SIMD element to the smallest from two variables. More...

static gmx_simd_double_t	gmx_simd_round_d (gmx_simd_double_t a)
	Round to nearest integer value (in double floating-point format). More...

static gmx_simd_double_t	gmx_simd_trunc_d (gmx_simd_double_t a)
	Truncate SIMD, i.e. round towards zero - common hardware instruction. More...

static gmx_simd_double_t	gmx_simd_fraction_d (gmx_simd_double_t a)
	Fraction of the SIMD floating point number. More...

static gmx_simd_double_t	gmx_simd_get_exponent_d (gmx_simd_double_t a)
	Extract (integer) exponent from double precision SIMD. More...

static gmx_simd_double_t	gmx_simd_get_mantissa_d (gmx_simd_double_t a)
	Get SIMD double mantissa. More...

static gmx_simd_double_t	gmx_simd_set_exponent_d (gmx_simd_double_t a)
	Set (integer) exponent from double precision floating-point SIMD. More...

#define	gmx_simd_fmadd_d(a, b, c) gmx_simd_add_d(gmx_simd_mul_d(a, b), c)
	Fused-multiply-add. Result is a*b+c. More...

#define	gmx_simd_fmsub_d(a, b, c) gmx_simd_sub_d(gmx_simd_mul_d(a, b), c)
	Fused-multiply-subtract. Result is a*b-c. More...

#define	gmx_simd_fnmadd_d(a, b, c) gmx_simd_sub_d(c, gmx_simd_mul_d(a, b))
	Fused-negated-multiply-add. Result is -a*b+c. More...

#define	gmx_simd_fnmsub_d(a, b, c) gmx_simd_sub_d(gmx_simd_setzero_d(), gmx_simd_fmadd_d(a, b, c))
	Fused-negated-multiply-sub. Result is -a*b-c. More...

SIMD implementation double precision floating-point comparison, boolean, selection.
static gmx_simd_dbool_t	gmx_simd_cmpeq_d (gmx_simd_double_t a, gmx_simd_double_t b)
	SIMD a==b for double SIMD. More...

static gmx_simd_dbool_t	gmx_simd_cmplt_d (gmx_simd_double_t a, gmx_simd_double_t b)
	SIMD a<b for double SIMD. More...

static gmx_simd_dbool_t	gmx_simd_cmple_d (gmx_simd_double_t a, gmx_simd_double_t b)
	SIMD a<=b for double SIMD. More...

static gmx_simd_dbool_t	gmx_simd_and_db (gmx_simd_dbool_t a, gmx_simd_dbool_t b)
	Logical and on double precision SIMD booleans. More...

static gmx_simd_dbool_t	gmx_simd_or_db (gmx_simd_dbool_t a, gmx_simd_dbool_t b)
	Logical or on double precision SIMD booleans. More...

static int	gmx_simd_anytrue_db (gmx_simd_dbool_t a)
	Returns non-zero if any of the booleans in a is True, otherwise 0. More...

static gmx_simd_double_t	gmx_simd_blendzero_d (gmx_simd_double_t a, gmx_simd_dbool_t sel)
	Select from double SIMD variable where boolean is true. More...

static gmx_simd_double_t	gmx_simd_blendnotzero_d (gmx_simd_double_t a, gmx_simd_dbool_t sel)
	Select from double SIMD variable where boolean is false. More...

static gmx_simd_double_t	gmx_simd_blendv_d (gmx_simd_double_t a, gmx_simd_double_t b, gmx_simd_dbool_t sel)
	Vector-blend double SIMD selection. More...

static double	gmx_simd_reduce_d (gmx_simd_double_t a)
	Return sum of all elements in SIMD double variable. More...

SIMD implementation integer (corresponding to float) bitwise logical operations
static gmx_simd_fint32_t	gmx_simd_slli_fi (gmx_simd_fint32_t a, int n)
	SIMD integer shift left logical, based on immediate value. More...

static gmx_simd_fint32_t	gmx_simd_srli_fi (gmx_simd_fint32_t a, int n)
	SIMD integer shift right logical, based on immediate value. More...

static gmx_simd_fint32_t	gmx_simd_and_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Integer SIMD bitwise and. More...

static gmx_simd_fint32_t	gmx_simd_andnot_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Integer SIMD bitwise not-and. More...

static gmx_simd_fint32_t	gmx_simd_or_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Integer SIMD bitwise or. More...

static gmx_simd_fint32_t	gmx_simd_xor_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Integer SIMD bitwise xor. More...

SIMD implementation integer (corresponding to float) arithmetics
static gmx_simd_fint32_t	gmx_simd_add_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Add SIMD integers. More...

static gmx_simd_fint32_t	gmx_simd_sub_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Subtract SIMD integers. More...

static gmx_simd_fint32_t	gmx_simd_mul_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Multiply SIMD integers. More...

SIMD implementation integer (corresponding to float) comparisons, boolean, selection
static gmx_simd_fibool_t	gmx_simd_cmpeq_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Equality comparison of two integers corresponding to float values. More...

static gmx_simd_fibool_t	gmx_simd_cmplt_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b)
	Less-than comparison of two SIMD integers corresponding to float values. More...

static gmx_simd_fibool_t	gmx_simd_and_fib (gmx_simd_fibool_t a, gmx_simd_fibool_t b)
	Logical AND on gmx_simd_fibool_t. More...

static gmx_simd_fibool_t	gmx_simd_or_fib (gmx_simd_fibool_t a, gmx_simd_fibool_t b)
	Logical OR on gmx_simd_fibool_t. More...

static int	gmx_simd_anytrue_fib (gmx_simd_fibool_t a)
	Returns non-zero if any of the booleans in a is True, otherwise 0. More...

static gmx_simd_fint32_t	gmx_simd_blendzero_fi (gmx_simd_fint32_t a, gmx_simd_fibool_t sel)
	Select from gmx_simd_fint32_t variable where boolean is true. More...

static gmx_simd_fint32_t	gmx_simd_blendnotzero_fi (gmx_simd_fint32_t a, gmx_simd_fibool_t sel)
	Select from gmx_simd_fint32_t variable where boolean is false. More...

static gmx_simd_fint32_t	gmx_simd_blendv_fi (gmx_simd_fint32_t a, gmx_simd_fint32_t b, gmx_simd_fibool_t sel)
	Vector-blend SIMD selection. More...

SIMD implementation integer (corresponding to double) bitwise logical operations
static gmx_simd_dint32_t	gmx_simd_slli_di (gmx_simd_dint32_t a, int n)
	SIMD integer shift left, based on immediate value. More...

static gmx_simd_dint32_t	gmx_simd_srli_di (gmx_simd_dint32_t a, int n)
	SIMD integer shift right, based on immediate value. More...

static gmx_simd_dint32_t	gmx_simd_and_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Integer bitwise and for SIMD variables. More...

static gmx_simd_dint32_t	gmx_simd_andnot_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Integer bitwise not-and for SIMD variables. More...

static gmx_simd_dint32_t	gmx_simd_or_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Integer bitwise or for SIMD variables. More...

static gmx_simd_dint32_t	gmx_simd_xor_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Integer bitwise xor for SIMD variables. More...

SIMD implementation integer (corresponding to double) arithmetics
static gmx_simd_dint32_t	gmx_simd_add_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Add SIMD integers, corresponding to double precision. More...

static gmx_simd_dint32_t	gmx_simd_sub_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Subtract SIMD integers, corresponding to double precision. More...

static gmx_simd_dint32_t	gmx_simd_mul_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Multiply SIMD integers, corresponding to double precision. More...

SIMD implementation integer (corresponding to double) comparisons, boolean selection
static gmx_simd_dibool_t	gmx_simd_cmpeq_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Equality comparison of two ints corresponding to double SIMD data. More...

static gmx_simd_dibool_t	gmx_simd_cmplt_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b)
	Less-than comparison of two ints corresponding to double SIMD data. More...

static gmx_simd_dibool_t	gmx_simd_and_dib (gmx_simd_dibool_t a, gmx_simd_dibool_t b)
	Logical AND on gmx_simd_dibool_t. More...

static gmx_simd_dibool_t	gmx_simd_or_dib (gmx_simd_dibool_t a, gmx_simd_dibool_t b)
	Logical OR on gmx_simd_dibool_t. More...

static int	gmx_simd_anytrue_dib (gmx_simd_dibool_t a)
	Returns non-zero if any of the double-int SIMD booleans in a is True, otherwise 0. More...

static gmx_simd_dint32_t	gmx_simd_blendzero_di (gmx_simd_dint32_t a, gmx_simd_dibool_t sel)
	Select from SIMD ints (corresponding to double) where boolean is true. More...

static gmx_simd_dint32_t	gmx_simd_blendnotzero_di (gmx_simd_dint32_t a, gmx_simd_dibool_t sel)
	Select from SIMD ints (corresponding to double) where boolean is false. More...

static gmx_simd_dint32_t	gmx_simd_blendv_di (gmx_simd_dint32_t a, gmx_simd_dint32_t b, gmx_simd_dibool_t sel)
	Vector-blend SIMD selection for double-int SIMD. More...

SIMD implementation conversion operations
static gmx_simd_fint32_t	gmx_simd_cvt_f2i (gmx_simd_float_t a)
	Round single precision floating point to integer. More...

static gmx_simd_fint32_t	gmx_simd_cvtt_f2i (gmx_simd_float_t a)
	Truncate single precision floating point to integer. More...

static gmx_simd_float_t	gmx_simd_cvt_i2f (gmx_simd_fint32_t a)
	Convert integer to single precision floating-point. More...

static gmx_simd_dint32_t	gmx_simd_cvt_d2i (gmx_simd_double_t a)
	Round double precision floating point to integer. More...

static gmx_simd_dint32_t	gmx_simd_cvtt_d2i (gmx_simd_double_t a)
	Truncate double precision floating point to integer. More...

static gmx_simd_double_t	gmx_simd_cvt_i2d (gmx_simd_dint32_t a)
	Convert integer to double precision floating-point. More...

static gmx_simd_fibool_t	gmx_simd_cvt_fb2fib (gmx_simd_fbool_t a)
	Convert from float boolean to corresponding integer boolean. More...

static gmx_simd_fbool_t	gmx_simd_cvt_fib2fb (gmx_simd_fibool_t a)
	Convert from integer boolean (corresponding to float) to float boolean. More...

static gmx_simd_dibool_t	gmx_simd_cvt_db2dib (gmx_simd_dbool_t a)
	Convert from double boolean to corresponding integer boolean. More...

static gmx_simd_dbool_t	gmx_simd_cvt_dib2db (gmx_simd_dibool_t a)
	Convert from integer boolean (corresponding to double) to double boolean. More...

static gmx_simd_double_t	gmx_simd_cvt_f2d (gmx_simd_float_t f)
	Convert SIMD float to double. More...

static gmx_simd_float_t	gmx_simd_cvt_d2f (gmx_simd_double_t d)
	Convert SIMD double to float. More...

static void	gmx_simd_cvt_f2dd (gmx_simd_float_t f, gmx_simd_double_t d0, gmx_simd_double_t d1)
	Convert SIMD float to double. More...

static gmx_simd_float_t	gmx_simd_cvt_dd2f (gmx_simd_double_t d0, gmx_simd_double_t d1)
	Convert SIMD double to float. More...

SIMD4. Constant width-4 SIMD types and instructions
static float	gmx_simd4_dotproduct3_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Return dot product of two single precision SIMD4 variables. More...

static double	gmx_simd4_dotproduct3_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Return dot product of two double precision SIMD4 variables. More...

#define	gmx_simd4_float_t gmx_simd_float_t
	SIMD4 float type. Available with GMX_SIMD4_HAVE_FLOAT. More...

#define	gmx_simd4_load_f gmx_simd_load_f
	Load SIMD4 float from aligned memory. More...

#define	gmx_simd4_load1_f gmx_simd_load1_f
	Set all elements of SIMD4 float from single pointer. More...

#define	gmx_simd4_set1_f gmx_simd_set1_f
	Set all SIMD4 float elements to the value r. More...

#define	gmx_simd4_store_f gmx_simd_store_f
	Store the contents of SIMD4 float pr to aligned memory m. More...

#define	gmx_simd4_loadu_f gmx_simd_loadu_f
	Load SIMD4 float from unaligned memory. More...

#define	gmx_simd4_storeu_f gmx_simd_storeu_f
	Store SIMD4 float to unaligned memory. More...

#define	gmx_simd4_setzero_f gmx_simd_setzero_f
	Set all SIMD4 float elements to 0. More...

#define	gmx_simd4_and_f gmx_simd_and_f
	Bitwise and for two SIMD4 float variables. More...

#define	gmx_simd4_andnot_f gmx_simd_andnot_f
	Bitwise andnot for two SIMD4 float variables. c=(~a) & b. More...

#define	gmx_simd4_or_f gmx_simd_or_f
	Bitwise or for two SIMD4 float variables. More...

#define	gmx_simd4_xor_f gmx_simd_xor_f
	Bitwise xor for two SIMD4 float variables. More...

#define	gmx_simd4_add_f gmx_simd_add_f
	Add two SIMD4 float variables. More...

#define	gmx_simd4_sub_f gmx_simd_sub_f
	Subtract two SIMD4 float variables. More...

#define	gmx_simd4_mul_f gmx_simd_mul_f
	Multiply two SIMD4 float variables. More...

#define	gmx_simd4_fmadd_f gmx_simd_fmadd_f
	Fused-multiply-add for SIMD4 float. Result is a*b+c. More...

#define	gmx_simd4_fmsub_f gmx_simd_fmsub_f
	Fused-multiply-subtract for SIMD4 float. Result is a*b-c. More...

#define	gmx_simd4_fnmadd_f gmx_simd_fnmadd_f
	Fused-negated-multiply-add for SIMD4 float. Result is -a*b+c. More...

#define	gmx_simd4_fnmsub_f gmx_simd_fnmsub_f
	Fused-negated-multiply-sub for SIMD4 float. Result is -a*b-c. More...

#define	gmx_simd4_rsqrt_f gmx_simd_rsqrt_f
	Lookup of approximate 1/sqrt(x) for SIMD4 float. More...

#define	gmx_simd4_fabs_f gmx_simd_fabs_f
	Floating-point absolute value for SIMD4 float. More...

#define	gmx_simd4_fneg_f gmx_simd_fneg_f
	Floating-point negate for SIMD4 float. More...

#define	gmx_simd4_max_f gmx_simd_max_f
	Set each SIMD4 float element to the largest from two variables. More...

#define	gmx_simd4_min_f gmx_simd_min_f
	Set each SIMD4 float element to the smallest from two variables. More...

#define	gmx_simd4_round_f gmx_simd_round_f
	Round to nearest integer value for SIMD4 float. More...

#define	gmx_simd4_trunc_f gmx_simd_trunc_f
	Truncate SIMD4 float, i.e. round towards zero. More...

#define	gmx_simd4_fbool_t gmx_simd_fbool_t
	SIMD4 variable type to use for logical comparisons on floats. More...

#define	gmx_simd4_cmpeq_f gmx_simd_cmpeq_f
	Equality comparison of two single precision SIMD4. More...

#define	gmx_simd4_cmplt_f gmx_simd_cmplt_f
	Less-than comparison of two single precision SIMD4. More...

#define	gmx_simd4_cmple_f gmx_simd_cmple_f
	Less-than-or-equal comparison of two single precision SIMD4. More...

#define	gmx_simd4_and_fb gmx_simd_and_fb
	Logical AND on float SIMD4 booleans. More...

#define	gmx_simd4_or_fb gmx_simd_or_fb
	Logical OR on float SIMD4 booleans. More...

#define	gmx_simd4_anytrue_fb gmx_simd_anytrue_fb
	Returns non-zero if any of the SIMD4 boolean in x is True. More...

#define	gmx_simd4_blendzero_f gmx_simd_blendzero_f
	Select from single precision SIMD4 variable where boolean is true. More...

#define	gmx_simd4_blendnotzero_f gmx_simd_blendnotzero_f
	Select from single precision SIMD4 variable where boolean is false. More...

#define	gmx_simd4_blendv_f gmx_simd_blendv_f
	Vector-blend instruction for SIMD4 float. More...

#define	gmx_simd4_reduce_f gmx_simd_reduce_f
	Return sum of all elements in SIMD4 float. More...

#define	gmx_simd4_double_t gmx_simd_double_t
	SIMD4 double type. Available with GMX_SIMD4_HAVE_DOUBLE. More...

#define	gmx_simd4_load_d gmx_simd_load_d
	Double precision SIMD4 load aligned. More...

#define	gmx_simd4_load1_d gmx_simd_load1_d
	Double precision SIMD4 load single value to all elements. More...

#define	gmx_simd4_set1_d gmx_simd_set1_d
	Double precision SIMD4 set all elements from value. More...

#define	gmx_simd4_store_d gmx_simd_store_d
	Double precision SIMD4 store to aligned memory. More...

#define	gmx_simd4_loadu_d gmx_simd_loadu_d
	Load unaligned SIMD4 double. More...

#define	gmx_simd4_storeu_d gmx_simd_storeu_d
	Store unaligned SIMD4 double. More...

#define	gmx_simd4_setzero_d gmx_simd_setzero_d
	Set all elements in SIMD4 double to 0.0. More...

#define	gmx_simd4_and_d gmx_simd_and_d
	Bitwise and for two SIMD4 double variables. More...

#define	gmx_simd4_andnot_d gmx_simd_andnot_d
	Bitwise andnot for SIMD4 double. c=(~a) & b. More...

#define	gmx_simd4_or_d gmx_simd_or_d
	Bitwise or for SIMD4 double. More...

#define	gmx_simd4_xor_d gmx_simd_xor_d
	Bitwise xor for SIMD4 double. More...

#define	gmx_simd4_add_d gmx_simd_add_d
	Add two SIMD4 double values. More...

#define	gmx_simd4_sub_d gmx_simd_sub_d
	Subtract two SIMD4 double values. More...

#define	gmx_simd4_mul_d gmx_simd_mul_d
	Multiply two SIMD4 double values. More...

#define	gmx_simd4_fmadd_d gmx_simd_fmadd_d
	Fused-multiply-add for SIMD4 double. Result is a*b+c. More...

#define	gmx_simd4_fmsub_d gmx_simd_fmsub_d
	Fused-multiply-subtract for SIMD4 double. Result is a*b-c. More...

#define	gmx_simd4_fnmadd_d gmx_simd_fnmadd_d
	Fused-negated-multiply-add for SIMD4 double. Result is -a*b+c. More...

#define	gmx_simd4_fnmsub_d gmx_simd_fnmsub_d
	Fused-negated-multiply-sub for SIMD4 double. Result is -a*b-c. More...

#define	gmx_simd4_rsqrt_d gmx_simd_rsqrt_d
	SIMD4 double 1.0/sqrt(x) lookup. More...

#define	gmx_simd4_fabs_d gmx_simd_fabs_d
	SIMD4 double Floating-point fabs(). More...

#define	gmx_simd4_fneg_d gmx_simd_fneg_d
	SIMD4 double floating-point negate. More...

#define	gmx_simd4_max_d gmx_simd_max_d
	Set each SIMD4 element to the largest from two variables. More...

#define	gmx_simd4_min_d gmx_simd_min_d
	Set each SIMD4 element to the smallest from two variables. More...

#define	gmx_simd4_round_d gmx_simd_round_d
	Round SIMD4 double to nearest integer value (in floating-point format). More...

#define	gmx_simd4_trunc_d gmx_simd_trunc_d
	Truncate SIMD4 double, i.e. round towards zero. More...

#define	gmx_simd4_dbool_t gmx_simd_dbool_t
	SIMD4 variable type to use for logical comparisons on doubles. More...

#define	gmx_simd4_cmpeq_d gmx_simd_cmpeq_d
	Equality comparison of two double precision SIMD4 values. More...

#define	gmx_simd4_cmplt_d gmx_simd_cmplt_d
	Less-than comparison of two double precision SIMD4 values. More...

#define	gmx_simd4_cmple_d gmx_simd_cmple_d
	Less-than-or-equal comparison of two double precision SIMD4 values. More...

#define	gmx_simd4_and_db gmx_simd_and_db
	Logical AND on double SIMD4 booleans. More...

#define	gmx_simd4_or_db gmx_simd_or_db
	Logical OR on double SIMD4 booleans. More...

#define	gmx_simd4_anytrue_db gmx_simd_anytrue_db
	Returns non-zero if any of the SIMD4 booleans in x is True. More...

#define	gmx_simd4_blendzero_d gmx_simd_blendzero_d
	Select from double precision SIMD4 variable where boolean is true. More...

#define	gmx_simd4_blendnotzero_d gmx_simd_blendnotzero_d
	Select from double precision SIMD4 variable where boolean is false. More...

#define	gmx_simd4_blendv_d gmx_simd_blendv_d
	Vector-blend instruction for SIMD4 double. More...

#define	gmx_simd4_reduce_d gmx_simd_reduce_d
	Return sum of all elements in SIMD4 double. More...

SIMD predefined macros to describe high-level capabilities
#define	GMX_SIMD
	GMX_SIMD indicates that some sort of SIMD support is present in software. More...

#define	GMX_SIMD4_WIDTH 4
	SIMD4 width is always 4, but use this for clarity in definitions. More...

#define	GMX_SIMD_HAVE_REAL
	Defined if gmx_simd_real_t is available. More...

#define	GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
	Width of gmx_simd_real_t. More...

#define	GMX_SIMD_HAVE_INT32
	Defined if gmx_simd_int32_t is available. More...

#define	GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH
	Width of gmx_simd_int32_t. More...

#define	GMX_SIMD_HAVE_INT32_EXTRACT
	Defined if gmx_simd_extract_i() is available. More...

#define	GMX_SIMD_HAVE_INT32_LOGICAL
	Defined if logical ops are supported on gmx_simd_int32_t. More...

#define	GMX_SIMD_HAVE_INT32_ARITHMETICS
	Defined if arithmetic ops are supported on gmx_simd_int32_t. More...

#define	GMX_SIMD4_HAVE_REAL
	Defined if gmx_simd4_real_t is available. More...

SIMD memory alignment operations
static float *	gmx_simd_align_f (float *p)
	Align a float pointer for usage with SIMD instructions. More...

static double *	gmx_simd_align_d (double *p)
	Align a double pointer for usage with SIMD instructions. More...

static int *	gmx_simd_align_fi (int *p)
	Align a (float) integer pointer for usage with SIMD instructions. More...

static int *	gmx_simd_align_di (int *p)
	Align a (double) integer pointer for usage with SIMD instructions. More...

static float *	gmx_simd4_align_f (float *p)
	Align a float pointer for usage with SIMD4 instructions. More...

static double *	gmx_simd4_align_d (double *p)
	Align a double pointer for usage with SIMD4 instructions. More...

#define	gmx_simd_align_r gmx_simd_align_f
	Align real memory for SIMD usage. More...

#define	gmx_simd_align_i gmx_simd_align_fi
	Align integer memory for SIMD usage. More...

SIMD data types
The actual storage of these types is implementation dependent. The documentation is generated from the reference implementation, but for normal usage this will likely not be what you are using.
#define	gmx_simd_real_t gmx_simd_float_t
	Real precision floating-point SIMD datatype. More...

#define	gmx_simd_int32_t gmx_simd_fint32_t
	32-bit integer SIMD type. More...

#define	gmx_simd_bool_t gmx_simd_fbool_t
	Boolean SIMD type for usage with gmx_simd_real_t. More...

#define	gmx_simd_ibool_t gmx_simd_fibool_t
	Boolean SIMD type for usage with gmx_simd_int32_t. More...

SIMD load/store operations on gmx_simd_real_t
Note Unaligned load/stores are only available when GMX_SIMD_HAVE_LOADU and GMX_SIMD_HAVE_STOREU are set, respectively.
#define	gmx_simd_load_r gmx_simd_load_f
	Load GMX_SIMD_REAL_WIDTH values from aligned memory to gmx_simd_real_t. More...

#define	gmx_simd_load1_r gmx_simd_load1_f
	Set all elements in gmx_simd_real_t from single value in memory. More...

#define	gmx_simd_set1_r gmx_simd_set1_f
	Set all elements in gmx_simd_real_t from a scalar. More...

#define	gmx_simd_store_r gmx_simd_store_f
	Store GMX_SIMD_REAL_WIDTH values from gmx_simd_real_t to aligned memory. More...

#define	gmx_simd_loadu_r gmx_simd_loadu_f
	Load GMX_SIMD_REAL_WIDTH values from unaligned memory to gmx_simd_real_t. More...

#define	gmx_simd_storeu_r gmx_simd_storeu_f
	Store GMX_SIMD_REAL_WIDTH values from gmx_simd_real_t to unaligned memory. More...

#define	gmx_simd_setzero_r gmx_simd_setzero_f
	Set all elements in gmx_simd_real_t to 0.0. More...

SIMD load/store operations on gmx_simd_int32_t
Note Unaligned load/stores are only available when GMX_SIMD_HAVE_LOADU and GMX_SIMD_HAVE_STOREU are set, respectively.
#define	gmx_simd_load_i gmx_simd_load_fi
	Load GMX_SIMD_INT32_WIDTH values from aligned memory to gmx_simd_int32_t. More...

#define	gmx_simd_set1_i gmx_simd_set1_fi
	Set all elements in gmx_simd_int32_t from a single integer. More...

#define	gmx_simd_store_i gmx_simd_store_fi
	Store GMX_SIMD_INT32_WIDTH values from gmx_simd_int32_t to aligned memory. More...

#define	gmx_simd_loadu_i gmx_simd_loadu_fi
	Load GMX_SIMD_INT32_WIDTH values from unaligned memory to gmx_simd_int32_t. More...

#define	gmx_simd_storeu_i gmx_simd_storeu_fi
	Store GMX_SIMD_INT32_WIDTH values from gmx_simd_int32_t to unaligned memory. More...

#define	gmx_simd_extract_i gmx_simd_extract_fi
	Extract single integer from gmx_simd_int32_t element. More...

#define	gmx_simd_setzero_i gmx_simd_setzero_fi
	Set all elements in gmx_simd_int32_t to 0. More...

SIMD floating-point logical operations on gmx_simd_real_t
These instructions are available if GMX_SIMD_HAVE_LOGICAL is defined.
#define	gmx_simd_and_r gmx_simd_and_f
	Bitwise and on two gmx_simd_real_t. More...

#define	gmx_simd_andnot_r gmx_simd_andnot_f
	Bitwise and-not on two gmx_simd_real_t; 1st arg is complemented. More...

#define	gmx_simd_or_r gmx_simd_or_f
	Bitwise or on two gmx_simd_real_t. More...

#define	gmx_simd_xor_r gmx_simd_xor_f
	Bitwise exclusive-or on two gmx_simd_real_t. More...

SIMD floating-point arithmetic operations on gmx_simd_real_t
#define	gmx_simd_add_r gmx_simd_add_f
	SIMD a+b for two gmx_simd_real_t. More...

#define	gmx_simd_sub_r gmx_simd_sub_f
	SIMD a-b for two gmx_simd_real_t. More...

#define	gmx_simd_mul_r gmx_simd_mul_f
	SIMD a*b for two gmx_simd_real_t. More...

#define	gmx_simd_fmadd_r gmx_simd_fmadd_f
	SIMD a*b+c for three gmx_simd_real_t. More...

#define	gmx_simd_fmsub_r gmx_simd_fmsub_f
	SIMD a*b-c for three gmx_simd_real_t. More...

#define	gmx_simd_fnmadd_r gmx_simd_fnmadd_f
	SIMD -a*b+c for three gmx_simd_real_t. More...

#define	gmx_simd_fnmsub_r gmx_simd_fnmsub_f
	SIMD -a*b-c for three gmx_simd_real_t. More...

#define	gmx_simd_rsqrt_r gmx_simd_rsqrt_f
	SIMD table lookup for 1/sqrt(x) approximation. More...

#define	gmx_simd_rcp_r gmx_simd_rcp_f
	SIMD table lookup for 1/x approximation. More...

#define	gmx_simd_fabs_r gmx_simd_fabs_f
	SIMD fabs(x) for gmx_simd_real_t. More...

#define	gmx_simd_fneg_r gmx_simd_fneg_f
	SIMD -x for gmx_simd_real_t. More...

#define	gmx_simd_max_r gmx_simd_max_f
	SIMD max(a,b) for each element in gmx_simd_real_t. More...

#define	gmx_simd_min_r gmx_simd_min_f
	SIMD min(a,b) for each element in gmx_simd_real_t. More...

#define	gmx_simd_round_r gmx_simd_round_f
	Round gmx_simd_real_t to nearest int, return gmx_simd_real_t. More...

#define	gmx_simd_trunc_r gmx_simd_trunc_f
	Truncate gmx_simd_real_t towards 0, return gmx_simd_real_t. More...

#define	gmx_simd_fraction_r gmx_simd_fraction_f
	SIMD Fraction, i.e. x-trunc(x) for gmx_simd_real_t. More...

#define	gmx_simd_get_exponent_r gmx_simd_get_exponent_f
	Return the FP exponent of a SIMD gmx_simd_real_t as a gmx_simd_real_t. More...

#define	gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f
	Return the FP mantissa of a SIMD gmx_simd_real_t as a gmx_simd_real_t. More...

#define	gmx_simd_set_exponent_r gmx_simd_set_exponent_f
	Set the exponent of a SIMD gmx_simd_real_t from a gmx_simd_real_t. More...

SIMD comparison, boolean, and select operations for gmx_simd_real_t
#define	gmx_simd_cmpeq_r gmx_simd_cmpeq_f
	SIMD a==b for gmx_simd_real_t. Returns a gmx_simd_bool_t. More...

#define	gmx_simd_cmplt_r gmx_simd_cmplt_f
	SIMD a<b for gmx_simd_real_t. Returns a gmx_simd_bool_t. More...

#define	gmx_simd_cmple_r gmx_simd_cmple_f
	SIMD a<=b for gmx_simd_real_t. Returns a gmx_simd_bool_t. More...

#define	gmx_simd_and_b gmx_simd_and_fb
	For each element, the result boolean is true if both arguments are true. More...

#define	gmx_simd_or_b gmx_simd_or_fb
	For each element, the result boolean is true if either argument is true. More...

#define	gmx_simd_anytrue_b gmx_simd_anytrue_fb
	Return nonzero if any element in gmx_simd_bool_t is true, otherwise 0. More...

#define	gmx_simd_blendzero_r gmx_simd_blendzero_f
	Selects elements from gmx_simd_real_t where boolean is true, otherwise 0. More...

#define	gmx_simd_blendnotzero_r gmx_simd_blendnotzero_f
	Selects elements from gmx_simd_real_t where boolean is false, otherwise 0. More...

#define	gmx_simd_blendv_r gmx_simd_blendv_f
	Selects from 2nd real SIMD arg where boolean is true, otherwise 1st arg. More...

#define	gmx_simd_reduce_r gmx_simd_reduce_f
	Return sum of all elements in SIMD floating-point variable. More...

SIMD integer logical operations on gmx_simd_int32_t
These instructions are available if GMX_SIMD_HAVE_INT32_LOGICAL is defined.
#define	gmx_simd_slli_i gmx_simd_slli_fi
	Shift each element in gmx_simd_int32_t left by immediate. More...

#define	gmx_simd_srli_i gmx_simd_srli_fi
	Shift each element in gmx_simd_int32_t right by immediate. More...

#define	gmx_simd_and_i gmx_simd_and_fi
	Bitwise and on two gmx_simd_int32_t. More...

#define	gmx_simd_andnot_i gmx_simd_andnot_fi
	Bitwise and-not on two gmx_simd_int32_t; 1st arg is complemented. More...

#define	gmx_simd_or_i gmx_simd_or_fi
	Bitwise or on two gmx_simd_int32_t. More...

#define	gmx_simd_xor_i gmx_simd_xor_fi
	Bitwise xor on two gmx_simd_int32_t. More...

SIMD integer arithmetic operations on gmx_simd_int32_t
These instructions are available if GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
#define	gmx_simd_add_i gmx_simd_add_fi
	SIMD a+b for two gmx_simd_int32_t. More...

#define	gmx_simd_sub_i gmx_simd_sub_fi
	SIMD a-b for two gmx_simd_int32_t. More...

#define	gmx_simd_mul_i gmx_simd_mul_fi
	SIMD a*b for two gmx_simd_int32_t. More...

SIMD integer comparison, booleans, and selection on gmx_simd_int32_t
These instructions are available if GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
#define	gmx_simd_cmpeq_i gmx_simd_cmpeq_fi
	Returns boolean describing whether a==b, for gmx_simd_int32_t. More...

#define	gmx_simd_cmplt_i gmx_simd_cmplt_fi
	Returns boolean describing whether a<b, for gmx_simd_int32_t. More...

#define	gmx_simd_and_ib gmx_simd_and_fib
	For each element, the result boolean is true if both arguments are true. More...

#define	gmx_simd_or_ib gmx_simd_or_fib
	For each element, the result boolean is true if either argument is true. More...

#define	gmx_simd_anytrue_ib gmx_simd_anytrue_fib
	Return nonzero if any element in gmx_simd_ibool_t is true, otherwise 0. More...

#define	gmx_simd_blendzero_i gmx_simd_blendzero_fi
	Selects elements from gmx_simd_int32_t where boolean is true, otherwise 0. More...

#define	gmx_simd_blendnotzero_i gmx_simd_blendnotzero_fi
	Selects elements from gmx_simd_int32_t where boolean is false, otherwise 0. More...

#define	gmx_simd_blendv_i gmx_simd_blendv_fi
	Selects from 2nd int SIMD arg where boolean is true, otherwise 1st arg. More...

SIMD conversion operations
These instructions are available when both types involved in the conversion are defined, e.g. GMX_SIMD_HAVE_REAL and GMX_SIMD_HAVE_INT32 for real-to-integer conversion.
#define	gmx_simd_cvt_r2i gmx_simd_cvt_f2i
	Convert gmx_simd_real_t to gmx_simd_int32_t, round to nearest integer. More...

#define	gmx_simd_cvtt_r2i gmx_simd_cvtt_f2i
	Convert gmx_simd_real_t to gmx_simd_int32_t, truncate towards zero. More...

#define	gmx_simd_cvt_i2r gmx_simd_cvt_i2f
	Convert gmx_simd_int32_t to gmx_simd_real_t. More...

#define	gmx_simd_cvt_b2ib gmx_simd_cvt_fb2fib
	Convert from gmx_simd_bool_t to gmx_simd_ibool_t. More...

#define	gmx_simd_cvt_ib2b gmx_simd_cvt_fib2fb
	Convert from gmx_simd_ibool_t to gmx_simd_bool_t. More...

SIMD4 - constant width-four SIMD datatypes
These operations are only meant to be used for a few coordinate manipulation and grid interpolation routines, so we only support a subset of operations for SIMD4. To avoid repeating all the documentation from the generic width SIMD routines, we only provide brief documentation for these operations. Follow the link to the implementation documentation or the reference to the corresponding generic SIMD routine. The format will be exactly the same, but they have SIMD replaced with SIMD4.
#define	gmx_simd4_real_t gmx_simd4_float_t
	SIMD real datatype guaranteed to be 4 elements wide, if available. More...

#define	gmx_simd4_bool_t gmx_simd4_fbool_t
	Boolean for gmx_simd4_real_t comparison/selection.

#define	gmx_simd4_load_r gmx_simd4_load_f
	Load aligned data to gmx_simd4_real_t. More...

#define	gmx_simd4_load1_r gmx_simd4_load1_f
	Load single element to gmx_simd4_real_t. More...

#define	gmx_simd4_set1_r gmx_simd4_set1_f
	Set gmx_simd4_real_t from scalar value. More...

#define	gmx_simd4_store_r gmx_simd4_store_f
	Store aligned data from gmx_simd4_real_t. More...

#define	gmx_simd4_loadu_r gmx_simd4_loadu_f
	Load unaligned data to gmx_simd4_real_t. More...

#define	gmx_simd4_storeu_r gmx_simd4_storeu_f
	Store unaligned data from gmx_simd4_real_t. More...

#define	gmx_simd4_setzero_r gmx_simd4_setzero_f
	Set all elements in gmx_simd4_real_t to 0.0. More...

#define	gmx_simd4_and_r gmx_simd4_and_f
	Bitwise and for two gmx_simd4_real_t. More...

#define	gmx_simd4_andnot_r gmx_simd4_andnot_f
	Bitwise and-not for two gmx_simd4_real_t. 1st arg is complemented. More...

#define	gmx_simd4_or_r gmx_simd4_or_f
	Bitwise or for two gmx_simd4_real_t. More...

#define	gmx_simd4_xor_r gmx_simd4_xor_f
	Bitwise xor for two gmx_simd4_real_t. More...

#define	gmx_simd4_add_r gmx_simd4_add_f
	a+b for gmx_simd4_real_t. More...

#define	gmx_simd4_sub_r gmx_simd4_sub_f
	a-b for gmx_simd4_real_t. More...

#define	gmx_simd4_mul_r gmx_simd4_mul_f
	a*b for gmx_simd4_real_t. More...

#define	gmx_simd4_fmadd_r gmx_simd4_fmadd_f
	a*b+c for gmx_simd4_real_t. More...

#define	gmx_simd4_fmsub_r gmx_simd4_fmsub_f
	a*b-c for gmx_simd4_real_t. More...

#define	gmx_simd4_fnmadd_r gmx_simd4_fnmadd_f
	-a*b+c for gmx_simd4_real_t. More...

#define	gmx_simd4_fnmsub_r gmx_simd4_fnmsub_f
	-a*b-c for gmx_simd4_real_t. More...

#define	gmx_simd4_rsqrt_r gmx_simd4_rsqrt_f
	1/sqrt(x) approximate lookup for gmx_simd4_real_t. More...

#define	gmx_simd4_fabs_r gmx_simd4_fabs_f
	fabs(x) for gmx_simd4_real_t. More...

#define	gmx_simd4_fneg_r gmx_simd4_fneg_f
	Change sign (-x) for gmx_simd4_real_t. More...

#define	gmx_simd4_max_r gmx_simd4_max_f
	Select maximum of each pair of elements from args for gmx_simd4_real_t. More...

#define	gmx_simd4_min_r gmx_simd4_min_f
	Select minimum of each pair of elements from args for gmx_simd4_real_t. More...

#define	gmx_simd4_round_r gmx_simd4_round_f
	Round gmx_simd4_real_t to nearest integer, return gmx_simd4_real_t. More...

#define	gmx_simd4_trunc_r gmx_simd4_trunc_f
	Truncate gmx_simd4_real_t towards zero, return gmx_simd4_real_t. More...

#define	gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_f
	Scalar product of first three elements of two gmx_simd4_real_t *. More...

#define	gmx_simd4_cmpeq_r gmx_simd4_cmpeq_f
	Return booleans whether a==b for each element of two gmx_simd4_real_t. More...

#define	gmx_simd4_cmplt_r gmx_simd4_cmplt_f
	Return booleans whether a<b for each element of two gmx_simd4_real_t. More...

#define	gmx_simd4_cmple_r gmx_simd4_cmple_f
	Return booleans whether a<=b for each element of two gmx_simd4_real_t. More...

#define	gmx_simd4_and_b gmx_simd4_and_fb
	Logical and for two gmx_simd4_bool_t. More...

#define	gmx_simd4_or_b gmx_simd4_or_fb
	Logical or for two gmx_simd4_bool_t. More...

#define	gmx_simd4_anytrue_b gmx_simd4_anytrue_fb
	Return nonzero if any element in gmx_simd4_bool_t is true, otherwise 0. More...

#define	gmx_simd4_blendzero_r gmx_simd4_blendzero_f
	Selects elements from gmx_simd4_real_t where boolean is true, otherwise 0. More...

#define	gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_f
	Selects elements from gmx_simd4_real_t where boolean is false, otherwise 0. More...

#define	gmx_simd4_blendv_r gmx_simd4_blendv_f
	Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg. More...

#define	gmx_simd4_reduce_r gmx_simd4_reduce_f
	Return sum of all elements in SIMD4 floating-point variable. More...

#define	gmx_simd4_align_r gmx_simd4_align_f
	Align real memory for SIMD4 usage. More...

Single precision SIMD math functions
Note In most cases you should use the real-precision functions instead.
static gmx_simd_float_t gmx_simdcall	gmx_simd_sum4_f (gmx_simd_float_t a, gmx_simd_float_t b, gmx_simd_float_t c, gmx_simd_float_t d)
	SIMD float utility to sum a+b+c+d. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_xor_sign_f (gmx_simd_float_t a, gmx_simd_float_t b)
	Return -a if b is negative, SIMD float. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_rsqrt_iter_f (gmx_simd_float_t lu, gmx_simd_float_t x)
	Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD float. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_invsqrt_f (gmx_simd_float_t x)
	Calculate 1/sqrt(x) for SIMD float. More...

static gmx_simd_float_t	gmx_simd_invsqrt_maskfpe_f (gmx_simd_float_t x, gmx_simd_fbool_t m)
	Calculate 1/sqrt(x) for masked entries of SIMD float. More...

static gmx_simd_float_t	gmx_simd_invsqrt_notmaskfpe_f (gmx_simd_float_t x, gmx_simd_fbool_t m)
	Calculate 1/sqrt(x) for non-masked entries of SIMD float. More...

static void gmx_simdcall	gmx_simd_invsqrt_pair_f (gmx_simd_float_t x0, gmx_simd_float_t x1, gmx_simd_float_t out0, gmx_simd_float_t out1)
	Calculate 1/sqrt(x) for two SIMD floats. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_rcp_iter_f (gmx_simd_float_t lu, gmx_simd_float_t x)
	Perform one Newton-Raphson iteration to improve 1/x for SIMD float. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_inv_f (gmx_simd_float_t x)
	Calculate 1/x for SIMD float. More...

static gmx_simd_float_t	gmx_simd_inv_maskfpe_f (gmx_simd_float_t x, gmx_simd_fbool_t m)
	Calculate 1/x for masked entries of SIMD float. More...

static gmx_simd_float_t	gmx_simd_inv_notmaskfpe_f (gmx_simd_float_t x, gmx_simd_fbool_t m)
	Calculate 1/x for non-masked entries of SIMD float. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_sqrt_f (gmx_simd_float_t x)
	Calculate sqrt(x) correctly for SIMD floats, including argument 0.0. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_log_f (gmx_simd_float_t x)
	SIMD float log(x). This is the natural logarithm. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_exp2_f (gmx_simd_float_t x)
	SIMD float 2^x. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_exp_f (gmx_simd_float_t x)
	SIMD float exp(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_erf_f (gmx_simd_float_t x)
	SIMD float erf(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_erfc_f (gmx_simd_float_t x)
	SIMD float erfc(x). More...

static void gmx_simdcall	gmx_simd_sincos_f (gmx_simd_float_t x, gmx_simd_float_t sinval, gmx_simd_float_t cosval)
	SIMD float sin & cos. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_sin_f (gmx_simd_float_t x)
	SIMD float sin(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_cos_f (gmx_simd_float_t x)
	SIMD float cos(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_tan_f (gmx_simd_float_t x)
	SIMD float tan(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_asin_f (gmx_simd_float_t x)
	SIMD float asin(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_acos_f (gmx_simd_float_t x)
	SIMD float acos(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_atan_f (gmx_simd_float_t x)
	SIMD float atan(x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_atan2_f (gmx_simd_float_t y, gmx_simd_float_t x)
	SIMD float atan2(y,x). More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_pmecorrF_f (gmx_simd_float_t z2)
	Calculate the force correction due to PME analytically in SIMD float. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_pmecorrV_f (gmx_simd_float_t z2)
	Calculate the potential correction due to PME analytically in SIMD float. More...

Double precision SIMD math functions
Note In most cases you should use the real-precision functions instead.
static gmx_simd_double_t gmx_simdcall	gmx_simd_sum4_d (gmx_simd_double_t a, gmx_simd_double_t b, gmx_simd_double_t c, gmx_simd_double_t d)
	SIMD utility function to sum a+b+c+d for SIMD doubles. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_xor_sign_d (gmx_simd_double_t a, gmx_simd_double_t b)
	Return -a if b is negative, SIMD double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_rsqrt_iter_d (gmx_simd_double_t lu, gmx_simd_double_t x)
	Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_invsqrt_d (gmx_simd_double_t x)
	Calculate 1/sqrt(x) for SIMD double. More...

static gmx_simd_double_t	gmx_simd_invsqrt_maskfpe_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	Calculate 1/sqrt(x) for masked entries of SIMD double. More...

static gmx_simd_double_t	gmx_simd_invsqrt_notmaskfpe_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	Calculate 1/sqrt(x) for non-masked entries of SIMD double. More...

static void gmx_simdcall	gmx_simd_invsqrt_pair_d (gmx_simd_double_t x0, gmx_simd_double_t x1, gmx_simd_double_t out0, gmx_simd_double_t out1)
	Calculate 1/sqrt(x) for two SIMD doubles. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_rcp_iter_d (gmx_simd_double_t lu, gmx_simd_double_t x)
	Perform one Newton-Raphson iteration to improve 1/x for SIMD double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_inv_d (gmx_simd_double_t x)
	Calculate 1/x for SIMD double. More...

static gmx_simd_double_t	gmx_simd_inv_maskfpe_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	Calculate 1/x for masked entries of SIMD double. More...

static gmx_simd_double_t	gmx_simd_inv_notmaskfpe_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	Calculate 1/x for non-masked entries of SIMD double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_sqrt_d (gmx_simd_double_t x)
	Calculate sqrt(x) correctly for SIMD doubles, including argument 0.0. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_log_d (gmx_simd_double_t x)
	SIMD double log(x). This is the natural logarithm. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_exp2_d (gmx_simd_double_t x)
	SIMD double 2^x. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_exp_d (gmx_simd_double_t x)
	SIMD double exp(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_erf_d (gmx_simd_double_t x)
	SIMD double erf(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_erfc_d (gmx_simd_double_t x)
	SIMD double erfc(x). More...

static void gmx_simdcall	gmx_simd_sincos_d (gmx_simd_double_t x, gmx_simd_double_t sinval, gmx_simd_double_t cosval)
	SIMD double sin & cos. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_sin_d (gmx_simd_double_t x)
	SIMD double sin(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_cos_d (gmx_simd_double_t x)
	SIMD double cos(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_tan_d (gmx_simd_double_t x)
	SIMD double tan(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_asin_d (gmx_simd_double_t x)
	SIMD double asin(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_acos_d (gmx_simd_double_t x)
	SIMD double acos(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_atan_d (gmx_simd_double_t x)
	SIMD double atan(x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_atan2_d (gmx_simd_double_t y, gmx_simd_double_t x)
	SIMD double atan2(y,x). More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_pmecorrF_d (gmx_simd_double_t z2)
	Calculate the force correction due to PME analytically for SIMD double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_pmecorrV_d (gmx_simd_double_t z2)
	Calculate the potential correction due to PME analytically for SIMD double. More...

SIMD math functions for double prec. data, single prec. accuracy
Note In some cases we do not need full double accuracy of individual SIMD math functions, although the data is stored in double precision SIMD registers. This might be the case for special algorithms, or if the architecture does not support single precision. Since the full double precision evaluation of math functions typically requires much more expensive polynomial approximations, these functions implement the algorithms used in the single precision SIMD math functions, but they operate on double precision SIMD variables. You should normally not use these functions directly, but the real-precision wrappers instead. When Gromacs is compiled in single precision, those will be aliases to the normal single precision SIMD math functions.
static gmx_simd_double_t gmx_simdcall	gmx_simd_invsqrt_singleaccuracy_d (gmx_simd_double_t x)
	Calculate 1/sqrt(x) for SIMD double, but in single accuracy. More...

static gmx_simd_double_t	gmx_simd_invsqrt_maskfpe_singleaccuracy_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	1/sqrt(x) for masked entries of SIMD double, but in single accuracy. More...

static gmx_simd_double_t	gmx_simd_invsqrt_notmaskfpe_singleaccuracy_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	1/sqrt(x) for non-masked entries of SIMD double, in single accuracy. More...

static void gmx_simdcall	gmx_simd_invsqrt_pair_singleaccuracy_d (gmx_simd_double_t x0, gmx_simd_double_t x1, gmx_simd_double_t out0, gmx_simd_double_t out1)
	Calculate 1/sqrt(x) for two SIMD doubles, but single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_inv_singleaccuracy_d (gmx_simd_double_t x)
	Calculate 1/x for SIMD double, but in single accuracy. More...

static gmx_simd_double_t	gmx_simd_inv_maskfpe_singleaccuracy_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	1/x for masked entries of SIMD double, single accuracy. More...

static gmx_simd_double_t	gmx_simd_inv_notmaskfpe_singleaccuracy_d (gmx_simd_double_t x, gmx_simd_dbool_t m)
	1/x for non-masked entries of SIMD double, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_sqrt_singleaccuracy_d (gmx_simd_double_t x)
	Calculate sqrt(x) (correct for 0.0) for SIMD double, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_log_singleaccuracy_d (gmx_simd_double_t x)
	SIMD log(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_exp2_singleaccuracy_d (gmx_simd_double_t x)
	SIMD 2^x. Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_exp_singleaccuracy_d (gmx_simd_double_t x)
	SIMD exp(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_erf_singleaccuracy_d (gmx_simd_double_t x)
	SIMD erf(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_erfc_singleaccuracy_d (gmx_simd_double_t x)
	SIMD erfc(x). Double precision SIMD data, single accuracy. More...

static void gmx_simdcall	gmx_simd_sincos_singleaccuracy_d (gmx_simd_double_t x, gmx_simd_double_t sinval, gmx_simd_double_t cosval)
	SIMD sin & cos. Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_sin_singleaccuracy_d (gmx_simd_double_t x)
	SIMD sin(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_cos_singleaccuracy_d (gmx_simd_double_t x)
	SIMD cos(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_tan_singleaccuracy_d (gmx_simd_double_t x)
	SIMD tan(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_asin_singleaccuracy_d (gmx_simd_double_t x)
	SIMD asin(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_acos_singleaccuracy_d (gmx_simd_double_t x)
	SIMD acos(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_atan_singleaccuracy_d (gmx_simd_double_t x)
	SIMD atan(x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_atan2_singleaccuracy_d (gmx_simd_double_t y, gmx_simd_double_t x)
	SIMD atan2(y,x). Double precision SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_pmecorrF_singleaccuracy_d (gmx_simd_double_t z2)
	Analytical PME force correction, double SIMD data, single accuracy. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_pmecorrV_singleaccuracy_d (gmx_simd_double_t z2)
	Analytical PME potential correction, double SIMD data, single accuracy. More...

SIMD4 math functions
static gmx_simd_float_t gmx_simdcall	gmx_simd4_sum4_f (gmx_simd_float_t a, gmx_simd_float_t b, gmx_simd_float_t c, gmx_simd_float_t d)
	SIMD4 utility function to sum a+b+c+d for SIMD4 floats. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd4_rsqrt_iter_f (gmx_simd_float_t lu, gmx_simd_float_t x)
	Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD4 float. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd4_invsqrt_f (gmx_simd_float_t x)
	Calculate 1/sqrt(x) for SIMD4 float. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd4_sum4_d (gmx_simd_double_t a, gmx_simd_double_t b, gmx_simd_double_t c, gmx_simd_double_t d)
	SIMD4 utility function to sum a+b+c+d for SIMD4 doubles. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd4_rsqrt_iter_d (gmx_simd_double_t lu, gmx_simd_double_t x)
	Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD4 double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd4_invsqrt_d (gmx_simd_double_t x)
	Calculate 1/sqrt(x) for SIMD4 double. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd4_invsqrt_singleaccuracy_d (gmx_simd_double_t x)
	Calculate 1/sqrt(x) for SIMD4 double, but in single accuracy. More...

#define	gmx_simd4_sum4_r gmx_simd4_sum4_f
	SIMD4 utility function to sum a+b+c+d for SIMD4 reals. More...

#define	gmx_simd4_invsqrt_r gmx_simd4_invsqrt_f
	Calculate 1/sqrt(x) for SIMD4 real. More...

#define	gmx_simd4_invsqrt_singleaccuracy_r gmx_simd4_invsqrt_f
	1/sqrt(x) for SIMD4 real. Single accuracy, even for double prec. More...

Real-precision SIMD math functions
These are the ones you should typically call in Gromacs.
#define	gmx_simd_sum4_r gmx_simd_sum4_f
	SIMD utility function to sum a+b+c+d for SIMD reals. More...

#define	gmx_simd_xor_sign_r gmx_simd_xor_sign_f
	Return -a if b is negative, SIMD real. More...

#define	gmx_simd_invsqrt_r gmx_simd_invsqrt_f
	Calculate 1/sqrt(x) for SIMD real. More...

#define	gmx_simd_invsqrt_pair_r gmx_simd_invsqrt_pair_f
	Calculate 1/sqrt(x) for two SIMD reals. More...

#define	gmx_simd_sqrt_r gmx_simd_sqrt_f
	Calculate sqrt(x) correctly for SIMD real, including argument 0.0. More...

#define	gmx_simd_inv_r gmx_simd_inv_f
	Calculate 1/x for SIMD real. More...

#define	gmx_simd_log_r gmx_simd_log_f
	SIMD real log(x). This is the natural logarithm. More...

#define	gmx_simd_exp2_r gmx_simd_exp2_f
	SIMD real 2^x. More...

#define	gmx_simd_exp_r gmx_simd_exp_f
	SIMD real e^x. More...

#define	gmx_simd_erf_r gmx_simd_erf_f
	SIMD real erf(x). More...

#define	gmx_simd_erfc_r gmx_simd_erfc_f
	SIMD real erfc(x). More...

#define	gmx_simd_sincos_r gmx_simd_sincos_f
	SIMD real sin & cos. More...

#define	gmx_simd_sin_r gmx_simd_sin_f
	SIMD real sin(x). More...

#define	gmx_simd_cos_r gmx_simd_cos_f
	SIMD real cos(x). More...

#define	gmx_simd_tan_r gmx_simd_tan_f
	SIMD real tan(x). More...

#define	gmx_simd_asin_r gmx_simd_asin_f
	SIMD real asin(x). More...

#define	gmx_simd_acos_r gmx_simd_acos_f
	SIMD real acos(x). More...

#define	gmx_simd_atan_r gmx_simd_atan_f
	SIMD real atan(x). More...

#define	gmx_simd_atan2_r gmx_simd_atan2_f
	SIMD real atan2(y,x). More...

#define	gmx_simd_pmecorrF_r gmx_simd_pmecorrF_f
	SIMD Analytic PME force correction. More...

#define	gmx_simd_pmecorrV_r gmx_simd_pmecorrV_f
	SIMD Analytic PME potential correction. More...

#define	gmx_simd_invsqrt_singleaccuracy_r gmx_simd_invsqrt_f
	Calculate 1/sqrt(x) for SIMD, only targeting single accuracy. More...

#define	gmx_simd_invsqrt_pair_singleaccuracy_r gmx_simd_invsqrt_pair_f
	Calculate 1/sqrt(x) for SIMD pair, only targeting single accuracy. More...

#define	gmx_simd_sqrt_singleaccuracy_r gmx_simd_sqrt_f
	Calculate sqrt(x), only targeting single accuracy. More...

#define	gmx_simd_inv_singleaccuracy_r gmx_simd_inv_f
	Calculate 1/x for SIMD real, only targeting single accuracy. More...

#define	gmx_simd_log_singleaccuracy_r gmx_simd_log_f
	SIMD real log(x), only targeting single accuracy. More...

#define	gmx_simd_exp2_singleaccuracy_r gmx_simd_exp2_f
	SIMD real 2^x, only targeting single accuracy. More...

#define	gmx_simd_exp_singleaccuracy_r gmx_simd_exp_f
	SIMD real e^x, only targeting single accuracy. More...

#define	gmx_simd_erf_singleaccuracy_r gmx_simd_erf_f
	SIMD real erf(x), only targeting single accuracy. More...

#define	gmx_simd_erfc_singleaccuracy_r gmx_simd_erfc_f
	SIMD real erfc(x), only targeting single accuracy. More...

#define	gmx_simd_sincos_singleaccuracy_r gmx_simd_sincos_f
	SIMD real sin & cos, only targeting single accuracy. More...

#define	gmx_simd_sin_singleaccuracy_r gmx_simd_sin_f
	SIMD real sin(x), only targeting single accuracy. More...

#define	gmx_simd_cos_singleaccuracy_r gmx_simd_cos_f
	SIMD real cos(x), only targeting single accuracy. More...

#define	gmx_simd_tan_singleaccuracy_r gmx_simd_tan_f
	SIMD real tan(x), only targeting single accuracy. More...

#define	gmx_simd_asin_singleaccuracy_r gmx_simd_asin_f
	SIMD real asin(x), only targeting single accuracy. More...

#define	gmx_simd_acos_singleaccuracy_r gmx_simd_acos_f
	SIMD real acos(x), only targeting single accuracy. More...

#define	gmx_simd_atan_singleaccuracy_r gmx_simd_atan_f
	SIMD real atan(x), only targeting single accuracy. More...

#define	gmx_simd_atan2_singleaccuracy_r gmx_simd_atan2_f
	SIMD real atan2(y,x), only targeting single accuracy. More...

#define	gmx_simd_pmecorrF_singleaccuracy_r gmx_simd_pmecorrF_f
	SIMD Analytic PME force corr., only targeting single accuracy. More...

#define	gmx_simd_pmecorrV_singleaccuracy_r gmx_simd_pmecorrV_f
	SIMD Analytic PME potential corr., only targeting single accuracy. More...

Classes
struct	gmx_simd_float_t
	Float SIMD variable. Supported with GMX_SIMD_HAVE_FLOAT. More...

struct	gmx_simd_double_t
	Floating-point SIMD variable type in double precision. More...

struct	gmx_simd_fint32_t
	Integer SIMD variable type to use for conversions to/from float. More...

struct	gmx_simd_dint32_t
	Integer SIMD variable type to use for conversions to/from double. More...

struct	gmx_simd_fbool_t
	Boolean type for float SIMD data. More...

struct	gmx_simd_dbool_t
	Boolean type for double precision SIMD data. More...

struct	gmx_simd_fibool_t
	Boolean type for integer datatypes corresponding to float SIMD. More...

struct	gmx_simd_dibool_t
	Boolean type for integer datatypes corresponding to double SIMD. More...

Macros
#define	gmx_simd_calc_rsq_f gmx_simd_norm2_f
	Calculating r^2 is the same as evaluating the norm of dx*dx. More...

#define	gmx_simd_calc_rsq_d gmx_simd_norm2_d
	Calculating r^2 is the same as evaluating the norm of dx*dx. More...

#define	gmx_simd4_calc_rsq_f gmx_simd4_norm2_f
	Calculating r^2 is the same as evaluating the norm of dx*dx. More...

#define	gmx_simd4_calc_rsq_d gmx_simd4_norm2_d
	Calculating r^2 is the same as evaluating the norm of dx*dx. More...

#define	gmx_simd_iprod_r gmx_simd_iprod_f
	SIMD real inner product of multiple real vectors. More...

#define	gmx_simd_norm2_r gmx_simd_norm2_f
	SIMD real norm squared of multiple real vectors. More...

#define	gmx_simd_calc_rsq_r gmx_simd_calc_rsq_f
	Calculating r^2 is the same as evaluating the norm of dx*dx. More...

#define	gmx_simd_cprod_r gmx_simd_cprod_f
	SIMD real cross-product of multiple real vectors. More...

#define	gmx_simd4_norm2_r gmx_simd4_norm2_f
	SIMD4 real norm squared of multiple vectors. More...

#define	gmx_simd4_calc_rsq_r gmx_simd4_calc_rsq_f
	Calculating r^2 is the same as evaluating the norm of dx*dx. More...

Functions
static gmx_simd_float_t gmx_simdcall	gmx_simd_iprod_f (gmx_simd_float_t ax, gmx_simd_float_t ay, gmx_simd_float_t az, gmx_simd_float_t bx, gmx_simd_float_t by, gmx_simd_float_t bz)
	SIMD float inner product of multiple float vectors. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd_norm2_f (gmx_simd_float_t ax, gmx_simd_float_t ay, gmx_simd_float_t az)
	SIMD float norm squared of multiple vectors. More...

static void gmx_simdcall	gmx_simd_cprod_f (gmx_simd_float_t ax, gmx_simd_float_t ay, gmx_simd_float_t az, gmx_simd_float_t bx, gmx_simd_float_t by, gmx_simd_float_t bz, gmx_simd_float_t *cx, gmx_simd_float_t *cy, gmx_simd_float_t *cz)
	SIMD float cross-product of multiple vectors. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_iprod_d (gmx_simd_double_t ax, gmx_simd_double_t ay, gmx_simd_double_t az, gmx_simd_double_t bx, gmx_simd_double_t by, gmx_simd_double_t bz)
	SIMD double inner product of multiple double vectors. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd_norm2_d (gmx_simd_double_t ax, gmx_simd_double_t ay, gmx_simd_double_t az)
	SIMD double norm squared of multiple vectors. More...

static void gmx_simdcall	gmx_simd_cprod_d (gmx_simd_double_t ax, gmx_simd_double_t ay, gmx_simd_double_t az, gmx_simd_double_t bx, gmx_simd_double_t by, gmx_simd_double_t bz, gmx_simd_double_t *cx, gmx_simd_double_t *cy, gmx_simd_double_t *cz)
	SIMD double cross-product of multiple vectors. More...

static gmx_simd_float_t gmx_simdcall	gmx_simd4_norm2_f (gmx_simd_float_t ax, gmx_simd_float_t ay, gmx_simd_float_t az)
	SIMD4 float norm squared of multiple vectors. More...

static gmx_simd_double_t gmx_simdcall	gmx_simd4_norm2_d (gmx_simd_double_t ax, gmx_simd_double_t ay, gmx_simd_double_t az)
	SIMD4 double norm squared of multiple vectors. More...

Directories
directory	simd
	SIMD intrinsics interface (simd)

directory	tests
	Unit tests for SIMD intrinsics interface (simd).

Files
file	impl_reference.h
	Reference SIMD implementation, including SIMD documentation.

file	simd.h
	Definitions, capabilities, and wrappers for SIMD module.

file	simd_math.h
	Math functions for SIMD datatypes.

file	vector_operations.h
	SIMD operations corresponding to Gromacs rvec datatypes.

Macro Definition Documentation

#define GMX_SIMD

GMX_SIMD indicates that some sort of SIMD support is present in software.

It is disabled if no architecture, not even the reference SIMD implementation, has been selected.

#define gmx_simd4_add_d gmx_simd_add_d

Add two SIMD4 double values.

You should typically call the real-precision gmx_simd_add_r.

Parameters

a	term1
b	term2

Returns: a+b

#define gmx_simd4_add_f gmx_simd_add_f

Add two SIMD4 float variables.

You should typically call the real-precision gmx_simd_add_r.

Parameters

a	term1
b	term2

Returns: a+b

#define gmx_simd4_add_r gmx_simd4_add_f

a+b for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_add_r.

Parameters

a	term1
b	term2

Returns: a+b

#define gmx_simd4_align_r gmx_simd4_align_f

Align real memory for SIMD4 usage.

You should typically not call this function directly (unless you explicitly want single precision even when GMX_DOUBLE is set), but use the gmx_simd4_align_r macro to align memory in default Gromacs real precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD4_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing float SIMD. If GMX_SIMD4_HAVE_FLOAT is not set, p will be returned unchanged.

This routine provides aligned memory for usage with gmx_simd4_float_t. You should have allocated an extra GMX_SIMD4_WIDTH * sizeof(float) bytes.

#define gmx_simd4_and_b gmx_simd4_and_fb

Logical and for two gmx_simd4_bool_t.

You should typically call the real-precision gmx_simd_and_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a & b are true.

Note: This is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_and_ib

#define gmx_simd4_and_d gmx_simd_and_d

Bitwise and for two SIMD4 double variables.

You should typically call the real-precision gmx_simd_and_r.

Parameters

a	data1
b	data2

Returns: data1 & data2

#define gmx_simd4_and_db gmx_simd_and_db

Logical AND on double SIMD4 booleans.

You should typically call the real-precision gmx_simd_and_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a & b are true.

Note: This is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_and_ib

#define gmx_simd4_and_f gmx_simd_and_f

Bitwise and for two SIMD4 float variables.

You should typically call the real-precision gmx_simd_and_r.

Parameters

a	data1
b	data2

Returns: data1 & data2

#define gmx_simd4_and_fb gmx_simd_and_fb

Logical AND on float SIMD4 booleans.

You should typically call the real-precision gmx_simd_and_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a & b are true.

Note: This is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_and_ib

#define gmx_simd4_and_r gmx_simd4_and_f

Bitwise and for two gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_and_r.

Parameters

a	data1
b	data2

Returns: data1 & data2

#define gmx_simd4_andnot_d gmx_simd_andnot_d

Bitwise andnot for SIMD4 double. c=(~a) & b.

You should typically call the real-precision gmx_simd_andnot_r.

Parameters

a	data1
b	data2

Returns: (~data1) & data2

#define gmx_simd4_andnot_f gmx_simd_andnot_f

Bitwise andnot for two SIMD4 float variables. c=(~a) & b.

You should typically call the real-precision gmx_simd_andnot_r.

Parameters

a	data1
b	data2

Returns: (~data1) & data2

#define gmx_simd4_andnot_r gmx_simd4_andnot_f

Bitwise and-not for two gmx_simd4_real_t. 1st arg is complemented.

You should typically call the real-precision gmx_simd_andnot_r.

Parameters

a	data1
b	data2

Returns: (~data1) & data2

#define gmx_simd4_anytrue_b gmx_simd4_anytrue_fb

Return nonzero if any element in gmx_simd4_bool_t is true, otherwise 0.

You should typically call the real-precision gmx_simd_anytrue_b.

Parameters

a	Logical variable.

Returns: non-zero if any element in a is true, otherwise 0.

The actual return value for truth will depend on the architecture, so any non-zero value is considered truth.

#define gmx_simd4_anytrue_db gmx_simd_anytrue_db

Returns non-zero if any of the SIMD4 booleans in a is true.

You should typically call the real-precision gmx_simd_anytrue_b.

Parameters

a	Logical variable.

Returns: non-zero if any element in a is true, otherwise 0.

The actual return value for truth will depend on the architecture, so any non-zero value is considered truth.

#define gmx_simd4_anytrue_fb gmx_simd_anytrue_fb

Returns non-zero if any of the SIMD4 booleans in a is true.

You should typically call the real-precision gmx_simd_anytrue_b.

Parameters

a	Logical variable.

Returns: non-zero if any element in a is true, otherwise 0.

The actual return value for truth will depend on the architecture, so any non-zero value is considered truth.

#define gmx_simd4_blendnotzero_d gmx_simd_blendnotzero_d

Select from double precision SIMD4 variable where boolean is false.

You should typically call the real-precision gmx_simd_blendnotzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for false, 0 for true (sic).

#define gmx_simd4_blendnotzero_f gmx_simd_blendnotzero_f

Select from single precision SIMD4 variable where boolean is false.

You should typically call the real-precision gmx_simd_blendnotzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for false, 0 for true (sic).

#define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_f

Selects from real SIMD4 arg where boolean is false, otherwise 0.

You should typically call the real-precision gmx_simd_blendnotzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for false, 0 for true (sic).

#define gmx_simd4_blendv_d gmx_simd_blendv_d

Vector-blend instruction for SIMD4 double.

You should typically call the real-precision gmx_simd_blendv_r.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

#define gmx_simd4_blendv_f gmx_simd_blendv_f

Vector-blend instruction for SIMD4 float.

You should typically call the real-precision gmx_simd_blendv_r.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

#define gmx_simd4_blendv_r gmx_simd4_blendv_f

Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg.

You should typically call the real-precision gmx_simd_blendv_r.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

#define gmx_simd4_blendzero_d gmx_simd_blendzero_d

Select from double precision SIMD4 variable where boolean is true.

You should typically call the real-precision gmx_simd_blendzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for true, 0 for false.

#define gmx_simd4_blendzero_f gmx_simd_blendzero_f

Select from single precision SIMD4 variable where boolean is true.

You should typically call the real-precision gmx_simd_blendzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for true, 0 for false.

#define gmx_simd4_blendzero_r gmx_simd4_blendzero_f

Selects from real SIMD4 arg where boolean is true, otherwise 0.

You should typically call the real-precision gmx_simd_blendzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for true, 0 for false.

#define gmx_simd4_calc_rsq_d gmx_simd4_norm2_d

Calculating r^2 is the same as evaluating the norm of dx*dx.

For details, see gmx_simd4_norm2_d.

#define gmx_simd4_calc_rsq_f gmx_simd4_norm2_f

Calculating r^2 is the same as evaluating the norm of dx*dx.

For details, see gmx_simd4_norm2_f

#define gmx_simd4_calc_rsq_r gmx_simd4_calc_rsq_f

Calculating r^2 is the same as evaluating the norm of dx*dx.

This will call gmx_simd4_calc_rsq_d if GMX_DOUBLE is defined, otherwise gmx_simd4_calc_rsq_f.

For details, see gmx_simd4_norm2_f

#define gmx_simd4_cmpeq_d gmx_simd_cmpeq_d

Equality comparison of two double precision SIMD4 values.

You should typically call the real-precision gmx_simd_cmpeq_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a==b.

Beware that exact floating-point comparisons are difficult.

#define gmx_simd4_cmpeq_f gmx_simd_cmpeq_f

Equality comparison of two single precision SIMD4.

You should typically call the real-precision gmx_simd_cmpeq_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a==b.

Beware that exact floating-point comparisons are difficult.

#define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_f

Return booleans whether a==b for each element of two gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_cmpeq_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a==b.

Beware that exact floating-point comparisons are difficult.

#define gmx_simd4_cmple_d gmx_simd_cmple_d

Less-than-or-equal comparison of two double precision SIMD4 values.

You should typically call the real-precision gmx_simd_cmple_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<=b.

#define gmx_simd4_cmple_f gmx_simd_cmple_f

Less-than-or-equal comparison of two single precision SIMD4 values.

You should typically call the real-precision gmx_simd_cmple_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<=b.

#define gmx_simd4_cmple_r gmx_simd4_cmple_f

Return booleans whether a<=b for each element of two gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_cmple_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<=b.

#define gmx_simd4_cmplt_d gmx_simd_cmplt_d

Less-than comparison of two double precision SIMD4 values.

You should typically call the real-precision gmx_simd_cmplt_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<b.

#define gmx_simd4_cmplt_f gmx_simd_cmplt_f

Less-than comparison of two single precision SIMD4.

You should typically call the real-precision gmx_simd_cmplt_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<b.

#define gmx_simd4_cmplt_r gmx_simd4_cmplt_f

Return booleans whether a<b for each element of two gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_cmplt_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<b.

#define gmx_simd4_dbool_t gmx_simd_dbool_t

SIMD4 variable type to use for logical comparisons on doubles.

Use the generic gmx_simd_bool_t (for gmx_simd_real_t) instead, unless you really know what you are doing.

#define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_f

Scalar product of first three elements of two gmx_simd4_real_t.

The dot product is calculated between the first three elements in the two vectors, while the fourth is ignored. The result is returned as a scalar.

Parameters

a	vector1
b	vector2

Returns: a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.

#define gmx_simd4_double_t gmx_simd_double_t

SIMD4 double type. Available with GMX_SIMD4_HAVE_DOUBLE.

Unless you specifically want a double-precision type you should check gmx_simd4_real_t instead.

While the SIMD4 datatype is identical to the normal SIMD type in the reference implementation, this will often not be the case for other architectures.

#define gmx_simd4_fabs_d gmx_simd_fabs_d

SIMD4 double Floating-point fabs().

You should typically call the real-precision gmx_simd_fabs_r.

Parameters

a	any floating point values

Returns: fabs(a) for each element.

#define gmx_simd4_fabs_f gmx_simd_fabs_f

Floating-point absolute value for SIMD4 float.

You should typically call the real-precision gmx_simd_fabs_r.

Parameters

a	any floating point values

Returns: fabs(a) for each element.

#define gmx_simd4_fabs_r gmx_simd4_fabs_f

fabs(x) for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_fabs_r.

Parameters

a	any floating point values

Returns: fabs(a) for each element.

#define gmx_simd4_fbool_t gmx_simd_fbool_t

SIMD4 variable type to use for logical comparisons on floats.

You should likely use gmx_simd_bool_t (for gmx_simd_real_t) instead, unless you really know what you are doing.

#define gmx_simd4_float_t gmx_simd_float_t

SIMD4 float type. Available with GMX_SIMD4_HAVE_FLOAT.

Unless you specifically want a single-precision type you should check gmx_simd4_real_t instead.

While the SIMD4 datatype is identical to the normal SIMD type in the reference implementation, this will often not be the case for other architectures.

#define gmx_simd4_fmadd_d gmx_simd_fmadd_d

Fused-multiply-add for SIMD4 double. Result is a*b+c.

You should typically call the real-precision gmx_simd_fmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fmadd_f gmx_simd_fmadd_f

Fused-multiply-add for SIMD4 float. Result is a*b+c.

You should typically call the real-precision gmx_simd_fmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fmadd_r gmx_simd4_fmadd_f

a*b+c for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_fmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fmsub_d gmx_simd_fmsub_d

Fused-multiply-subtract for SIMD4 double. Result is a*b-c.

You should typically call the real-precision gmx_simd_fmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fmsub_f gmx_simd_fmsub_f

Fused-multiply-subtract for SIMD4 float. Result is a*b-c.

You should typically call the real-precision gmx_simd_fmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fmsub_r gmx_simd4_fmsub_f

a*b-c for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_fmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fneg_d gmx_simd_fneg_d

SIMD4 double floating-point negate.

You should typically call the real-precision gmx_simd_fneg_r.

Parameters

a	Any floating-point value

Returns: -a

#define gmx_simd4_fneg_f gmx_simd_fneg_f

Floating-point negate for SIMD4 float.

You should typically call the real-precision gmx_simd_fneg_r.

Parameters

a	Any floating-point value

Returns: -a

#define gmx_simd4_fneg_r gmx_simd4_fneg_f

Change sign (-x) for gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_fneg_r.

Parameters

a	Any floating-point value

Returns: -a

#define gmx_simd4_fnmadd_d gmx_simd_fnmadd_d

Fused-negated-multiply-add for SIMD4 double. Result is -a*b+c.

You should typically call the real-precision gmx_simd_fnmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fnmadd_f gmx_simd_fnmadd_f

Fused-negated-multiply-add for SIMD4 float. Result is -a*b+c.

You should typically call the real-precision gmx_simd_fnmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_f

-a*b+c for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_fnmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fnmsub_d gmx_simd_fnmsub_d

Fused-negated-multiply-sub for SIMD4 double. Result is -a*b-c.

You should typically call the real-precision gmx_simd_fnmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fnmsub_f gmx_simd_fnmsub_f

Fused-negated-multiply-sub for SIMD4 float. Result is -a*b-c.

You should typically call the real-precision gmx_simd_fnmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_f

-a*b-c for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_fnmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b-c

For some implementations you save an instruction if you assign the result to c.

#define GMX_SIMD4_HAVE_REAL

Defined if gmx_simd4_real_t is available.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD4_HAVE_DOUBLE, otherwise GMX_SIMD4_HAVE_FLOAT.

#define gmx_simd4_invsqrt_r gmx_simd4_invsqrt_f

Calculate 1/sqrt(x) for SIMD4 real.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

#define gmx_simd4_invsqrt_singleaccuracy_r gmx_simd4_invsqrt_f

1/sqrt(x) for SIMD4 real. Single accuracy, even for double prec.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd4_load1_d gmx_simd_load1_d

Double precision SIMD4 load single value to all elements.

Parameters

m	Pointer to single value in memory.

Returns: SIMD variable with all elements set to *m.

#define gmx_simd4_load1_f gmx_simd_load1_f

Set all elements of SIMD4 float from single pointer.

Parameters

m	Pointer to single value in memory.

Returns: SIMD variable with all elements set to *m.

#define gmx_simd4_load1_r gmx_simd4_load1_f

Load single element to gmx_simd4_real_t.

Parameters

m	Pointer to single value in memory.

Returns: SIMD variable with all elements set to *m.

#define gmx_simd4_load_d gmx_simd_load_d

Double precision SIMD4 load aligned.

Parameters

m	Pointer to memory aligned to the SIMD width.

Returns: SIMD variable with data loaded.

#define gmx_simd4_load_f gmx_simd_load_f

Load SIMD4 float from aligned memory.

Parameters

m	Pointer to memory aligned to the SIMD width.

Returns: SIMD variable with data loaded.

#define gmx_simd4_load_r gmx_simd4_load_f

Load aligned data to gmx_simd4_real_t.

Parameters

m	Pointer to memory aligned to the SIMD width.

Returns: SIMD variable with data loaded.

#define gmx_simd4_loadu_d gmx_simd_loadu_d

Load unaligned SIMD4 double.

Available with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirement.

Returns: SIMD variable with data loaded.

#define gmx_simd4_loadu_f gmx_simd_loadu_f

Load SIMD4 float from unaligned memory.

Available with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirement.

Returns: SIMD variable with data loaded.

#define gmx_simd4_loadu_r gmx_simd4_loadu_f

Load unaligned data to gmx_simd4_real_t.

Available with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirement.

Returns: SIMD variable with data loaded.

#define gmx_simd4_max_d gmx_simd_max_d

Set each SIMD4 element to the largest from two variables.

You should typically call the real-precision gmx_simd_max_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: max(a,b) for each element.

#define gmx_simd4_max_f gmx_simd_max_f

Set each SIMD4 float element to the largest from two variables.

You should typically call the real-precision gmx_simd_max_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: max(a,b) for each element.

#define gmx_simd4_max_r gmx_simd4_max_f

Select maximum of each pair of elements from args for gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_max_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: max(a,b) for each element.

#define gmx_simd4_min_d gmx_simd_min_d

Set each SIMD4 element to the smallest from two variables.

You should typically call the real-precision gmx_simd_min_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: min(a,b) for each element.

#define gmx_simd4_min_f gmx_simd_min_f

Set each SIMD4 float element to the smallest from two variables.

You should typically call the real-precision gmx_simd_min_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: min(a,b) for each element.

#define gmx_simd4_min_r gmx_simd4_min_f

Select minimum of each pair of elements from args for gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_min_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: min(a,b) for each element.

#define gmx_simd4_mul_d gmx_simd_mul_d

Multiply two SIMD4 double values.

You should typically call the real-precision gmx_simd_mul_r.

Parameters

a	factor1
b	factor2

Returns: a*b.

#define gmx_simd4_mul_f gmx_simd_mul_f

Multiply two SIMD4 float variables.

You should typically call the real-precision gmx_simd_mul_r.

Parameters

a	factor1
b	factor2

Returns: a*b.

#define gmx_simd4_mul_r gmx_simd4_mul_f

a*b for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_mul_r.

Parameters

a	factor1
b	factor2

Returns: a*b.

#define gmx_simd4_norm2_r gmx_simd4_norm2_f

SIMD4 real norm squared of multiple vectors.

This will call gmx_simd4_norm2_d if GMX_DOUBLE is defined, otherwise gmx_simd4_norm2_f.

For normal usage you should always call the real-precision gmx_simd_norm2_r.

Parameters

ax	X components of vectors
ay	Y components of vectors
az	Z components of vectors

Returns: Element i will be res[i] = ax[i]*ax[i]+ay[i]*ay[i]+az[i]*az[i].

Note: This corresponds to the scalar product of the vector with itself, but the compiler might be able to optimize it better with identical vectors.

#define gmx_simd4_or_b gmx_simd4_or_fb

Logical or for two gmx_simd4_bool_t.

You should typically call the real-precision gmx_simd_or_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a or b is true.

Note that this is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_or_ib

#define gmx_simd4_or_d gmx_simd_or_d

Bitwise or for SIMD4 double.

You should typically call the real-precision gmx_simd_or_r.

Parameters

a	data1
b	data2

Returns: data1 | data2

#define gmx_simd4_or_db gmx_simd_or_db

Logical OR on double SIMD4 booleans.

You should typically call the real-precision gmx_simd_or_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a or b is true.

Note that this is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_or_ib

#define gmx_simd4_or_f gmx_simd_or_f

Bitwise or for two SIMD4 float variables.

You should typically call the real-precision gmx_simd_or_r.

Parameters

a	data1
b	data2

Returns: data1 | data2

#define gmx_simd4_or_fb gmx_simd_or_fb

Logical OR on float SIMD4 booleans.

You should typically call the real-precision gmx_simd_or_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a or b is true.

Note that this is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_or_ib

#define gmx_simd4_or_r gmx_simd4_or_f

Bitwise or for two gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_or_r.

Parameters

a	data1
b	data2

Returns: data1 | data2

#define gmx_simd4_real_t gmx_simd4_float_t

SIMD real datatype guaranteed to be 4 elements wide, if available.

All the SIMD4 datatypes and operations behave like their counterparts for the generic SIMD implementation, but they might be implemented with different registers, or not supported at all. It is important that you check the define GMX_SIMD4_HAVE_REAL before using it.

Just as the normal SIMD operations, all SIMD4 types and routines will be aliased to either single or double precision ones based on whether GMX_DOUBLE is defined.

Note: There is no support for integer or math operations in SIMD4.

#define gmx_simd4_reduce_d gmx_simd_reduce_d

Return sum of all elements in SIMD4 double.

You should typically call the real-precision gmx_simd_reduce_r.

Parameters

a	SIMD variable to reduce/sum.

Returns: The sum of all elements in the argument variable.

#define gmx_simd4_reduce_f gmx_simd_reduce_f

Return sum of all elements in SIMD4 float.

You should typically call the real-precision gmx_simd_reduce_r.

Parameters

a	SIMD variable to reduce/sum.

Returns: The sum of all elements in the argument variable.

#define gmx_simd4_reduce_r gmx_simd4_reduce_f

Return sum of all elements in SIMD4 floating-point variable.

You should typically call the real-precision gmx_simd_reduce_r.

Parameters

a	SIMD variable to reduce/sum.

Returns: The sum of all elements in the argument variable.

#define gmx_simd4_round_d gmx_simd_round_d

Round SIMD4 double to nearest integer value (in floating-point format).

You should typically call the real-precision gmx_simd_round_r.

Parameters

a	Any floating-point value

Returns: The nearest integer, represented in floating-point format.

Note: The reference implementation rounds exact half-way cases away from zero, whereas most SIMD intrinsics will round to nearest even. This could be fixed by using rint/rintf, but the bigger problem is that MSVC does not support full C99, and none of the round or rint functions are defined. It's much easier to approximately implement round() than rint(), so we do that and hope we never get bitten in testing. (Thanks, Microsoft.)

#define gmx_simd4_round_f gmx_simd_round_f

Round to nearest integer value for SIMD4 float.

You should typically call the real-precision gmx_simd_round_r.

Parameters

a	Any floating-point value

Returns: The nearest integer, represented in floating-point format.

Note: The reference implementation rounds exact half-way cases away from zero, whereas most SIMD intrinsics will round to nearest even. This could be fixed by using rint/rintf, but the bigger problem is that MSVC does not support full C99, and none of the round or rint functions are defined. It's much easier to approximately implement round() than rint(), so we do that and hope we never get bitten in testing. (Thanks, Microsoft.)

#define gmx_simd4_round_r gmx_simd4_round_f

Round gmx_simd4_real_t to nearest integer, return gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_round_r.

Parameters

a	Any floating-point value

Returns: The nearest integer, represented in floating-point format.

Note: The reference implementation rounds exact half-way cases away from zero, whereas most SIMD intrinsics will round to nearest even. This could be fixed by using rint/rintf, but the bigger problem is that MSVC does not support full C99, and none of the round or rint functions are defined. It's much easier to approximately implement round() than rint(), so we do that and hope we never get bitten in testing. (Thanks, Microsoft.)

#define gmx_simd4_rsqrt_d gmx_simd_rsqrt_d

SIMD4 double 1.0/sqrt(x) lookup.

You should typically call the real-precision gmx_simd_rsqrt_r.

This is a low-level instruction that should only be called from routines implementing the inverse square root in simd_math.h.

Parameters

x	Argument, x>0

Returns: Approximation of 1/sqrt(x), accuracy is GMX_SIMD_RSQRT_BITS.

#define gmx_simd4_rsqrt_f gmx_simd_rsqrt_f

Lookup of approximate 1/sqrt(x) for SIMD4 float.

You should typically call the real-precision gmx_simd_rsqrt_r.

This is a low-level instruction that should only be called from routines implementing the inverse square root in simd_math.h.

Parameters

x	Argument, x>0

Returns: Approximation of 1/sqrt(x), accuracy is GMX_SIMD_RSQRT_BITS.

#define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_f

1/sqrt(x) approximate lookup for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_rsqrt_r.

This is a low-level instruction that should only be called from routines implementing the inverse square root in simd_math.h.

Parameters

x	Argument, x>0

Returns: Approximation of 1/sqrt(x), accuracy is GMX_SIMD_RSQRT_BITS.

#define gmx_simd4_set1_d gmx_simd_set1_d

Double precision SIMD4 set all elements from value.

Parameters

r	floating-point constant

Returns: SIMD variable with all elements set to r.

#define gmx_simd4_set1_f gmx_simd_set1_f

Set all SIMD4 float elements to the value r.

Parameters

r	floating-point constant

Returns: SIMD variable with all elements set to r.

#define gmx_simd4_set1_r gmx_simd4_set1_f

Set gmx_simd4_real_t from scalar value.

Parameters

r	floating-point constant

Returns: SIMD variable with all elements set to r.

#define gmx_simd4_setzero_d gmx_simd_setzero_d

Set all elements in SIMD4 double to 0.0.

Returns: The value 0.0 in all elements of a SIMD variable.

#define gmx_simd4_setzero_f gmx_simd_setzero_f

Set all SIMD4 float elements to 0.

Returns: The value 0.0 in all elements of a SIMD variable.

#define gmx_simd4_setzero_r gmx_simd4_setzero_f

Set all elements in gmx_simd4_real_t to 0.0.

Returns: The value 0.0 in all elements of a SIMD variable.

#define gmx_simd4_store_d gmx_simd_store_d

Double precision SIMD4 store to aligned memory.

Parameters

[out]	m	Pointer to memory, aligned to SIMD width.
	a	SIMD variable to store

#define gmx_simd4_store_f gmx_simd_store_f

Store the contents of SIMD4 float a to aligned memory m.

Parameters

[out]	m	Pointer to memory, aligned to SIMD width.
	a	SIMD variable to store

#define gmx_simd4_store_r gmx_simd4_store_f

Store aligned data from gmx_simd4_real_t.

Parameters

[out]	m	Pointer to memory, aligned to SIMD width.
	a	SIMD variable to store

#define gmx_simd4_storeu_d gmx_simd_storeu_d

Store unaligned SIMD4 double.

Available with GMX_SIMD_HAVE_STOREU.

Parameters

[out]	m	Pointer to memory, no alignment requirement.
	a	SIMD variable to store.

#define gmx_simd4_storeu_f gmx_simd_storeu_f

Store SIMD4 float to unaligned memory.

Available with GMX_SIMD_HAVE_STOREU.

Parameters

[out]	m	Pointer to memory, no alignment requirement.
	a	SIMD variable to store.

#define gmx_simd4_storeu_r gmx_simd4_storeu_f

Store unaligned data from gmx_simd4_real_t.

Available with GMX_SIMD_HAVE_STOREU.

Parameters

[out]	m	Pointer to memory, no alignment requirement.
	a	SIMD variable to store.

#define gmx_simd4_sub_d gmx_simd_sub_d

Subtract two SIMD4 double values.

You should typically call the real-precision gmx_simd_sub_r.

Parameters

a	term1
b	term2

Returns: a-b

#define gmx_simd4_sub_f gmx_simd_sub_f

Subtract two SIMD4 float variables.

You should typically call the real-precision gmx_simd_sub_r.

Parameters

a	term1
b	term2

Returns: a-b

#define gmx_simd4_sub_r gmx_simd4_sub_f

a-b for gmx_simd4_real_t

You should typically call the real-precision gmx_simd_sub_r.

Parameters

a	term1
b	term2

Returns: a-b

#define gmx_simd4_sum4_r gmx_simd4_sum4_f

SIMD4 utility function to sum a+b+c+d for SIMD4 reals.

You should normally call the real-precision routine gmx_simd_sum4_r.

Parameters

a	term 1 (multiple values)
b	term 2 (multiple values)
c	term 3 (multiple values)
d	term 4 (multiple values)

Returns: sum of terms 1-4 (multiple values)

#define gmx_simd4_trunc_d gmx_simd_trunc_d

Truncate SIMD4 double, i.e. round towards zero.

You should typically call the real-precision gmx_simd_trunc_r.

Parameters

a	Any floating-point value

Returns: Integer rounded towards zero, represented in floating-point format.

Note: This is truncation towards zero, not floor(). The reason for this is that truncation is virtually always present as a dedicated hardware instruction, but floor() frequently isn't.

#define gmx_simd4_trunc_f gmx_simd_trunc_f

Truncate SIMD4 float, i.e. round towards zero.

You should typically call the real-precision gmx_simd_trunc_r.

Parameters

a	Any floating-point value

Returns: Integer rounded towards zero, represented in floating-point format.

Note: This is truncation towards zero, not floor(). The reason for this is that truncation is virtually always present as a dedicated hardware instruction, but floor() frequently isn't.

#define gmx_simd4_trunc_r gmx_simd4_trunc_f

Truncate gmx_simd4_real_t towards zero, return gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_trunc_r.

Parameters

a	Any floating-point value

Returns: Integer rounded towards zero, represented in floating-point format.

Note: This is truncation towards zero, not floor(). The reason for this is that truncation is virtually always present as a dedicated hardware instruction, but floor() frequently isn't.

#define GMX_SIMD4_WIDTH 4

SIMD4 width is always 4, but use this for clarity in definitions.

It improves code readability to allocate e.g. 2*GMX_SIMD4_WIDTH instead of 8.

#define gmx_simd4_xor_d gmx_simd_xor_d

Bitwise xor for SIMD4 double.

You should typically call the real-precision gmx_simd_xor_r.

Parameters

a	data1
b	data2

Returns: data1 ^ data2

#define gmx_simd4_xor_f gmx_simd_xor_f

Bitwise xor for two SIMD4 float variables.

You should typically call the real-precision gmx_simd_xor_r.

Parameters

a	data1
b	data2

Returns: data1 ^ data2

#define gmx_simd4_xor_r gmx_simd4_xor_f

Bitwise xor for two gmx_simd4_real_t.

You should typically call the real-precision gmx_simd_xor_r.

Parameters

a	data1
b	data2

Returns: data1 ^ data2

#define gmx_simd_acos_r gmx_simd_acos_f

SIMD real acos(x).

You should normally call the real-precision routine gmx_simd_acos_r.

Parameters

x	The argument to evaluate acos for

Returns: Acos(x)

#define gmx_simd_acos_singleaccuracy_r gmx_simd_acos_f

SIMD real acos(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_acos_r.

Parameters

x	The argument to evaluate acos for

Returns: Acos(x)

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_add_i gmx_simd_add_fi

SIMD a+b for two gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_add_di, otherwise gmx_simd_add_fi.

You should typically call the real-precision gmx_simd_add_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	term1
b	term2

Returns: a+b

#define gmx_simd_add_r gmx_simd_add_f

SIMD a+b for two gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_add_d, otherwise gmx_simd_add_f.

You should typically call the real-precision gmx_simd_add_r.

Parameters

a	term1
b	term2

Returns: a+b

#define gmx_simd_align_i gmx_simd_align_fi

Align integer memory for SIMD usage.

This routine will only align memory if GMX_SIMD_HAVE_INT32 is defined. Otherwise the original pointer will be returned.

Start by allocating an extra GMX_SIMD_INT32_WIDTH elements of memory, and then call this function. The returned pointer will be greater or equal to the one you provided, and point to an address inside your provided memory that is aligned to the SIMD width.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_align_di, otherwise gmx_simd_align_fi. For detailed documentation, see the precision-specific implementation routines.

#define gmx_simd_align_r gmx_simd_align_f

Align real memory for SIMD usage.

This routine will only align memory if GMX_SIMD_HAVE_REAL is defined. Otherwise the original pointer will be returned.

Start by allocating an extra GMX_SIMD_REAL_WIDTH real elements of memory, and then call this function. The returned pointer will be greater or equal to the one you provided, and point to an address inside your provided memory that is aligned to the SIMD width.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_align_d, otherwise gmx_simd_align_f. For detailed documentation, see the precision-specific implementation routines.

#define gmx_simd_and_b gmx_simd_and_fb

For each element, the result boolean is true if both arguments are true.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_and_db, otherwise gmx_simd_and_fb.

You should typically call the real-precision gmx_simd_and_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a & b are true.

Note: This is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_and_ib

#define gmx_simd_and_i gmx_simd_and_fi

Bitwise and on two gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_and_di, otherwise gmx_simd_and_fi.

You should typically call the real-precision gmx_simd_and_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Note: You can not use this operation directly to select based on a boolean SIMD variable, since booleans are separate from integer SIMD. If that is what you need, have a look at gmx_simd_blendzero_i instead.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a & b (bitwise and)

#define gmx_simd_and_ib gmx_simd_and_fib

For each element, the result boolean is true if both arguments are true.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_and_dib, otherwise gmx_simd_and_fib.

You should typically call the real-precision gmx_simd_and_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD boolean 1
b	SIMD boolean 2

Returns: True for elements where both a and b are true.

#define gmx_simd_and_r gmx_simd_and_f

Bitwise and on two gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_and_d, otherwise gmx_simd_and_f.

You should typically call the real-precision gmx_simd_and_r.

Parameters

a	data1
b	data2

Returns: data1 & data2

#define gmx_simd_andnot_i gmx_simd_andnot_fi

Bitwise and-not on two gmx_simd_int32_t; 1st arg is complemented.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_andnot_di, otherwise gmx_simd_andnot_fi.

You should typically call the real-precision gmx_simd_andnot_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Note that you can NOT use this operation directly to select based on a boolean SIMD variable, since booleans are separate from integer SIMD. If that is what you need, have a look at gmx_simd_blendnotzero_i instead.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: (~a) & b (bitwise andnot)

#define gmx_simd_andnot_r gmx_simd_andnot_f

Bitwise and-not on two gmx_simd_real_t; 1st arg is complemented.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_andnot_d, otherwise gmx_simd_andnot_f.

You should typically call the real-precision gmx_simd_andnot_r.

Parameters

a	data1
b	data2

Returns: (~data1) & data2

#define gmx_simd_anytrue_b gmx_simd_anytrue_fb

Return nonzero if any element in gmx_simd_bool_t is true, otherwise 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_anytrue_db, otherwise gmx_simd_anytrue_fb.

You should typically call the real-precision gmx_simd_anytrue_b.

Parameters

a	Logical variable.

Returns: non-zero if any element in a is true, otherwise 0.

The actual return value for truth will depend on the architecture, so any non-zero value is considered truth.

#define gmx_simd_anytrue_ib gmx_simd_anytrue_fib

Return nonzero if any element in gmx_simd_ibool_t is true, otherwise 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_anytrue_dib, otherwise gmx_simd_anytrue_fib.

You should typically call the real-precision gmx_simd_anytrue_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

The actual return value for "any true" will depend on the architecture. Any non-zero value should be considered truth.

Parameters

a	SIMD boolean

Returns: Nonzero integer if any of the elements in a is true, otherwise 0.

#define gmx_simd_asin_r gmx_simd_asin_f

SIMD real asin(x).

You should normally call the real-precision routine gmx_simd_asin_r.

Parameters

x	The argument to evaluate asin for

Returns: Asin(x)

#define gmx_simd_asin_singleaccuracy_r gmx_simd_asin_f

SIMD real asin(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_asin_r.

Parameters

x	The argument to evaluate asin for

Returns: Asin(x)

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_atan2_r gmx_simd_atan2_f

SIMD real atan2(y,x).

You should normally call the real-precision routine gmx_simd_atan2_r.

Parameters

y	Y component of vector, any quadrant
x	X component of vector, any quadrant

Returns: Atan2(y,x), same argument/value range as standard math library.

Note: This routine should provide correct results for all finite non-zero or positive-zero arguments. However, negative zero arguments will be treated as positive zero, which means the return value will deviate from the standard math library atan2(y,x) for those cases. That should not be of any concern in Gromacs, and in particular it will not affect calculations of angles from vectors.

#define gmx_simd_atan2_singleaccuracy_r gmx_simd_atan2_f

SIMD real atan2(y,x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_atan2_r.

Parameters

y	Y component of vector, any quadrant
x	X component of vector, any quadrant

Returns: Atan2(y,x), same argument/value range as standard math library.

Note: This routine should provide correct results for all finite non-zero or positive-zero arguments. However, negative zero arguments will be treated as positive zero, which means the return value will deviate from the standard math library atan2(y,x) for those cases. That should not be of any concern in Gromacs, and in particular it will not affect calculations of angles from vectors. This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_atan_r gmx_simd_atan_f

SIMD real atan(x).

You should normally call the real-precision routine gmx_simd_atan_r.

Parameters

x	The argument to evaluate atan for

Returns: Atan(x), same argument/value range as standard math library.

#define gmx_simd_atan_singleaccuracy_r gmx_simd_atan_f

SIMD real atan(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_atan_r.

Parameters

x	The argument to evaluate atan for

Returns: Atan(x), same argument/value range as standard math library.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_fi

Selects elements from gmx_simd_int32_t where boolean is false, otherwise 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_blendnotzero_di, otherwise gmx_simd_blendnotzero_fi.

You should typically call the real-precision gmx_simd_blendnotzero_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer to select from
sel	Boolean selector

Returns: Elements from a where sel is false, 0 otherwise (sic).

#define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_f

Selects elements from gmx_simd_real_t where boolean is false, otherwise 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_blendnotzero_d, otherwise gmx_simd_blendnotzero_f.

You should typically call the real-precision gmx_simd_blendnotzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for false, 0 for true (sic).

#define gmx_simd_blendv_i gmx_simd_blendv_fi

Selects from 2nd int SIMD arg where boolean is true, otherwise 1st arg.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_blendv_di, otherwise gmx_simd_blendv_fi.

You should typically call the real-precision gmx_simd_blendv_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

#define gmx_simd_blendv_r gmx_simd_blendv_f

Selects from 2nd real SIMD arg where boolean is true, otherwise 1st arg.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_blendv_d, otherwise gmx_simd_blendv_f.

You should typically call the real-precision gmx_simd_blendv_r.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

#define gmx_simd_blendzero_i gmx_simd_blendzero_fi

Selects elements from gmx_simd_int32_t where boolean is true, otherwise 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_blendzero_di, otherwise gmx_simd_blendzero_fi.

You should typically call the real-precision gmx_simd_blendzero_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer to select from
sel	Boolean selector

Returns: Elements from a where sel is true, 0 otherwise.

#define gmx_simd_blendzero_r gmx_simd_blendzero_f

Selects elements from gmx_simd_real_t where boolean is true, otherwise 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_blendzero_d, otherwise gmx_simd_blendzero_f.

You should typically call the real-precision gmx_simd_blendzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for true, 0 for false.

See Also: gmx_simd_blendzero_i

#define gmx_simd_bool_t gmx_simd_fbool_t

Boolean SIMD type for usage with gmx_simd_real_t.

This type is only available if GMX_SIMD_HAVE_REAL is defined.

If GMX_DOUBLE is defined, this will be set to gmx_simd_dbool_t internally, otherwise gmx_simd_fbool_t. This is necessary since some SIMD implementations use bit patterns for marking truth, so single- vs. double-precision booleans are not necessarily exchangeable. As long as you just use this type you will not have to worry about precision.

See gmx_simd_ibool_t for an explanation of real vs. integer booleans.

#define gmx_simd_calc_rsq_d gmx_simd_norm2_d

Calculating r^2 is the same as evaluating the norm of dx*dx.

For details, see gmx_simd_norm2_d.

#define gmx_simd_calc_rsq_f gmx_simd_norm2_f

Calculating r^2 is the same as evaluating the norm of dx*dx.

For details, see gmx_simd_norm2_f.

#define gmx_simd_calc_rsq_r gmx_simd_calc_rsq_f

Calculating r^2 is the same as evaluating the norm of dx*dx.

This will call gmx_simd_calc_rsq_d if GMX_DOUBLE is defined, otherwise gmx_simd_calc_rsq_f.

For details, see gmx_simd_norm2_f.

#define gmx_simd_cmpeq_i gmx_simd_cmpeq_fi

Returns boolean describing whether a==b, for gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cmpeq_di, otherwise gmx_simd_cmpeq_fi.

You should typically call the real-precision gmx_simd_cmpeq_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer1
b	SIMD integer2

Returns: SIMD integer boolean with true for elements where a==b

#define gmx_simd_cmpeq_r gmx_simd_cmpeq_f

SIMD a==b for gmx_simd_real_t. Returns a gmx_simd_bool_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cmpeq_d, otherwise gmx_simd_cmpeq_f.

You should typically call the real-precision gmx_simd_cmpeq_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a==b.

Beware that exact floating-point comparisons are difficult.

#define gmx_simd_cmple_r gmx_simd_cmple_f

SIMD a<=b for gmx_simd_real_t. Returns a gmx_simd_bool_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cmple_d, otherwise gmx_simd_cmple_f.

You should typically call the real-precision gmx_simd_cmple_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<=b.

#define gmx_simd_cmplt_i gmx_simd_cmplt_fi

Returns boolean describing whether a<b, for gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cmplt_di, otherwise gmx_simd_cmplt_fi.

You should typically call the real-precision gmx_simd_cmplt_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer1
b	SIMD integer2

Returns: SIMD integer boolean with true for elements where a<b

#define gmx_simd_cmplt_r gmx_simd_cmplt_f

SIMD a<b for gmx_simd_real_t. Returns a gmx_simd_bool_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cmplt_d, otherwise gmx_simd_cmplt_f.

You should typically call the real-precision gmx_simd_cmplt_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<b.

#define gmx_simd_cos_r gmx_simd_cos_f

SIMD real cos(x).

You should normally call the real-precision routine gmx_simd_cos_r.

Parameters

x	The argument to evaluate cos for

Returns: Cos(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

#define gmx_simd_cos_singleaccuracy_r gmx_simd_cos_f

SIMD real cos(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_cos_r.

Parameters

x	The argument to evaluate cos for

Returns: Cos(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_cprod_r gmx_simd_cprod_f

SIMD real cross-product of multiple real vectors.

This will call gmx_simd_cprod_d if GMX_DOUBLE is defined, otherwise gmx_simd_cprod_f.

For normal usage you should always call the real-precision gmx_simd_cprod_r.

Parameters

	ax	X components of first vectors
	ay	Y components of first vectors
	az	Z components of first vectors
	bx	X components of second vectors
	by	Y components of second vectors
	bz	Z components of second vectors
[out]	cx	X components of cross product vectors
[out]	cy	Y components of cross product vectors
[out]	cz	Z components of cross product vectors

Returns: void

This calculates C = A x B, where the cross denotes the cross product. The arguments x/y/z denotes the different components, and each element corresponds to a separate vector.

#define gmx_simd_cvt_b2ib gmx_simd_cvt_fb2fib

Convert from gmx_simd_bool_t to gmx_simd_ibool_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cvt_db2dib, otherwise gmx_simd_cvt_fb2fib.

You should typically call the real-precision gmx_simd_cvt_b2ib.

Parameters

a	Boolean corresponding to SIMD floating-point

Returns: Boolean that can be applied to SIMD integer operations.

#define gmx_simd_cvt_i2r gmx_simd_cvt_i2f

Convert gmx_simd_int32_t to gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cvt_i2d, otherwise gmx_simd_cvt_i2f.

You should typically call the real-precision gmx_simd_cvt_i2r.

Parameters

a	SIMD integer

Returns: SIMD floating-point

#define gmx_simd_cvt_ib2b gmx_simd_cvt_fib2fb

Convert from gmx_simd_ibool_t to gmx_simd_bool_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cvt_dib2db, otherwise gmx_simd_cvt_fib2fb.

You should typically call the real-precision gmx_simd_cvt_ib2b.

Parameters

a	Boolean corresponding to SIMD integer

Returns: Boolean that can be applied to SIMD floating-point.

#define gmx_simd_cvt_r2i gmx_simd_cvt_f2i

Convert gmx_simd_real_t to gmx_simd_int32_t, round to nearest integer.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cvt_d2i, otherwise gmx_simd_cvt_f2i.

You should typically call the real-precision gmx_simd_cvt_r2i.

Parameters

a	SIMD floating-point

Returns: SIMD integer, rounded to nearest integer.

#define gmx_simd_cvtt_r2i gmx_simd_cvtt_f2i

Convert gmx_simd_real_t to gmx_simd_int32_t, truncate towards zero.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_cvtt_d2i, otherwise gmx_simd_cvtt_f2i.

You should typically call the real-precision gmx_simd_cvtt_r2i.

Parameters

a	SIMD floating-point

Returns: SIMD integer, truncated towards zero.

#define gmx_simd_erf_r gmx_simd_erf_f

SIMD real erf(x).

You should normally call the real-precision routine gmx_simd_erf_r.

Parameters

x	The value to calculate erf(x) for.

Returns: erf(x)

This routine achieves very close to full precision, but we do not care about the last bit or the subnormal result range.

#define gmx_simd_erf_singleaccuracy_r gmx_simd_erf_f

SIMD real erf(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_erf_r.

Parameters

x	The value to calculate erf(x) for.

Returns: erf(x)

This routine achieves very close to full precision, but we do not care about the last bit or the subnormal result range.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_erfc_r gmx_simd_erfc_f

SIMD real erfc(x).

You should normally call the real-precision routine gmx_simd_erfc_r.

Parameters

x	The value to calculate erfc(x) for.

Returns: erfc(x)

This routine achieves full precision (bar the last bit) over most of the input range, but for large arguments where the result is getting close to the minimum representable numbers we accept slightly larger errors (think results that are in the ballpark of 10^-30 for single precision, or 10^-200 for double) since that is not relevant for MD.

#define gmx_simd_erfc_singleaccuracy_r gmx_simd_erfc_f

SIMD real erfc(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_erfc_r.

Parameters

x	The value to calculate erfc(x) for.

Returns: erfc(x)

This routine achieves full precision (bar the last bit) over most of the input range, but for large arguments where the result is getting close to the minimum representable numbers we accept slightly larger errors (think results that are in the ballpark of 10^-30 for single precision, or 10^-200 for double) since that is not relevant for MD.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_exp2_r gmx_simd_exp2_f

SIMD real 2^x.

You should normally call the real-precision routine gmx_simd_exp2_r.

Parameters

x Argument.

Returns: 2^x. Undefined if input argument caused overflow.

#define gmx_simd_exp2_singleaccuracy_r gmx_simd_exp2_f

SIMD real 2^x, only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_exp2_r.

Parameters

x Argument.

Returns: 2^x. Undefined if input argument caused overflow.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_exp_r gmx_simd_exp_f

SIMD real e^x.

You should normally call the real-precision routine gmx_simd_exp_r.

In addition to scaling the argument for 2^x this routine correctly does extended precision arithmetics to improve accuracy.

Parameters

x Argument.

Returns: exp(x). Undefined if input argument caused overflow, which can happen if abs(x) > 7e13.

#define gmx_simd_exp_singleaccuracy_r gmx_simd_exp_f

SIMD real e^x, only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_exp_r.

In addition to scaling the argument for 2^x this routine correctly does extended precision arithmetics to improve accuracy.

Parameters

x Argument.

Returns: exp(x). Undefined if input argument caused overflow, which can happen if abs(x) > 7e13.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_extract_i gmx_simd_extract_fi

Extract single integer from gmx_simd_int32_t element.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_extract_di, otherwise gmx_simd_extract_fi.

You should typically call the real-precision gmx_simd_extract_i.

Available with GMX_SIMD_HAVE_FINT32_EXTRACT.

Parameters

a	SIMD variable
index	Position to extract integer from

Returns: Single integer from position index in SIMD variable.

#define gmx_simd_fabs_r gmx_simd_fabs_f

SIMD fabs(x) for gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fabs_d, otherwise gmx_simd_fabs_f.

You should typically call the real-precision gmx_simd_fabs_r.

Parameters

a	any floating point values

Returns: fabs(a) for each element.

#define gmx_simd_fmadd_d	(	a,
		b,
		c
	)	gmx_simd_add_d(gmx_simd_mul_d(a, b), c)

Fused-multiply-add. Result is a*b+c.

You should typically call the real-precision gmx_simd_fmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fmadd_f	(	a,
		b,
		c
	)	gmx_simd_add_f(gmx_simd_mul_f(a, b), c)

Fused-multiply-add. Result is a*b+c.

You should typically call the real-precision gmx_simd_fmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fmadd_r gmx_simd_fmadd_f

SIMD a*b+c for three gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fmadd_d, otherwise gmx_simd_fmadd_f.

You should typically call the real-precision gmx_simd_fmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fmsub_d	(	a,
		b,
		c
	)	gmx_simd_sub_d(gmx_simd_mul_d(a, b), c)

Fused-multiply-subtract. Result is a*b-c.

You should typically call the real-precision gmx_simd_fmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fmsub_f	(	a,
		b,
		c
	)	gmx_simd_sub_f(gmx_simd_mul_f(a, b), c)

Fused-multiply-subtract. Result is a*b-c.

You should typically call the real-precision gmx_simd_fmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fmsub_r gmx_simd_fmsub_f

SIMD a*b-c for three gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fmsub_d, otherwise gmx_simd_fmsub_f.

You should typically call the real-precision gmx_simd_fmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fneg_r gmx_simd_fneg_f

SIMD -x for gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fneg_d, otherwise gmx_simd_fneg_f.

You should typically call the real-precision gmx_simd_fneg_r.

Parameters

a	Any floating-point value

Returns: -a

#define gmx_simd_fnmadd_d	(	a,
		b,
		c
	)	gmx_simd_sub_d(c, gmx_simd_mul_d(a, b))

Fused-negated-multiply-add. Result is -a*b+c.

You should typically call the real-precision gmx_simd_fnmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fnmadd_f	(	a,
		b,
		c
	)	gmx_simd_sub_f(c, gmx_simd_mul_f(a, b))

Fused-negated-multiply-add. Result is -a*b+c.

You should typically call the real-precision gmx_simd_fnmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fnmadd_r gmx_simd_fnmadd_f

SIMD -a*b+c for three gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fnmadd_d, otherwise gmx_simd_fnmadd_f.

You should typically call the real-precision gmx_simd_fnmadd_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b+c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fnmsub_d	(	a,
		b,
		c
	)	gmx_simd_sub_d(gmx_simd_setzero_d(), gmx_simd_fmadd_d(a, b, c))

Fused-negated-multiply-sub. Result is -a*b-c.

You should typically call the real-precision gmx_simd_fnmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fnmsub_f	(	a,
		b,
		c
	)	gmx_simd_sub_f(gmx_simd_setzero_f(), gmx_simd_fmadd_f(a, b, c))

Fused-negated-multiply-sub. Result is -a*b-c.

You should typically call the real-precision gmx_simd_fnmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fnmsub_r gmx_simd_fnmsub_f

SIMD -a*b-c for three gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fnmsub_d, otherwise gmx_simd_fnmsub_f.

You should typically call the real-precision gmx_simd_fnmsub_r.

If GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.

Parameters

a	value
b	value
c	value

Returns: -a*b-c

For some implementations you save an instruction if you assign the result to c.

#define gmx_simd_fraction_r gmx_simd_fraction_f

SIMD Fraction, i.e. x-trunc(x) for gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_fraction_d, otherwise gmx_simd_fraction_f.

You should typically call the real-precision gmx_simd_fraction_r.

Parameters

a	Any floating-point value

Returns: a-trunc(a)

To maximize compatibility, we use the same definition of fractions as used e.g. for the AMD64 hardware instructions. This relies on truncation towards zero for the integer part, and the remaining fraction can thus be either positive or negative. As an example, -1.42 would return the fraction -0.42.

Hardware support with GMX_SIMD_HAVE_FRACTION, otherwise emulated.

#define gmx_simd_get_exponent_r gmx_simd_get_exponent_f

Return the FP exponent of a SIMD gmx_simd_real_t as a gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_get_exponent_d, otherwise gmx_simd_get_exponent_f.

#define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f

Return the FP mantissa of a SIMD gmx_simd_real_t as a gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_get_mantissa_d, otherwise gmx_simd_get_mantissa_f.

#define GMX_SIMD_HAVE_DINT32

Defined if the SIMD implementation has gmx_simd_dint32_t.

Note: The Gromacs SIMD module works entirely with 32 bit integers, both in single and double precision, since some platforms do not support 64 bit SIMD integers at all. In particular, this means it is up to each implementation to get this working even if the architecture's internal representation uses 64 bit integers when converting to/from double SIMD variables. For now we will try HARD to use conversions, packing or shuffling so the integer datatype has the same width as the floating-point type, i.e. if you use double precision SIMD with a width of 8, we want the integers we work with to also use a SIMD width of 8 to make it easy to load/store indices from arrays. This refers entirely to the function calls and how many integers we load/store in one call; the actual SIMD registers might be wider for integers internally (e.g. on x86 gmx_simd_dint32_t will only fill half the register), but this is none of the user's business. While this works for all current architectures, and we think it will work for future ones, we might have to alter this decision in the future. To avoid rewriting every single instance that refers to the SIMD width we still provide separate defines for the width of SIMD integer variables that you should use.

#define GMX_SIMD_HAVE_FLOAT

Defined when SIMD float support is present.

You should only use this to specifically check for single precision SIMD support, even when the rest of Gromacs uses double precision.

See Also: GMX_SIMD_HAVE_REAL, GMX_SIMD_HAVE_DOUBLE

#define GMX_SIMD_HAVE_INT32

Defined if gmx_simd_int32_t is available.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_HAVE_DINT32, otherwise GMX_SIMD_HAVE_FINT32.

#define GMX_SIMD_HAVE_INT32_ARITHMETICS

Defined if arithmetic ops are supported on gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_HAVE_DINT32_ARITHMETICS, otherwise GMX_SIMD_HAVE_FINT32_ARITHMETICS.

#define GMX_SIMD_HAVE_INT32_EXTRACT

Defined if gmx_simd_extract_i() is available.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_HAVE_DINT32_EXTRACT, otherwise GMX_SIMD_HAVE_FINT32_EXTRACT.

#define GMX_SIMD_HAVE_INT32_LOGICAL

Defined if logical ops are supported on gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_HAVE_DINT32_LOGICAL, otherwise GMX_SIMD_HAVE_FINT32_LOGICAL.

#define GMX_SIMD_HAVE_REAL

Defined if gmx_simd_real_t is available.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_HAVE_DOUBLE, otherwise GMX_SIMD_HAVE_FLOAT.

#define gmx_simd_ibool_t gmx_simd_fibool_t

Boolean SIMD type for usage with gmx_simd_int32_t.

This type is only available if GMX_SIMD_HAVE_INT32 is defined.

If GMX_DOUBLE is defined, this will be set to gmx_simd_dibool_t internally, otherwise gmx_simd_fibool_t. This is necessary since some SIMD implementations use bitpatterns for marking truth, so single- vs. double precision booleans are not necessarily exchangeable, and while a double-precision boolean might be represented with a 64-bit mask, the corresponding integer might only use a 32-bit mask.

We provide conversion routines for these cases, so the only thing you need to keep in mind is to use gmx_simd_bool_t when working with gmx_simd_real_t while you pick gmx_simd_ibool_t when working with gmx_simd_int32_t.

To convert between them, use gmx_simd_cvt_b2ib and gmx_simd_cvt_ib2b.

#define gmx_simd_int32_t gmx_simd_fint32_t

32-bit integer SIMD type.

This type is only available if GMX_SIMD_HAVE_INT32 is defined.

If GMX_DOUBLE is defined, this will be set to gmx_simd_dint32_t internally, otherwise gmx_simd_fint32_t. This might seem a strange implementation detail, but it is because some SIMD implementations use different types/widths of integers registers when converting from double vs. single precision floating point. As long as you just use this type you will not have to worry about precision.

#define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH

Width of gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_DINT32_WIDTH, otherwise GMX_SIMD_FINT32_WIDTH.

#define gmx_simd_inv_r gmx_simd_inv_f

Calculate 1/x for SIMD real.

You should normally call the real-precision routine gmx_simd_inv_r.

Parameters

x	Argument that must be nonzero. This routine does not check arguments.

Returns: 1/x. Result is undefined if your argument was invalid.

#define gmx_simd_inv_singleaccuracy_r gmx_simd_inv_f

Calculate 1/x for SIMD real, only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_inv_r.

Parameters

x	Argument that must be nonzero. This routine does not check arguments.

Returns: 1/x. Result is undefined if your argument was invalid.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_invsqrt_pair_r gmx_simd_invsqrt_pair_f

Calculate 1/sqrt(x) for two SIMD reals.

You should normally call the real-precision routine gmx_simd_invsqrt_pair_r.

Parameters

	x0	First set of arguments, x0 must be positive - no argument checking.
	x1	Second set of arguments, x1 must be positive - no argument checking.
[out]	out0	Result 1/sqrt(x0)
[out]	out1	Result 1/sqrt(x1)

In particular for double precision we can sometimes calculate square root pairs slightly faster by using single precision until the very last step.

#define gmx_simd_invsqrt_pair_singleaccuracy_r gmx_simd_invsqrt_pair_f

Calculate 1/sqrt(x) for SIMD pair, only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_invsqrt_pair_r.

Parameters

	x0	First set of arguments, x0 must be positive - no argument checking.
	x1	Second set of arguments, x1 must be positive - no argument checking.
[out]	out0	Result 1/sqrt(x0)
[out]	out1	Result 1/sqrt(x1)

In particular for double precision we can sometimes calculate square root pairs slightly faster by using single precision until the very last step.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_invsqrt_r gmx_simd_invsqrt_f

Calculate 1/sqrt(x) for SIMD real.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

#define gmx_simd_invsqrt_singleaccuracy_r gmx_simd_invsqrt_f

Calculate 1/sqrt(x) for SIMD, only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_iprod_r gmx_simd_iprod_f

SIMD real inner product of multiple real vectors.

This will call gmx_simd_iprod_d if GMX_DOUBLE is defined, otherwise gmx_simd_iprod_f.

For normal usage you should always call the real-precision gmx_simd_iprod_r.

Parameters

ax	X components of first vectors
ay	Y components of first vectors
az	Z components of first vectors
bx	X components of second vectors
by	Y components of second vectors
bz	Z components of second vectors

Returns: Element i will be res[i] = ax[i]*bx[i]+ay[i]*by[i]+az[i]*bz[i].

Note: The SIMD part is that we calculate many scalar products in one call.

#define gmx_simd_load1_r gmx_simd_load1_f

Set all elements in gmx_simd_real_t from single value in memory.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_load1_d, otherwise gmx_simd_load1_f.

Parameters

m	Pointer to single value in memory.

Returns: SIMD variable with all elements set to *m.

#define gmx_simd_load_i gmx_simd_load_fi

Load GMX_SIMD_INT32_WIDTH values from aligned memory to gmx_simd_int32_t .

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_load_di , otherwise gmx_simd_load_fi .

You should typically call the real-precision gmx_simd_load_i.

Parameters

m	Pointer to memory, aligned to integer SIMD width.

Returns: SIMD integer variable.

#define gmx_simd_load_r gmx_simd_load_f

Load GMX_SIMD_REAL_WIDTH values from aligned memory to gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_load_d, otherwise gmx_simd_load_f.

Parameters

m	Pointer to memory aligned to the SIMD width.

Returns: SIMD variable with data loaded.

#define gmx_simd_loadu_d gmx_simd_load_d

Load SIMD double from unaligned memory.

Available with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirement.

Returns: SIMD variable with data loaded.

#define gmx_simd_loadu_di gmx_simd_load_di

Load unaligned integer SIMD data, width corresponds to gmx_simd_double_t.

You should typically call the real-precision gmx_simd_loadu_i.

Supported with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirements.

Returns: SIMD integer variable.

#define gmx_simd_loadu_f gmx_simd_load_f

Load SIMD float from unaligned memory.

Available with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirement.

Returns: SIMD variable with data loaded.

#define gmx_simd_loadu_fi gmx_simd_load_fi

Load unaligned integer SIMD data, width corresponds to gmx_simd_float_t.

You should typically call the real-precision gmx_simd_loadu_i.

Supported with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirements.

Returns: SIMD integer variable.

#define gmx_simd_loadu_i gmx_simd_loadu_fi

Load GMX_SIMD_INT32_WIDTH values from unaligned memory to gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_loadu_di , otherwise gmx_simd_loadu_fi .

You should typically call the real-precision gmx_simd_loadu_i.

Supported with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirements.

Returns: SIMD integer variable.

#define gmx_simd_loadu_r gmx_simd_loadu_f

Load GMX_SIMD_REAL_WIDTH values from unaligned memory to gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_loadu_d, otherwise gmx_simd_loadu_f.

Available with GMX_SIMD_HAVE_LOADU.

Parameters

m	Pointer to memory, no alignment requirement.

Returns: SIMD variable with data loaded.

#define gmx_simd_log_r gmx_simd_log_f

SIMD real log(x). This is the natural logarithm.

You should normally call the real-precision routine gmx_simd_log_r.

Parameters

x	Argument, should be >0.

Returns: The natural logarithm of x. Undefined if argument is invalid.

#define gmx_simd_log_singleaccuracy_r gmx_simd_log_f

SIMD real log(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_log_r.

Parameters

x	Argument, should be >0.

Returns: The natural logarithm of x. Undefined if argument is invalid.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_max_r gmx_simd_max_f

SIMD max(a,b) for each element in gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_max_d, otherwise gmx_simd_max_f.

You should typically call the real-precision gmx_simd_max_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: max(a,b) for each element.

#define gmx_simd_min_r gmx_simd_min_f

SIMD min(a,b) for each element in gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_min_d, otherwise gmx_simd_min_f.

You should typically call the real-precision gmx_simd_min_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: min(a,b) for each element.

#define gmx_simd_mul_i gmx_simd_mul_fi

SIMD a*b for two gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_mul_di, otherwise gmx_simd_mul_fi.

You should typically call the real-precision gmx_simd_mul_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	factor1
b	factor2

Returns: a*b.

Note: Only the low 32 bits are retained, so this can overflow.

#define gmx_simd_mul_r gmx_simd_mul_f

SIMD a*b for two gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_mul_d, otherwise gmx_simd_mul_f.

You should typically call the real-precision gmx_simd_mul_r.

Parameters

a	factor1
b	factor2

Returns: a*b.

#define gmx_simd_norm2_r gmx_simd_norm2_f

SIMD real norm squared of multiple real vectors.

This will call gmx_simd_norm2_d if GMX_DOUBLE is defined, otherwise gmx_simd_norm2_f.

For normal usage you should always call the real-precision gmx_simd_norm2_r.

Parameters

ax	X components of vectors
ay	Y components of vectors
az	Z components of vectors

Returns: Element i will be res[i] = ax[i]*ax[i]+ay[i]*ay[i]+az[i]*az[i].

Note: This corresponds to the scalar product of the vector with itself, but the compiler might be able to optimize it better with identical vectors.

#define gmx_simd_or_b gmx_simd_or_fb

For each element, the result boolean is true if either argument is true.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_or_db, otherwise gmx_simd_or_fb.

#define gmx_simd_or_i gmx_simd_or_fi

Bitwise or on two gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_or_di, otherwise gmx_simd_or_fi.

You should typically call the real-precision gmx_simd_or_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a | b (bitwise or)

#define gmx_simd_or_ib gmx_simd_or_fib

For each element, the result boolean is true if either argument is true.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_or_dib, otherwise gmx_simd_or_fib.

You should typically call the real-precision gmx_simd_or_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD boolean 1
b	SIMD boolean 2

Returns: True for elements where either a or b is true.

#define gmx_simd_or_r gmx_simd_or_f

Bitwise or on two gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_or_d, otherwise gmx_simd_or_f.

You should typically call the real-precision gmx_simd_or_r.

Parameters

a	data1
b	data2

Returns: data1 | data2

#define gmx_simd_pmecorrF_r gmx_simd_pmecorrF_f

SIMD Analytic PME force correction.

You should normally call the real-precision routine gmx_simd_pmecorrF_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb force - see below for details.

This routine is meant to enable analytical evaluation of the direct-space PME electrostatic force to avoid tables.

The direct-space potential should be $\mbox{erfc}(\beta r)/r$ , but there are some problems evaluating that:

First, the error function is difficult (read: expensive) to approximate accurately for intermediate to large arguments, and this happens already in ranges of $(\beta r)$ that occur in simulations. Second, we now try to avoid calculating potentials in Gromacs but use forces directly.

We can simplify things slightly by noting that the PME part is really a correction to the normal Coulomb force since $\mbox{erfc}(z)=1-\mbox{erf}(z)$ , i.e.

$V = \frac{1}{r} - \frac{\mbox{erf}(\beta r)}{r}$

The first term we already have from the inverse square root, so that we can leave out of this routine.

For PME tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm, the argument $\beta r$ will be in the range 0.15 to ~4, which is the range used for the minimax fit. Use your favorite plotting program to realize how well-behaved $\frac{\mbox{erf}(z)}{z}$ is in this range!

We approximate $f(z)=\mbox{erf}(z)/z$ with a rational minimax polynomial. However, it turns out it is more efficient to approximate $f(z)/z$ and then only use even powers. This is another minor optimization, since we actually want $f(z)/z$ , because it is going to be multiplied by the vector between the two atoms to get the vectorial force. The fastest flops are the ones we can avoid calculating!

So, here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=(\beta r)^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{2 \exp(-z^2)}{\sqrt{\pi} z^2}-\frac{\mbox{erf}(z)}{z^3}$
Multiply the entire expression by $\beta^3$ . This will get you

$\frac{2 \beta^3 \exp(-z^2)}{\sqrt{\pi} z^2} - \frac{\beta^3 \mbox{erf}(z)}{z^3}$

or, switching back to $r$ (since $z=r \beta$ ):

$\frac{2 \beta \exp(-r^2 \beta^2)}{\sqrt{\pi} r^2} - \frac{\mbox{erf}(r \beta)}{r^3}$

With a bit of math exercise you should be able to confirm that this is exactly

$\frac{\frac{d}{dr}\left( \frac{\mbox{erf}(\beta r)}{r} \right)}{r}$
Add the result to $r^{-3}$ , multiply by the product of the charges, and you have your force (divided by $r$ ). A final multiplication with the vector connecting the two particles and you have your vectorial force to add to the particles.

This approximation achieves an error slightly lower than 1e-6 in single precision and 1e-11 in double precision for arguments smaller than 16 ( $\beta r \leq 4$ ); when added to $1/r$ the error will be insignificant. For $\beta r \geq 7206$ the return value can be inf or NaN.

#define gmx_simd_pmecorrF_singleaccuracy_r gmx_simd_pmecorrF_f

SIMD Analytic PME force corr., only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_pmecorrF_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb force - see below for details.

This routine is meant to enable analytical evaluation of the direct-space PME electrostatic force to avoid tables.

The direct-space potential should be $\mbox{erfc}(\beta r)/r$ , but there are some problems evaluating that:

First, the error function is difficult (read: expensive) to approximate accurately for intermediate to large arguments, and this happens already in ranges of $(\beta r)$ that occur in simulations. Second, we now try to avoid calculating potentials in Gromacs but use forces directly.

We can simplify things slightly by noting that the PME part is really a correction to the normal Coulomb force since $\mbox{erfc}(z)=1-\mbox{erf}(z)$ , i.e.

$V = \frac{1}{r} - \frac{\mbox{erf}(\beta r)}{r}$

The first term we already have from the inverse square root, so that we can leave out of this routine.

For PME tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm, the argument $\beta r$ will be in the range 0.15 to ~4, which is the range used for the minimax fit. Use your favorite plotting program to realize how well-behaved $\frac{\mbox{erf}(z)}{z}$ is in this range!

We approximate $f(z)=\mbox{erf}(z)/z$ with a rational minimax polynomial. However, it turns out it is more efficient to approximate $f(z)/z$ and then only use even powers. This is another minor optimization, since we actually want $f(z)/z$ , because it is going to be multiplied by the vector between the two atoms to get the vectorial force. The fastest flops are the ones we can avoid calculating!

So, here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=(\beta r)^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{2 \exp(-z^2)}{\sqrt{\pi} z^2}-\frac{\mbox{erf}(z)}{z^3}$
Multiply the entire expression by $\beta^3$ . This will get you

$\frac{2 \beta^3 \exp(-z^2)}{\sqrt{\pi} z^2} - \frac{\beta^3 \mbox{erf}(z)}{z^3}$

or, switching back to $r$ (since $z=r \beta$ ):

$\frac{2 \beta \exp(-r^2 \beta^2)}{\sqrt{\pi} r^2} - \frac{\mbox{erf}(r \beta)}{r^3}$

With a bit of math exercise you should be able to confirm that this is exactly

$\frac{\frac{d}{dr}\left( \frac{\mbox{erf}(\beta r)}{r} \right)}{r}$
Add the result to $r^{-3}$ , multiply by the product of the charges, and you have your force (divided by $r$ ). A final multiplication with the vector connecting the two particles and you have your vectorial force to add to the particles.

This approximation achieves an error slightly lower than 1e-6 in single precision and 1e-11 in double precision for arguments smaller than 16 ( $\beta r \leq 4$ ); when added to $1/r$ the error will be insignificant. For $\beta r \geq 7206$ the return value can be inf or NaN.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_pmecorrV_r gmx_simd_pmecorrV_f

SIMD Analytic PME potential correction.

You should normally call the real-precision routine gmx_simd_pmecorrV_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb potential - see below for details.

See gmx_simd_pmecorrF_f for details about the approximation.

This routine calculates $\mbox{erf}(z)/z$ , although you should provide $z^2$ as the input argument.

Here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=\beta^2*r^2$ .
Evaluate this routine with z^2 as the argument.
The return value is the expression:

$\frac{\mbox{erf}(z)}{z}$
Multiply the entire expression by $\beta$ and switch back to $r$ (since $z=r \beta$ ):

$\frac{\mbox{erf}(r \beta)}{r}$
Subtract the result from $1/r$ , multiply by the product of the charges, and you have your potential.

This approximation achieves an error slightly lower than 1e-6 in single precision and 4e-11 in double precision for arguments smaller than 16 ( $0.15 \leq \beta r \leq 4$ ); for $\beta r \leq 0.15$ the error can be twice as high; when added to $1/r$ the error will be insignificant. For $\beta r \geq 7142$ the return value can be inf or NaN.

#define gmx_simd_pmecorrV_singleaccuracy_r gmx_simd_pmecorrV_f

SIMD Analytic PME potential corr., only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_pmecorrV_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb potential - see below for details.

See gmx_simd_pmecorrF_f for details about the approximation.

This routine calculates $\mbox{erf}(z)/z$ , although you should provide $z^2$ as the input argument.

Here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=\beta^2*r^2$ .
Evaluate this routine with z^2 as the argument.
The return value is the expression:

$\frac{\mbox{erf}(z)}{z}$
Multiply the entire expression by $\beta$ and switch back to $r$ (since $z=r \beta$ ):

$\frac{\mbox{erf}(r \beta)}{r}$
Subtract the result from $1/r$ , multiply by the product of the charges, and you have your potential.

This approximation achieves an error slightly lower than 1e-6 in single precision and 4e-11 in double precision for arguments smaller than 16 ( $0.15 \leq \beta r \leq 4$ ); for $\beta r \leq 0.15$ the error can be twice as high; when added to $1/r$ the error will be insignificant. For $\beta r \geq 7142$ the return value can be inf or NaN.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_rcp_r gmx_simd_rcp_f

SIMD table lookup for 1/x approximation.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_rcp_d, otherwise gmx_simd_rcp_f.

You should typically call the real-precision gmx_simd_rcp_r.

This is a low-level instruction that should only be called from routines implementing the reciprocal in simd_math.h.

Parameters

x	Argument, x!=0

Returns: Approximation of 1/x, accuracy is GMX_SIMD_RCP_BITS.

#define gmx_simd_real_t gmx_simd_float_t

Real precision floating-point SIMD datatype.

This type is only available if GMX_SIMD_HAVE_REAL is defined.

If GMX_DOUBLE is defined, this will be set to gmx_simd_double_t internally, otherwise gmx_simd_float_t.

#define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH

Width of gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to GMX_SIMD_DOUBLE_WIDTH, otherwise GMX_SIMD_FLOAT_WIDTH.

#define gmx_simd_reduce_r gmx_simd_reduce_f

Return sum of all elements in SIMD floating-point variable.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_reduce_d, otherwise gmx_simd_reduce_f.

You should typically call the real-precision gmx_simd_reduce_r.

Parameters

a	SIMD variable to reduce/sum.

Returns: The sum of all elements in the argument variable.

#define gmx_simd_round_r gmx_simd_round_f

Round gmx_simd_real_t to nearest int, return gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_round_d, otherwise gmx_simd_round_f.

You should typically call the real-precision gmx_simd_round_r.

Parameters

a	Any floating-point value

Returns: The nearest integer, represented in floating-point format.

Note: The reference implementation rounds exact half-way cases away from zero, whereas most SIMD intrinsics will round to nearest even. This could be fixed by using rint/rintf, but the bigger problem is that MSVC does not support full C99, and none of the round or rint functions are defined. It's much easier to approximately implement round() than rint(), so we do that and hope we never get bitten in testing. (Thanks, Microsoft.)

#define gmx_simd_rsqrt_r gmx_simd_rsqrt_f

SIMD table lookup for 1/sqrt(x) approximation.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_rsqrt_d, otherwise gmx_simd_rsqrt_f.

You should typically call the real-precision gmx_simd_rsqrt_r.

This is a low-level instruction that should only be called from routines implementing the inverse square root in simd_math.h.

Parameters

x	Argument, x>0

Returns: Approximation of 1/sqrt(x), accuracy is GMX_SIMD_RSQRT_BITS.

#define gmx_simd_set1_i gmx_simd_set1_fi

Set all elements in gmx_simd_int32_t from a single integer.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_set1_di, otherwise gmx_simd_set1_fi.

You should typically call the real-precision gmx_simd_set1_i.

Parameters

b	integer value to set variable to.

Returns: SIMD variable with all elements set to b.

#define gmx_simd_set1_r gmx_simd_set1_f

Set all elements in gmx_simd_real_t from a scalar.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_set1_d, otherwise gmx_simd_set1_f.

Parameters

r	floating-point constant

Returns: SIMD variable with all elements set to r.

#define gmx_simd_set_exponent_r gmx_simd_set_exponent_f

Set the exponent of a SIMD gmx_simd_real_t from a gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_set_exponent_d, otherwise gmx_simd_set_exponent_f.

You should typically call the real-precision gmx_simd_set_exponent_r.

Parameters

a	A floating point value that will not overflow as 2^a.

Returns: 2^(round(a)).

The input is rounded to the nearest integer, the exponent bias is added to this integer, and the bits are shifted to the IEEE754 exponent part of the number.

Note: The argument will be rounded to nearest integer since that is what we need for the exponential functions, and this integer x will be set as the exponent so the new floating-point number will be 2^x.

#define gmx_simd_setzero_i gmx_simd_setzero_fi

Set all elements in gmx_simd_int32_t to 0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_setzero_di, otherwise gmx_simd_setzero_fi.

You should typically call the real-precision gmx_simd_setzero_i.

Returns: SIMD integer variable with all bits set to zero.

#define gmx_simd_setzero_r gmx_simd_setzero_f

Set all elements in gmx_simd_real_t to 0.0.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_setzero_d, otherwise gmx_simd_setzero_f.

Returns: The value 0.0 in all elements of a SIMD variable.

#define gmx_simd_sin_r gmx_simd_sin_f

SIMD real sin(x).

You should normally call the real-precision routine gmx_simd_sin_r.

Parameters

x	The argument to evaluate sin for

Returns: Sin(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

#define gmx_simd_sin_singleaccuracy_r gmx_simd_sin_f

SIMD real sin(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_sin_r.

Parameters

x	The argument to evaluate sin for

Returns: Sin(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_sincos_r gmx_simd_sincos_f

SIMD real sin & cos.

You should normally call the real-precision routine gmx_simd_sincos_r.

Parameters

	x	The argument to evaluate sin/cos for
[out]	sinval	Sin(x)
[out]	cosval	Cos(x)

This version achieves close to machine precision, but for very large magnitudes of the argument we inherently begin to lose accuracy due to the argument reduction, despite using extended precision arithmetics internally.

#define gmx_simd_sincos_singleaccuracy_r gmx_simd_sincos_f

SIMD real sin & cos, only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_sincos_r.

Parameters

	x	The argument to evaluate sin/cos for
[out]	sinval	Sin(x)
[out]	cosval	Cos(x)

This version achieves close to machine precision, but for very large magnitudes of the argument we inherently begin to lose accuracy due to the argument reduction, despite using extended precision arithmetics internally.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_slli_i gmx_simd_slli_fi

Shift each element in gmx_simd_int32_t left by immediate.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_slli_di, otherwise gmx_simd_slli_fi.

You should typically call the real-precision gmx_simd_slli_i.

Logical shift. Each element is shifted (independently) up to 32 positions left, while zeros are shifted in from the right. Only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	integer data to shift
n	number of positions to shift left. n<=32.

Returns: shifted values

#define gmx_simd_sqrt_r gmx_simd_sqrt_f

Calculate sqrt(x) correctly for SIMD real, including argument 0.0.

You should normally call the real-precision routine gmx_simd_sqrt_r.

Parameters

x	Argument that must be >=0.

Returns: sqrt(x). If x=0, the result will correctly be set to 0. The result is undefined if the input value is negative.

#define gmx_simd_sqrt_singleaccuracy_r gmx_simd_sqrt_f

Calculate sqrt(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_sqrt_r.

Parameters

x	Argument that must be >=0.

Returns: sqrt(x). If x=0, the result will correctly be set to 0. The result is undefined if the input value is negative.

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_srli_i gmx_simd_srli_fi

Shift each element in gmx_simd_int32_t right by immediate.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_srli_di, otherwise gmx_simd_srli_fi.

You should typically call the real-precision gmx_simd_srli_i.

Logical shift. Each element is shifted (independently) up to 32 positions right, while zeros are shifted in from the left. Only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	integer data to shift
n	number of positions to shift right. n<=32.

Returns: shifted values

#define gmx_simd_store_i gmx_simd_store_fi

Store GMX_SIMD_REAL_WIDTH values from gmx_simd_int32_t to aligned memory.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_store_di, otherwise gmx_simd_store_fi.

You should typically call the real-precision gmx_simd_store_i.

Parameters

m	Memory aligned to integer SIMD width.
a	SIMD variable to store.

#define gmx_simd_store_r gmx_simd_store_f

Store GMX_SIMD_REAL_WIDTH values from gmx_simd_real_t to aligned memory.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_store_d, otherwise gmx_simd_store_f.

Parameters

[out]	m	Pointer to memory, aligned to SIMD width.
	a	SIMD variable to store

#define gmx_simd_storeu_d gmx_simd_store_d

Store SIMD double to unaligned memory.

Available with GMX_SIMD_HAVE_STOREU.

Parameters

[out]	m	Pointer to memory, no alignment requirement.
	a	SIMD variable to store.

#define gmx_simd_storeu_di gmx_simd_store_di

Store unaligned SIMD integer data, width corresponds to gmx_simd_double_t.

You should typically call the real-precision gmx_simd_storeu_i.

Supported with GMX_SIMD_HAVE_STOREU.

Parameters

m	Memory pointer, no alignment requirements.
a	SIMD variable to store.

#define gmx_simd_storeu_f gmx_simd_store_f

Store SIMD float to unaligned memory.

Available with GMX_SIMD_HAVE_STOREU.

Parameters

[out]	m	Pointer to memory, no alignment requirement.
	a	SIMD variable to store.

#define gmx_simd_storeu_fi gmx_simd_store_fi

Store unaligned SIMD integer data, width corresponds to gmx_simd_float_t.

You should typically call the real-precision gmx_simd_storeu_i.

Supported with GMX_SIMD_HAVE_STOREU.

Parameters

m	Memory pointer, no alignment requirements.
a	SIMD variable to store.

#define gmx_simd_storeu_i gmx_simd_storeu_fi

Store GMX_SIMD_REAL_WIDTH values from gmx_simd_int32_t to unaligned memory.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_storeu_di, otherwise gmx_simd_storeu_fi.

You should typically call the real-precision gmx_simd_storeu_i.

Supported with GMX_SIMD_HAVE_STOREU.

Parameters

m	Memory pointer, no alignment requirements.
a	SIMD variable to store.

#define gmx_simd_storeu_r gmx_simd_storeu_f

Store GMX_SIMD_REAL_WIDTH values from gmx_simd_real_t to unaligned memory.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_storeu_d, otherwise gmx_simd_storeu_f.

Available with GMX_SIMD_HAVE_STOREU.

Parameters

[out]	m	Pointer to memory, no alignment requirement.
	a	SIMD variable to store.

#define gmx_simd_sub_i gmx_simd_sub_fi

SIMD a-b for two gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_sub_di, otherwise gmx_simd_sub_fi.

You should typically call the real-precision gmx_simd_sub_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	term1
b	term2

Returns: a-b

#define gmx_simd_sub_r gmx_simd_sub_f

SIMD a-b for two gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_sub_d, otherwise gmx_simd_sub_f.

You should typically call the real-precision gmx_simd_sub_r.

Parameters

a	term1
b	term2

Returns: a-b

#define gmx_simd_sum4_r gmx_simd_sum4_f

SIMD utility function to sum a+b+c+d for SIMD reals.

You should normally call the real-precision routine gmx_simd_sum4_r.

Parameters

a	term 1 (multiple values)
b	term 2 (multiple values)
c	term 3 (multiple values)
d	term 4 (multiple values)

Returns: sum of terms 1-4 (multiple values)

#define gmx_simd_tan_r gmx_simd_tan_f

SIMD real tan(x).

You should normally call the real-precision routine gmx_simd_tan_r.

Parameters

x	The argument to evaluate tan for

Returns: Tan(x)

#define gmx_simd_tan_singleaccuracy_r gmx_simd_tan_f

SIMD real tan(x), only targeting single accuracy.

You should normally call the real-precision routine gmx_simd_tan_r.

Parameters

x	The argument to evaluate tan for

Returns: Tan(x)

Note: This is a performance-targeted function that only achieves single precision accuracy, even when the SIMD data is double precision.

#define gmx_simd_trunc_r gmx_simd_trunc_f

Truncate gmx_simd_real_t towards 0, return gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_trunc_d, otherwise gmx_simd_trunc_f.

You should typically call the real-precision gmx_simd_trunc_r.

Parameters

a	Any floating-point value

Returns: Integer rounded towards zero, represented in floating-point format.

Note: This is truncation towards zero, not floor(). The reason for this is that truncation is virtually always present as a dedicated hardware instruction, but floor() frequently isn't.

#define gmx_simd_xor_i gmx_simd_xor_fi

Bitwise xor on two gmx_simd_int32_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_xor_di, otherwise gmx_simd_xor_fi.

You should typically call the real-precision gmx_simd_xor_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a ^ b (bitwise xor)

#define gmx_simd_xor_r gmx_simd_xor_f

Bitwise exclusive-or on two gmx_simd_real_t.

If GMX_DOUBLE is defined, this will be aliased to gmx_simd_xor_d, otherwise gmx_simd_xor_f.

You should typically call the real-precision gmx_simd_xor_r.

Parameters

a	data1
b	data2

Returns: data1 ^ data2

#define gmx_simd_xor_sign_r gmx_simd_xor_sign_f

Return -a if b is negative, SIMD real.

You should normally call the real-precision routine gmx_simd_xor_sign_r.

Parameters

a	Values to set sign for
b	Values used to set sign

Returns: if b is negative, the sign of a will be changed.

This is equivalent to doing an xor operation on a with the sign bit of b, with the exception that negative zero is not considered to be negative on architectures where GMX_SIMD_HAVE_LOGICAL is not set.

Function Documentation

static double * gmx_simd4_align_d ( double * p )

inlinestatic

Align a double pointer for usage with SIMD4 instructions.

You should typically not call this function directly (unless you explicitly want double precision even when GMX_DOUBLE is not set), but use the gmx_simd4_align_r macro to align memory in default Gromacs real precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD4_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing double SIMD. If GMX_SIMD4_HAVE_DOUBLE is not set, p will be returned unchanged.

This routine provides aligned memory for usage with gmx_simd4_double_t. You should have allocated an extra GMX_SIMD4_WIDTH * sizeof(double) bytes.

static float * gmx_simd4_align_f ( float * p )

inlinestatic

Align a float pointer for usage with SIMD4 instructions.

You should typically not call this function directly (unless you explicitly want single precision even when GMX_DOUBLE is set), but use the gmx_simd4_align_r macro to align memory in default Gromacs real precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD4_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing float SIMD. If GMX_SIMD4_HAVE_FLOAT is not set, p will be returned unchanged.

This routine provides aligned memory for usage with gmx_simd4_float_t. You should have allocated an extra GMX_SIMD4_WIDTH * sizeof(float) bytes.

static double gmx_simd4_dotproduct3_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Return dot product of two double precision SIMD4 variables.

Returns: a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.

static float gmx_simd4_dotproduct3_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Return dot product of two single precision SIMD4 variables.

The dot product is calculated between the first three elements in the two vectors, while the fourth is ignored. The result is returned as a scalar.

Parameters

a	vector1
b	vector2

Returns: a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.

static gmx_simd_double_t gmx_simdcall gmx_simd4_invsqrt_d ( gmx_simd_double_t x )

inlinestatic

Calculate 1/sqrt(x) for SIMD4 double.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

static gmx_simd_float_t gmx_simdcall gmx_simd4_invsqrt_f ( gmx_simd_float_t x )

inlinestatic

Calculate 1/sqrt(x) for SIMD4 float.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

static gmx_simd_double_t gmx_simdcall gmx_simd4_invsqrt_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

Calculate 1/sqrt(x) for SIMD4 double, but in single accuracy.

You should normally call the real-precision routine gmx_simd_invsqrt_singleaccuracy_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

static gmx_simd_double_t gmx_simdcall gmx_simd4_norm2_d	(	gmx_simd_double_t	ax,
		gmx_simd_double_t	ay,
		gmx_simd_double_t	az
	)

inlinestatic

SIMD4 double norm squared of multiple vectors.

For normal usage you should always call the real-precision gmx_simd_norm2_r.

Parameters

ax	X components of vectors
ay	Y components of vectors
az	Z components of vectors

Returns: Element i will be res[i] = ax[i]*ax[i]+ay[i]*ay[i]+az[i]*az[i].

Note: This corresponds to the scalar product of the vector with itself, but the compiler might be able to optimize it better with identical vectors.

static gmx_simd_float_t gmx_simdcall gmx_simd4_norm2_f	(	gmx_simd_float_t	ax,
		gmx_simd_float_t	ay,
		gmx_simd_float_t	az
	)

inlinestatic

SIMD4 float norm squared of multiple vectors.

For normal usage you should always call the real-precision gmx_simd_norm2_r.

Parameters

ax	X components of vectors
ay	Y components of vectors
az	Z components of vectors

Returns: Element i will be res[i] = ax[i]*ax[i]+ay[i]*ay[i]+az[i]*az[i].

Note: This corresponds to the scalar product of the vector with itself, but the compiler might be able to optimize it better with identical vectors.

static gmx_simd_double_t gmx_simdcall gmx_simd4_rsqrt_iter_d	(	gmx_simd_double_t	lu,
		gmx_simd_double_t	x
	)

inlinestatic

Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD4 double.

This is a low-level routine that should only be used by SIMD math routines that evaluate the inverse square root.

Parameters

lu	Approximation of 1/sqrt(x), typically obtained from lookup.
x	The reference (starting) value x for which we want 1/sqrt(x).

Returns: An improved approximation with roughly twice as many bits of accuracy.

static gmx_simd_float_t gmx_simdcall gmx_simd4_rsqrt_iter_f	(	gmx_simd_float_t	lu,
		gmx_simd_float_t	x
	)

inlinestatic

Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD4 float.

This is a low-level routine that should only be used by SIMD math routines that evaluate the inverse square root.

Parameters

lu	Approximation of 1/sqrt(x), typically obtained from lookup.
x	The reference (starting) value x for which we want 1/sqrt(x).

Returns: An improved approximation with roughly twice as many bits of accuracy.

static gmx_simd_double_t gmx_simdcall gmx_simd4_sum4_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b,
		gmx_simd_double_t	c,
		gmx_simd_double_t	d
	)

inlinestatic

SIMD4 utility function to sum a+b+c+d for SIMD4 doubles.

You should normally call the real-precision routine gmx_simd_sum4_r.

Parameters

a	term 1 (multiple values)
b	term 2 (multiple values)
c	term 3 (multiple values)
d	term 4 (multiple values)

Returns: sum of terms 1-4 (multiple values)

static gmx_simd_float_t gmx_simdcall gmx_simd4_sum4_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b,
		gmx_simd_float_t	c,
		gmx_simd_float_t	d
	)

inlinestatic

SIMD4 utility function to sum a+b+c+d for SIMD4 floats.

You should normally call the real-precision routine gmx_simd_sum4_r.

Parameters

a	term 1 (multiple values)
b	term 2 (multiple values)
c	term 3 (multiple values)
d	term 4 (multiple values)

Returns: sum of terms 1-4 (multiple values)

static gmx_simd_double_t gmx_simdcall gmx_simd_acos_d ( gmx_simd_double_t x )

inlinestatic

SIMD double acos(x).

You should normally call the real-precision routine gmx_simd_acos_r.

Parameters

x	The argument to evaluate acos for

Returns: Acos(x)

static gmx_simd_float_t gmx_simdcall gmx_simd_acos_f ( gmx_simd_float_t x )

inlinestatic

SIMD float acos(x).

You should normally call the real-precision routine gmx_simd_acos_r.

Parameters

x	The argument to evaluate acos for

Returns: Acos(x)

static gmx_simd_double_t gmx_simdcall gmx_simd_acos_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD acos(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_acos_singleaccuracy_r.

Parameters

x	The argument to evaluate acos for

Returns: Acos(x)

static gmx_simd_double_t gmx_simd_add_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Add two double SIMD variables.

You should typically call the real-precision gmx_simd_add_r.

Parameters

a	term1
b	term2

Returns: a+b

static gmx_simd_dint32_t gmx_simd_add_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Add SIMD integers, corresponding to double precision.

You should typically call the real-precision gmx_simd_add_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	term1
b	term2

Returns: a+b

static gmx_simd_float_t gmx_simd_add_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Add two float SIMD variables.

You should typically call the real-precision gmx_simd_add_r.

Parameters

a	term1
b	term2

Returns: a+b

static gmx_simd_fint32_t gmx_simd_add_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Add SIMD integers.

You should typically call the real-precision gmx_simd_add_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	term1
b	term2

Returns: a+b

static double * gmx_simd_align_d ( double * p )

inlinestatic

Align a double pointer for usage with SIMD instructions.

You should typically not call this function directly (unless you explicitly want double precision even when GMX_DOUBLE is not set), but use the gmx_simd_align_r macro to align memory in default Gromacs real precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD_DOUBLE_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing double fp SIMD. If GMX_SIMD_HAVE_DOUBLE is not set, p will be returned unchanged.

Start by allocating an extra GMX_SIMD_DOUBLE_WIDTH double elements of memory, and then call this function. The returned pointer will be greater or equal to the one you provided, and point to an address inside your provided memory that is aligned to the SIMD width.

static int * gmx_simd_align_di ( int * p )

inlinestatic

Align a (double) integer pointer for usage with SIMD instructions.

You should typically not call this function directly (unless you explicitly want integers corresponding to double precision even when GMX_DOUBLE is not set), but use the gmx_simd_align_i macro to align integer memory corresponding to Gromacs default floating-point precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD_DINT32_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing double-integer SIMD. If GMX_SIMD_HAVE_DINT32 is not set, p will be returned unchanged.

This routine provides aligned memory for usage with gmx_simd_dint32_t. You should have allocated an extra GMX_SIMD_DINT32_WIDTH*sizeof(int) bytes. The reason why we need to separate float-integer vs. double-integer is that the width of registers after conversions from the floating-point types might not be identical, or even supported, in both cases.

static float * gmx_simd_align_f ( float * p )

inlinestatic

Align a float pointer for usage with SIMD instructions.

You should typically not call this function directly (unless you explicitly want single precision even when GMX_DOUBLE is set), but use the gmx_simd_align_r macro to align memory in default Gromacs real precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD_FLOAT_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing float fp SIMD. If GMX_SIMD_HAVE_FLOAT is not set, p will be returned unchanged.

Start by allocating an extra GMX_SIMD_FLOAT_WIDTH float elements of memory, and then call this function. The returned pointer will be greater or equal to the one you provided, and point to an address inside your provided memory that is aligned to the SIMD width.

static int * gmx_simd_align_fi ( int * p )

inlinestatic

Align a (float) integer pointer for usage with SIMD instructions.

You should typically not call this function directly (unless you explicitly want integers corresponding to single precision even when GMX_DOUBLE is set), but use the gmx_simd_align_i macro to align integer memory corresponding to Gromacs default floating-point precision.

Parameters

p	Pointer to memory, allocate at least GMX_SIMD_FINT32_WIDTH extra elements.

Returns: Aligned pointer (>=p) suitable for loading/storing float-integer SIMD. If GMX_SIMD_HAVE_FINT32 is not set, p will be returned unchanged.

This routine provides aligned memory for usage with gmx_simd_fint32_t. You should have allocated an extra GMX_SIMD_FINT32_WIDTH * sizeof(int) bytes. The reason why we need to separate float-integer vs. double-integer is that the width of registers after conversions from the floating-point types might not be identical, or even supported, in both cases.

static gmx_simd_double_t gmx_simd_and_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Bitwise and for two SIMD double variables. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_and_r.

Parameters

a	data1
b	data2

Returns: data1 & data2

static gmx_simd_dbool_t gmx_simd_and_db	(	gmx_simd_dbool_t	a,
		gmx_simd_dbool_t	b
	)

inlinestatic

Logical and on double precision SIMD booleans.

You should typically call the real-precision gmx_simd_and_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a & b are true.

Note: This is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_and_ib

static gmx_simd_dint32_t gmx_simd_and_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Integer bitwise and for SIMD variables.

You should typically call the real-precision gmx_simd_and_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Note: You can not use this operation directly to select based on a boolean SIMD variable, since booleans are separate from integer SIMD. If that is what you need, have a look at gmx_simd_blendzero_i instead.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a & b (bitwise and)

static gmx_simd_dibool_t gmx_simd_and_dib	(	gmx_simd_dibool_t	a,
		gmx_simd_dibool_t	b
	)

inlinestatic

Logical AND on gmx_simd_dibool_t.

You should typically call the real-precision gmx_simd_and_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD boolean 1
b	SIMD boolean 2

Returns: True for elements where both a and b are true.

static gmx_simd_float_t gmx_simd_and_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Bitwise and for two SIMD float variables. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_and_r.

Parameters

a	data1
b	data2

Returns: data1 & data2

static gmx_simd_fbool_t gmx_simd_and_fb	(	gmx_simd_fbool_t	a,
		gmx_simd_fbool_t	b
	)

inlinestatic

Logical and on single precision SIMD booleans.

You should typically call the real-precision gmx_simd_and_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a & b are true.

Note: This is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_and_ib

static gmx_simd_fint32_t gmx_simd_and_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Integer SIMD bitwise and.

You should typically call the real-precision gmx_simd_and_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Note: You can not use this operation directly to select based on a boolean SIMD variable, since booleans are separate from integer SIMD. If that is what you need, have a look at gmx_simd_blendzero_i instead.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a & b (bitwise and)

static gmx_simd_fibool_t gmx_simd_and_fib	(	gmx_simd_fibool_t	a,
		gmx_simd_fibool_t	b
	)

inlinestatic

Logical AND on gmx_simd_fibool_t.

You should typically call the real-precision gmx_simd_and_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD boolean 1
b	SIMD boolean 2

Returns: True for elements where both a and b are true.

static gmx_simd_double_t gmx_simd_andnot_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Bitwise andnot for SIMD double. c=(~a) & b. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_andnot_r.

Parameters

a	data1
b	data2

Returns: (~data1) & data2

static gmx_simd_dint32_t gmx_simd_andnot_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Integer bitwise not-and for SIMD variables.

You should typically call the real-precision gmx_simd_andnot_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Note that you can NOT use this operation directly to select based on a boolean SIMD variable, since booleans are separate from integer SIMD. If that is what you need, have a look at gmx_simd_blendnotzero_i instead.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: (~a) & b (bitwise andnot)

static gmx_simd_float_t gmx_simd_andnot_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Bitwise andnot for SIMD float. c=(~a) & b. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_andnot_r.

Parameters

a	data1
b	data2

Returns: (~data1) & data2

static gmx_simd_fint32_t gmx_simd_andnot_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Integer SIMD bitwise not-and.

You should typically call the real-precision gmx_simd_andnot_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Note that you can NOT use this operation directly to select based on a boolean SIMD variable, since booleans are separate from integer SIMD. If that is what you need, have a look at gmx_simd_blendnotzero_i instead.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: (~a) & b (bitwise andnot)

static int gmx_simd_anytrue_db ( gmx_simd_dbool_t a )

inlinestatic

Returns non-zero if any of the booleans in a is True, otherwise 0.

You should typically call the real-precision gmx_simd_anytrue_b.

Parameters

a	Logical variable.

Returns: non-zero if any element in a is true, otherwise 0.

The actual return value for truth will depend on the architecture, so any non-zero value is considered truth.

static int gmx_simd_anytrue_dib ( gmx_simd_dibool_t a )

inlinestatic

Returns non-zero if any of the double-int SIMD booleans in a is True, otherwise 0.

You should typically call the real-precision gmx_simd_anytrue_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

The actual return value for "any true" will depend on the architecture. Any non-zero value should be considered truth.

Parameters

a	SIMD boolean

Returns: Nonzero integer if any of the elements in a is true, otherwise 0.

static int gmx_simd_anytrue_fb ( gmx_simd_fbool_t a )

inlinestatic

Returns non-zero if any of the booleans in a is True, otherwise 0.

You should typically call the real-precision gmx_simd_anytrue_b.

Parameters

a	Logical variable.

Returns: non-zero if any element in a is true, otherwise 0.

The actual return value for truth will depend on the architecture, so any non-zero value is considered truth.

static int gmx_simd_anytrue_fib ( gmx_simd_fibool_t a )

inlinestatic

Returns non-zero if any of the booleans in a is True, otherwise 0.

You should typically call the real-precision gmx_simd_anytrue_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

The actual return value for "any true" will depend on the architecture. Any non-zero value should be considered truth.

Parameters

a	SIMD boolean

Returns: Nonzero integer if any of the elements in a is true, otherwise 0.

static gmx_simd_double_t gmx_simdcall gmx_simd_asin_d ( gmx_simd_double_t x )

inlinestatic

SIMD double asin(x).

You should normally call the real-precision routine gmx_simd_asin_r.

Parameters

x	The argument to evaluate asin for

Returns: Asin(x)

static gmx_simd_float_t gmx_simdcall gmx_simd_asin_f ( gmx_simd_float_t x )

inlinestatic

SIMD float asin(x).

You should normally call the real-precision routine gmx_simd_asin_r.

Parameters

x	The argument to evaluate asin for

Returns: Asin(x)

static gmx_simd_double_t gmx_simdcall gmx_simd_asin_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD asin(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_asin_singleaccuracy_r.

Parameters

x	The argument to evaluate asin for

Returns: Asin(x)

static gmx_simd_double_t gmx_simdcall gmx_simd_atan2_d	(	gmx_simd_double_t	y,
		gmx_simd_double_t	x
	)

inlinestatic

SIMD double atan2(y,x).

You should normally call the real-precision routine gmx_simd_atan2_r.

Parameters

y	Y component of vector, any quadrant
x	X component of vector, any quadrant

Returns: Atan2(y,x), same argument/value range as standard math library.

Note: This routine should provide correct results for all finite non-zero or positive-zero arguments. However, negative zero arguments will be treated as positive zero, which means the return value will deviate from the standard math library atan2(y,x) for those cases. That should not be of any concern in Gromacs, and in particular it will not affect calculations of angles from vectors.

static gmx_simd_float_t gmx_simdcall gmx_simd_atan2_f	(	gmx_simd_float_t	y,
		gmx_simd_float_t	x
	)

inlinestatic

SIMD float atan2(y,x).

You should normally call the real-precision routine gmx_simd_atan2_r.

Parameters

y	Y component of vector, any quadrant
x	X component of vector, any quadrant

Returns: Atan2(y,x), same argument/value range as standard math library.

Note: This routine should provide correct results for all finite non-zero or positive-zero arguments. However, negative zero arguments will be treated as positive zero, which means the return value will deviate from the standard math library atan2(y,x) for those cases. That should not be of any concern in Gromacs, and in particular it will not affect calculations of angles from vectors.

static gmx_simd_double_t gmx_simdcall gmx_simd_atan2_singleaccuracy_d	(	gmx_simd_double_t	y,
		gmx_simd_double_t	x
	)

inlinestatic

SIMD atan2(y,x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_atan2_singleaccuracy_r.

Parameters

y	Y component of vector, any quadrant
x	X component of vector, any quadrant

Returns: Atan2(y,x), same argument/value range as standard math library.

Note: This routine should provide correct results for all finite non-zero or positive-zero arguments. However, negative zero arguments will be treated as positive zero, which means the return value will deviate from the standard math library atan2(y,x) for those cases. That should not be of any concern in Gromacs, and in particular it will not affect calculations of angles from vectors.

static gmx_simd_double_t gmx_simdcall gmx_simd_atan_d ( gmx_simd_double_t x )

inlinestatic

SIMD double atan(x).

You should normally call the real-precision routine gmx_simd_atan_r.

Parameters

x	The argument to evaluate atan for

Returns: Atan(x), same argument/value range as standard math library.

static gmx_simd_float_t gmx_simdcall gmx_simd_atan_f ( gmx_simd_float_t x )

inlinestatic

SIMD float atan(x).

You should normally call the real-precision routine gmx_simd_atan_r.

Parameters

x	The argument to evaluate atan for

Returns: Atan(x), same argument/value range as standard math library.

static gmx_simd_double_t gmx_simdcall gmx_simd_atan_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD atan(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_atan_singleaccuracy_r.

Parameters

x	The argument to evaluate atan for

Returns: Atan(x), same argument/value range as standard math library.

static gmx_simd_double_t gmx_simd_blendnotzero_d	(	gmx_simd_double_t	a,
		gmx_simd_dbool_t	sel
	)

inlinestatic

Select from double SIMD variable where boolean is false.

You should typically call the real-precision gmx_simd_blendnotzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for false, 0 for true (sic).

static gmx_simd_dint32_t gmx_simd_blendnotzero_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dibool_t	sel
	)

inlinestatic

Select from SIMD ints (corresponding to double) where boolean is false.

You should typically call the real-precision gmx_simd_blendnotzero_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer to select from
sel	Boolean selector

Returns: Elements from a where sel is false, 0 otherwise (sic).

static gmx_simd_float_t gmx_simd_blendnotzero_f	(	gmx_simd_float_t	a,
		gmx_simd_fbool_t	sel
	)

inlinestatic

Select from single precision SIMD variable where boolean is false.

You should typically call the real-precision gmx_simd_blendnotzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for false, 0 for true (sic).

static gmx_simd_fint32_t gmx_simd_blendnotzero_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fibool_t	sel
	)

inlinestatic

Select from gmx_simd_fint32_t variable where boolean is false.

You should typically call the real-precision gmx_simd_blendnotzero_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer to select from
sel	Boolean selector

Returns: Elements from a where sel is false, 0 otherwise (sic).

static gmx_simd_double_t gmx_simd_blendv_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b,
		gmx_simd_dbool_t	sel
	)

inlinestatic

Vector-blend double SIMD selection.

You should typically call the real-precision gmx_simd_blendv_r.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

static gmx_simd_dint32_t gmx_simd_blendv_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b,
		gmx_simd_dibool_t	sel
	)

inlinestatic

Vector-blend SIMD selection for double-int SIMD.

You should typically call the real-precision gmx_simd_blendv_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

static gmx_simd_float_t gmx_simd_blendv_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b,
		gmx_simd_fbool_t	sel
	)

inlinestatic

Vector-blend SIMD selection.

You should typically call the real-precision gmx_simd_blendv_r.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

static gmx_simd_fint32_t gmx_simd_blendv_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b,
		gmx_simd_fibool_t	sel
	)

inlinestatic

Vector-blend SIMD selection.

You should typically call the real-precision gmx_simd_blendv_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	First source
b	Second source
sel	Boolean selector

Returns: For each element, select b if sel is true, a otherwise.

static gmx_simd_double_t gmx_simd_blendzero_d	(	gmx_simd_double_t	a,
		gmx_simd_dbool_t	sel
	)

inlinestatic

Select from double SIMD variable where boolean is true.

You should typically call the real-precision gmx_simd_blendzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for true, 0 for false.

static gmx_simd_dint32_t gmx_simd_blendzero_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dibool_t	sel
	)

inlinestatic

Select from SIMD ints (corresponding to double) where boolean is true.

You should typically call the real-precision gmx_simd_blendzero_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer to select from
sel	Boolean selector

Returns: Elements from a where sel is true, 0 otherwise.

static gmx_simd_float_t gmx_simd_blendzero_f	(	gmx_simd_float_t	a,
		gmx_simd_fbool_t	sel
	)

inlinestatic

Select from single precision SIMD variable where boolean is true.

You should typically call the real-precision gmx_simd_blendzero_r.

Parameters

a	Floating-point variable to select from
sel	Boolean selector

Returns: For each element, a is selected for true, 0 for false.

static gmx_simd_fint32_t gmx_simd_blendzero_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fibool_t	sel
	)

inlinestatic

Select from gmx_simd_fint32_t variable where boolean is true.

You should typically call the real-precision gmx_simd_blendzero_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer to select from
sel	Boolean selector

Returns: Elements from a where sel is true, 0 otherwise.

static gmx_simd_dbool_t gmx_simd_cmpeq_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

SIMD a==b for double SIMD.

You should typically call the real-precision gmx_simd_cmpeq_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a==b.

Beware that exact floating-point comparisons are difficult.

static gmx_simd_dibool_t gmx_simd_cmpeq_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Equality comparison of two ints corresponding to double SIMD data.

You should typically call the real-precision gmx_simd_cmpeq_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer1
b	SIMD integer2

Returns: SIMD integer boolean with true for elements where a==b

static gmx_simd_fbool_t gmx_simd_cmpeq_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

SIMD a==b for single SIMD.

You should typically call the real-precision gmx_simd_cmpeq_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a==b.

Beware that exact floating-point comparisons are difficult.

static gmx_simd_fibool_t gmx_simd_cmpeq_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Equality comparison of two integers corresponding to float values.

You should typically call the real-precision gmx_simd_cmpeq_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer1
b	SIMD integer2

Returns: SIMD integer boolean with true for elements where a==b

static gmx_simd_dbool_t gmx_simd_cmple_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

SIMD a<=b for double SIMD.

You should typically call the real-precision gmx_simd_cmple_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<=b.

static gmx_simd_fbool_t gmx_simd_cmple_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

SIMD a<=b for single SIMD.

You should typically call the real-precision gmx_simd_cmple_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<=b.

static gmx_simd_dbool_t gmx_simd_cmplt_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

SIMD a<b for double SIMD.

You should typically call the real-precision gmx_simd_cmplt_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<b.

static gmx_simd_dibool_t gmx_simd_cmplt_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Less-than comparison of two ints corresponding to double SIMD data.

You should typically call the real-precision gmx_simd_cmplt_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer1
b	SIMD integer2

Returns: SIMD integer boolean with true for elements where a<b

static gmx_simd_fbool_t gmx_simd_cmplt_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

SIMD a<b for single SIMD.

You should typically call the real-precision gmx_simd_cmplt_r.

Parameters

a	value1
b	value2

Returns: Each element of the boolean will be set to true if a<b.

static gmx_simd_fibool_t gmx_simd_cmplt_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Less-than comparison of two SIMD integers corresponding to float values.

You should typically call the real-precision gmx_simd_cmplt_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD integer1
b	SIMD integer2

Returns: SIMD integer boolean with true for elements where a<b

static gmx_simd_double_t gmx_simdcall gmx_simd_cos_d ( gmx_simd_double_t x )

inlinestatic

SIMD double cos(x).

You should normally call the real-precision routine gmx_simd_cos_r.

Parameters

x	The argument to evaluate cos for

Returns: Cos(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

static gmx_simd_float_t gmx_simdcall gmx_simd_cos_f ( gmx_simd_float_t x )

inlinestatic

SIMD float cos(x).

You should normally call the real-precision routine gmx_simd_cos_r.

Parameters

x	The argument to evaluate cos for

Returns: Cos(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

static gmx_simd_double_t gmx_simdcall gmx_simd_cos_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD cos(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_cos_singleaccuracy_r.

Parameters

x	The argument to evaluate cos for

Returns: Cos(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

static void gmx_simdcall gmx_simd_cprod_d	(	gmx_simd_double_t	ax,
		gmx_simd_double_t	ay,
		gmx_simd_double_t	az,
		gmx_simd_double_t	bx,
		gmx_simd_double_t	by,
		gmx_simd_double_t	bz,
		gmx_simd_double_t *	cx,
		gmx_simd_double_t *	cy,
		gmx_simd_double_t *	cz
	)

inlinestatic

SIMD double cross-product of multiple vectors.

For normal usage you should always call the real-precision gmx_simd_cprod_r.

Parameters

	ax	X components of first vectors
	ay	Y components of first vectors
	az	Z components of first vectors
	bx	X components of second vectors
	by	Y components of second vectors
	bz	Z components of second vectors
[out]	cx	X components of cross product vectors
[out]	cy	Y components of cross product vectors
[out]	cz	Z components of cross product vectors

Returns: void

This calculates C = A x B, where the cross denotes the cross product. The arguments x/y/z denote the different components, and each element corresponds to a separate vector.

static void gmx_simdcall gmx_simd_cprod_f	(	gmx_simd_float_t	ax,
		gmx_simd_float_t	ay,
		gmx_simd_float_t	az,
		gmx_simd_float_t	bx,
		gmx_simd_float_t	by,
		gmx_simd_float_t	bz,
		gmx_simd_float_t *	cx,
		gmx_simd_float_t *	cy,
		gmx_simd_float_t *	cz
	)

inlinestatic

SIMD float cross-product of multiple vectors.

For normal usage you should always call the real-precision gmx_simd_cprod_r.

Parameters

	ax	X components of first vectors
	ay	Y components of first vectors
	az	Z components of first vectors
	bx	X components of second vectors
	by	Y components of second vectors
	bz	Z components of second vectors
[out]	cx	X components of cross product vectors
[out]	cy	Y components of cross product vectors
[out]	cz	Z components of cross product vectors

Returns: void

This calculates C = A x B, where the cross denotes the cross product. The arguments x/y/z denote the different components, and each element corresponds to a separate vector.

static gmx_simd_float_t gmx_simd_cvt_d2f ( gmx_simd_double_t d )

inlinestatic

Convert SIMD double to float.

This version is available if GMX_SIMD_FLOAT_WIDTH is identical to GMX_SIMD_DOUBLE_WIDTH.

Float/double conversions are complex since the SIMD width could either be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will need to check for the width in the code, and have different code paths.

Parameters

d	Double-precision SIMD variable

Returns: Single-precision SIMD variable of the same width

static gmx_simd_dint32_t gmx_simd_cvt_d2i ( gmx_simd_double_t a )

inlinestatic

Round double precision floating point to integer.

You should typically call the real-precision gmx_simd_cvt_r2i.

Parameters

a	SIMD floating-point

Returns: SIMD integer, rounded to nearest integer.

static gmx_simd_dibool_t gmx_simd_cvt_db2dib ( gmx_simd_dbool_t a )

inlinestatic

Convert from double boolean to corresponding integer boolean.

You should typically call the real-precision gmx_simd_cvt_b2ib.

Parameters

a	Boolean corresponding to SIMD floating-point

Returns: Boolean that can be applied to SIMD integer operations.

static gmx_simd_float_t gmx_simd_cvt_dd2f	(	gmx_simd_double_t	d0,
		gmx_simd_double_t	d1
	)

inlinestatic

Convert SIMD double to float.

This version is available if GMX_SIMD_FLOAT_WIDTH is twice as large as GMX_SIMD_DOUBLE_WIDTH.

Float/double conversions are complex since the SIMD width could either be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will need to check for the width in the code, and have different code paths.

Parameters

d0	Double-precision SIMD variable, first half of values to put in f.
d1	Double-precision SIMD variable, second half of values to put in f.

Returns: Single-precision SIMD variable with all values.

static gmx_simd_dbool_t gmx_simd_cvt_dib2db ( gmx_simd_dibool_t a )

inlinestatic

Convert from integer boolean (corresponding to double) to double boolean.

You should typically call the real-precision gmx_simd_cvt_ib2b.

Parameters

a	Boolean corresponding to SIMD integer

Returns: Boolean that can be applied to SIMD floating-point.

static gmx_simd_double_t gmx_simd_cvt_f2d ( gmx_simd_float_t f )

inlinestatic

Convert SIMD float to double.

This version is available if GMX_SIMD_FLOAT_WIDTH is identical to GMX_SIMD_DOUBLE_WIDTH.

Float/double conversions are complex since the SIMD width could either be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will need to check for the width in the code, and have different code paths.

Parameters

f	Single-precision SIMD variable

Returns: Double-precision SIMD variable of the same width

static void gmx_simd_cvt_f2dd	(	gmx_simd_float_t	f,
		gmx_simd_double_t *	d0,
		gmx_simd_double_t *	d1
	)

inlinestatic

Convert SIMD float to double.

This version is available if GMX_SIMD_FLOAT_WIDTH is twice as large as GMX_SIMD_DOUBLE_WIDTH.

Float/double conversions are complex since the SIMD width could either be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will need to check for the width in the code, and have different code paths.

Parameters

	f	Single-precision SIMD variable
[out]	d0	Double-precision SIMD variable, first half of values from f.
[out]	d1	Double-precision SIMD variable, second half of values from f.

static gmx_simd_fint32_t gmx_simd_cvt_f2i ( gmx_simd_float_t a )

inlinestatic

Round single precision floating point to integer.

You should typically call the real-precision gmx_simd_cvt_r2i.

Parameters

a	SIMD floating-point

Returns: SIMD integer, rounded to nearest integer.

static gmx_simd_fibool_t gmx_simd_cvt_fb2fib ( gmx_simd_fbool_t a )

inlinestatic

Convert from float boolean to corresponding integer boolean.

You should typically call the real-precision gmx_simd_cvt_b2ib.

Parameters

a	Boolean corresponding to SIMD floating-point

Returns: Boolean that can be applied to SIMD integer operations.

static gmx_simd_fbool_t gmx_simd_cvt_fib2fb ( gmx_simd_fibool_t a )

inlinestatic

Convert from integer boolean (corresponding to float) to float boolean.

You should typically call the real-precision gmx_simd_cvt_ib2b.

Parameters

a	Boolean corresponding to SIMD integer

Returns: Boolean that can be applied to SIMD floating-point.

static gmx_simd_double_t gmx_simd_cvt_i2d ( gmx_simd_dint32_t a )

inlinestatic

Convert integer to double precision floating-point.

You should typically call the real-precision gmx_simd_cvt_i2r.

Parameters

a	SIMD integer

Returns: SIMD floating-point

static gmx_simd_float_t gmx_simd_cvt_i2f ( gmx_simd_fint32_t a )

inlinestatic

Convert integer to single precision floating-point.

You should typically call the real-precision gmx_simd_cvt_i2r.

Parameters

a	SIMD integer

Returns: SIMD floating-point

static gmx_simd_dint32_t gmx_simd_cvtt_d2i ( gmx_simd_double_t a )

inlinestatic

Truncate double precision floating point to integer.

You should typically call the real-precision gmx_simd_cvtt_r2i.

Parameters

a	SIMD floating-point

Returns: SIMD integer, truncated towards zero.

static gmx_simd_fint32_t gmx_simd_cvtt_f2i ( gmx_simd_float_t a )

inlinestatic

Truncate single precision floating point to integer.

You should typically call the real-precision gmx_simd_cvtt_r2i.

Parameters

a	SIMD floating-point

Returns: SIMD integer, truncated towards zero.

static gmx_simd_double_t gmx_simdcall gmx_simd_erf_d ( gmx_simd_double_t x )

inlinestatic

SIMD double erf(x).

You should normally call the real-precision routine gmx_simd_erf_r.

Parameters

x	The value to calculate erf(x) for.

Returns: erf(x)

This routine achieves very close to full precision, but we do not care about the last bit or the subnormal result range.

static gmx_simd_float_t gmx_simdcall gmx_simd_erf_f ( gmx_simd_float_t x )

inlinestatic

SIMD float erf(x).

You should normally call the real-precision routine gmx_simd_erf_r.

Parameters

x	The value to calculate erf(x) for.

Returns: erf(x)

This routine achieves very close to full precision, but we do not care about the last bit or the subnormal result range.

static gmx_simd_double_t gmx_simdcall gmx_simd_erf_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD erf(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_erf_singleaccuracy_r.

Parameters

x	The value to calculate erf(x) for.

Returns: erf(x)

This routine achieves very close to single precision, but we do not care about the last bit or the subnormal result range.

static gmx_simd_double_t gmx_simdcall gmx_simd_erfc_d ( gmx_simd_double_t x )

inlinestatic

SIMD double erfc(x).

You should normally call the real-precision routine gmx_simd_erfc_r.

Parameters

x	The value to calculate erfc(x) for.

Returns: erfc(x)

This routine achieves full precision (bar the last bit) over most of the input range, but for large arguments where the result is getting close to the minimum representable numbers we accept slightly larger errors (think results that are in the ballpark of 10^-30 for single precision, or 10^-200 for double) since that is not relevant for MD.

static gmx_simd_float_t gmx_simdcall gmx_simd_erfc_f ( gmx_simd_float_t x )

inlinestatic

SIMD float erfc(x).

You should normally call the real-precision routine gmx_simd_erfc_r.

Parameters

x	The value to calculate erfc(x) for.

Returns: erfc(x)

This routine achieves full precision (bar the last bit) over most of the input range, but for large arguments where the result is getting close to the minimum representable numbers we accept slightly larger errors (think results that are in the ballpark of 10^-30 for single precision, or 10^-200 for double) since that is not relevant for MD.

static gmx_simd_double_t gmx_simdcall gmx_simd_erfc_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD erfc(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_erfc_singleaccuracy_r.

Parameters

x	The value to calculate erfc(x) for.

Returns: erfc(x)

This routine achieves single precision (bar the last bit) over most of the input range, but for large arguments where the result is getting close to the minimum representable numbers we accept slightly larger errors (think results that are in the ballpark of 10^-30) since that is not relevant for MD.

static gmx_simd_double_t gmx_simdcall gmx_simd_exp2_d ( gmx_simd_double_t x )

inlinestatic

SIMD double 2^x.

You should normally call the real-precision routine gmx_simd_exp2_r.

Parameters

x Argument.

Returns: 2^x. Undefined if input argument caused overflow.

static gmx_simd_float_t gmx_simdcall gmx_simd_exp2_f ( gmx_simd_float_t x )

inlinestatic

SIMD float 2^x.

You should normally call the real-precision routine gmx_simd_exp2_r.

Parameters

x Argument.

Returns: 2^x. Undefined if input argument caused overflow.

static gmx_simd_double_t gmx_simdcall gmx_simd_exp2_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD 2^x. Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_exp2_singleaccuracy_r.

Parameters

x Argument.

Returns: 2^x. Undefined if input argument caused overflow.

static gmx_simd_double_t gmx_simdcall gmx_simd_exp_d ( gmx_simd_double_t x )

inlinestatic

SIMD double exp(x).

You should normally call the real-precision routine gmx_simd_exp_r.

In addition to scaling the argument for 2^x this routine correctly does extended precision arithmetics to improve accuracy.

Parameters

x Argument.

Returns: exp(x). Undefined if input argument caused overflow, which can happen if abs(x) > 7e13.

static gmx_simd_float_t gmx_simdcall gmx_simd_exp_f ( gmx_simd_float_t x )

inlinestatic

SIMD float exp(x).

You should normally call the real-precision routine gmx_simd_exp_r.

In addition to scaling the argument for 2^x this routine correctly does extended precision arithmetics to improve accuracy.

Parameters

x Argument.

Returns: exp(x). Undefined if input argument caused overflow, which can happen if abs(x) > 7e13.

static gmx_simd_double_t gmx_simdcall gmx_simd_exp_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD exp(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_exp_singleaccuracy_r.

Parameters

x Argument.

Returns: exp(x). Undefined if input argument caused overflow.

static gmx_int32_t gmx_simd_extract_di	(	gmx_simd_dint32_t	a,
		int	index
	)

inlinestatic

Extract the element at position index from gmx_simd_dint32_t.

You should typically call the real-precision gmx_simd_extract_i.

Available with GMX_SIMD_HAVE_DINT32_EXTRACT.

Parameters

a	SIMD variable
index	Position to extract integer from

Returns: Single integer from position index in SIMD variable.

static gmx_int32_t gmx_simd_extract_fi	(	gmx_simd_fint32_t	a,
		int	index
	)

inlinestatic

Extract the element at position index from gmx_simd_fint32_t.

You should typically call the real-precision gmx_simd_extract_i.

Available with GMX_SIMD_HAVE_FINT32_EXTRACT.

Parameters

a	SIMD variable
index	Position to extract integer from

Returns: Single integer from position index in SIMD variable.

static gmx_simd_double_t gmx_simd_fabs_d ( gmx_simd_double_t a )

inlinestatic

SIMD Floating-point fabs().

You should typically call the real-precision gmx_simd_fabs_r.

Parameters

a	any floating point values

Returns: fabs(a) for each element.

static gmx_simd_float_t gmx_simd_fabs_f ( gmx_simd_float_t a )

inlinestatic

SIMD Floating-point fabs().

You should typically call the real-precision gmx_simd_fabs_r.

Parameters

a	any floating point values

Returns: fabs(a) for each element.

static gmx_simd_double_t gmx_simd_fneg_d ( gmx_simd_double_t a )

inlinestatic

SIMD floating-point negate.

You should typically call the real-precision gmx_simd_fneg_r.

Parameters

a	Any floating-point value

Returns: -a

static gmx_simd_float_t gmx_simd_fneg_f ( gmx_simd_float_t a )

inlinestatic

SIMD floating-point negate.

You should typically call the real-precision gmx_simd_fneg_r.

Parameters

a	Any floating-point value

Returns: -a

static gmx_simd_double_t gmx_simd_fraction_d ( gmx_simd_double_t a )

inlinestatic

Fraction of the SIMD floating point number.

You should typically call the real-precision gmx_simd_fraction_r.

Parameters

a	Any floating-point value

Returns: a-trunc(a)

To maximize compatibility, we use the same definition of fractions as used e.g. for the AMD64 hardware instructions. This relies on truncation towards zero for the integer part, and the remaining fraction can thus be either positive or negative. As an example, -1.42 would return the fraction -0.42.

Hardware support with GMX_SIMD_HAVE_FRACTION, otherwise emulated.

static gmx_simd_float_t gmx_simd_fraction_f ( gmx_simd_float_t a )

inlinestatic

Fraction of the SIMD floating point number.

You should typically call the real-precision gmx_simd_fraction_r.

Parameters

a	Any floating-point value

Returns: a-trunc(a)

To maximize compatibility, we use the same definition of fractions as used e.g. for the AMD64 hardware instructions. This relies on truncation towards zero for the integer part, and the remaining fraction can thus be either positive or negative. As an example, -1.42 would return the fraction -0.42.

Hardware support with GMX_SIMD_HAVE_FRACTION, otherwise emulated.

static gmx_simd_double_t gmx_simd_get_exponent_d ( gmx_simd_double_t a )

inlinestatic

Extract (integer) exponent from double precision SIMD.

You should typically call the real-precision gmx_simd_get_exponent_r.

Parameters

a	Any floating-point value

Returns: Exponent value, represented in floating-point format.

The IEEE754 exponent field is selected, the bias removed, and it is converted to a normal floating-point SIMD.

static gmx_simd_float_t gmx_simd_get_exponent_f ( gmx_simd_float_t a )

inlinestatic

Extract (integer) exponent from single precision SIMD.

You should typically call the real-precision gmx_simd_get_exponent_r.

Parameters

a	Any floating-point value

Returns: Exponent value, represented in floating-point format.

The IEEE754 exponent field is selected, the bias removed, and it is converted to a normal floating-point SIMD.

static gmx_simd_double_t gmx_simd_get_mantissa_d ( gmx_simd_double_t a )

inlinestatic

Get SIMD double mantissa.

You should typically call the real-precision gmx_simd_get_mantissa_r.

Parameters

a	Any floating-point value

Returns: Mantissa, represented in floating-point format.

The mantissa field is selected, and a new neutral exponent created.

static gmx_simd_float_t gmx_simd_get_mantissa_f ( gmx_simd_float_t a )

inlinestatic

Get SIMD mantissa.

You should typically call the real-precision gmx_simd_get_mantissa_r.

Parameters

a	Any floating-point value

Returns: Mantissa, represented in floating-point format.

The mantissa field is selected, and a new neutral exponent created.

static gmx_simd_double_t gmx_simdcall gmx_simd_inv_d ( gmx_simd_double_t x )

inlinestatic

Calculate 1/x for SIMD double.

You should normally call the real-precision routine gmx_simd_inv_r.

Parameters

x	Argument that must be nonzero. This routine does not check arguments.

Returns: 1/x. Result is undefined if your argument was invalid.

static gmx_simd_float_t gmx_simdcall gmx_simd_inv_f ( gmx_simd_float_t x )

inlinestatic

Calculate 1/x for SIMD float.

You should normally call the real-precision routine gmx_simd_inv_r.

Parameters

x	Argument that must be nonzero. This routine does not check arguments.

Returns: 1/x. Result is undefined if your argument was invalid.

static gmx_simd_double_t gmx_simd_inv_maskfpe_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

Calculate 1/x for masked entries of SIMD double.

Identical to gmx_simd_inv_d but avoids fp-exception for non-masked entries. The result for the non-masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be nonzero for masked entries
m	Masked entries

Returns: 1/x. Result is undefined if your argument was invalid or entry was not masked.

static gmx_simd_float_t gmx_simd_inv_maskfpe_f	(	gmx_simd_float_t	x,
		gmx_simd_fbool_t	m
	)

inlinestatic

Calculate 1/x for masked entries of SIMD float.

Identical to gmx_simd_inv_f but avoids fp-exception for non-masked entries. The result for the non-masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be nonzero for masked entries
m	Masked entries

Returns: 1/x. Result is undefined if your argument was invalid or entry was not masked.

static gmx_simd_double_t gmx_simd_inv_maskfpe_singleaccuracy_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

1/x for masked entries of SIMD double, single accuracy.

Identical to gmx_simd_inv_singleaccuracy_d but avoids fp-exception for non-masked entries. The result for the non-masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be nonzero for masked entries
m	Masked entries

Returns: 1/x. Result is undefined if your argument was invalid or entry was not masked.

static gmx_simd_double_t gmx_simd_inv_notmaskfpe_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

Calculate 1/x for non-masked entries of SIMD double.

Identical to gmx_simd_inv_d but avoids fp-exception for masked entries. The result for the masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be nonzero for non-masked entries
m	Masked entries

Returns: 1/x. Result is undefined if your argument was invalid or entry was masked.

static gmx_simd_float_t gmx_simd_inv_notmaskfpe_f	(	gmx_simd_float_t	x,
		gmx_simd_fbool_t	m
	)

inlinestatic

Calculate 1/x for non-masked entries of SIMD float.

Identical to gmx_simd_inv_f but avoids fp-exception for masked entries. The result for the masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be nonzero for non-masked entries
m	Masked entries

Returns: 1/x. Result is undefined if your argument was invalid or entry was masked.

static gmx_simd_double_t gmx_simd_inv_notmaskfpe_singleaccuracy_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

1/x for non-masked entries of SIMD double, single accuracy.

Identical to gmx_simd_inv_singleaccuracy_d but avoids fp-exception for masked entries. The result for the masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be nonzero for non-masked entries
m	Masked entries

Returns: 1/x. Result is undefined if your argument was invalid or entry was masked.

static gmx_simd_double_t gmx_simdcall gmx_simd_inv_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

Calculate 1/x for SIMD double, but in single accuracy.

You should normally call the real-precision routine gmx_simd_inv_singleaccuracy_r.

Parameters

x	Argument that must be nonzero. This routine does not check arguments.

Returns: 1/x. Result is undefined if your argument was invalid.

static gmx_simd_double_t gmx_simdcall gmx_simd_invsqrt_d ( gmx_simd_double_t x )

inlinestatic

Calculate 1/sqrt(x) for SIMD double.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

static gmx_simd_float_t gmx_simdcall gmx_simd_invsqrt_f ( gmx_simd_float_t x )

inlinestatic

Calculate 1/sqrt(x) for SIMD float.

You should normally call the real-precision routine gmx_simd_invsqrt_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

static gmx_simd_double_t gmx_simd_invsqrt_maskfpe_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

Calculate 1/sqrt(x) for masked entries of SIMD double.

Identical to gmx_simd_invsqrt_d but avoids fp-exception for non-masked entries. The result for the non-masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be >0 for masked entries
m	Masked entries

Returns: 1/sqrt(x). Result is undefined if your argument was invalid or entry was not masked.

static gmx_simd_float_t gmx_simd_invsqrt_maskfpe_f	(	gmx_simd_float_t	x,
		gmx_simd_fbool_t	m
	)

inlinestatic

Calculate 1/sqrt(x) for masked entries of SIMD float.

Identical to gmx_simd_invsqrt_f but avoids fp-exception for non-masked entries. The result for the non-masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be >0 for masked entries
m	Masked entries

Returns: 1/sqrt(x). Result is undefined if your argument was invalid or entry was not masked.

static gmx_simd_double_t gmx_simd_invsqrt_maskfpe_singleaccuracy_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

1/sqrt(x) for masked entries of SIMD double, but in single accuracy.

Identical to gmx_simd_invsqrt_singleaccuracy_d but avoids fp-exception for non-masked entries. The result for the non-masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be >0 for masked entries
m	Masked entries

Returns: 1/sqrt(x). Result is undefined if your argument was invalid or entry was not masked.

static gmx_simd_double_t gmx_simd_invsqrt_notmaskfpe_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

Calculate 1/sqrt(x) for non-masked entries of SIMD double.

Identical to gmx_simd_invsqrt_d but avoids fp-exception for masked entries. The result for the masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be >0 for non-masked entries
m	Masked entries

Returns: 1/sqrt(x). Result is undefined if your argument was invalid or entry was masked.

static gmx_simd_float_t gmx_simd_invsqrt_notmaskfpe_f	(	gmx_simd_float_t	x,
		gmx_simd_fbool_t	m
	)

inlinestatic

Calculate 1/sqrt(x) for non-masked entries of SIMD float.

Identical to gmx_simd_invsqrt_f but avoids fp-exception for masked entries. The result for the masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be >0 for non-masked entries
m	Masked entries

Returns: 1/sqrt(x). Result is undefined if your argument was invalid or entry was masked.

static gmx_simd_double_t gmx_simd_invsqrt_notmaskfpe_singleaccuracy_d	(	gmx_simd_double_t	x,
		gmx_simd_dbool_t	m
	)

inlinestatic

1/sqrt(x) for non-masked entries of SIMD double, in single accuracy.

Identical to gmx_simd_invsqrt_singleaccuracy_d but avoids fp-exception for masked entries. The result for the masked entries is undefined and the user has to use blend with the same mask to obtain a defined result.

Parameters

x	Argument that must be >0 for non-masked entries
m	Masked entries

Returns: 1/sqrt(x). Result is undefined if your argument was invalid or entry was masked.

static void gmx_simdcall gmx_simd_invsqrt_pair_d	(	gmx_simd_double_t	x0,
		gmx_simd_double_t	x1,
		gmx_simd_double_t *	out0,
		gmx_simd_double_t *	out1
	)

inlinestatic

Calculate 1/sqrt(x) for two SIMD doubles.

You should normally call the real-precision routine gmx_simd_invsqrt_pair_r.

Parameters

	x0	First set of arguments, x0 must be positive - no argument checking.
	x1	Second set of arguments, x1 must be positive - no argument checking.
[out]	out0	Result 1/sqrt(x0)
[out]	out1	Result 1/sqrt(x1)

In particular for double precision we can sometimes calculate square root pairs slightly faster by using single precision until the very last step.

static void gmx_simdcall gmx_simd_invsqrt_pair_f	(	gmx_simd_float_t	x0,
		gmx_simd_float_t	x1,
		gmx_simd_float_t *	out0,
		gmx_simd_float_t *	out1
	)

inlinestatic

Calculate 1/sqrt(x) for two SIMD floats.

You should normally call the real-precision routine gmx_simd_invsqrt_pair_r.

Parameters

	x0	First set of arguments, x0 must be positive - no argument checking.
	x1	Second set of arguments, x1 must be positive - no argument checking.
[out]	out0	Result 1/sqrt(x0)
[out]	out1	Result 1/sqrt(x1)

In particular for double precision we can sometimes calculate square root pairs slightly faster by using single precision until the very last step.

static void gmx_simdcall gmx_simd_invsqrt_pair_singleaccuracy_d	(	gmx_simd_double_t	x0,
		gmx_simd_double_t	x1,
		gmx_simd_double_t *	out0,
		gmx_simd_double_t *	out1
	)

inlinestatic

Calculate 1/sqrt(x) for two SIMD doubles, but single accuracy.

You should normally call the real-precision routine gmx_simd_invsqrt_pair_singleaccuracy_r.

Parameters

	x0	First set of arguments, x0 must be positive - no argument checking.
	x1	Second set of arguments, x1 must be positive - no argument checking.
[out]	out0	Result 1/sqrt(x0)
[out]	out1	Result 1/sqrt(x1)

In particular for double precision we can sometimes calculate square root pairs slightly faster by using single precision until the very last step.

static gmx_simd_double_t gmx_simdcall gmx_simd_invsqrt_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

Calculate 1/sqrt(x) for SIMD double, but in single accuracy.

You should normally call the real-precision routine gmx_simd_invsqrt_singleaccuracy_r.

Parameters

x	Argument that must be >0. This routine does not check arguments.

Returns: 1/sqrt(x). Result is undefined if your argument was invalid.

static gmx_simd_double_t gmx_simdcall gmx_simd_iprod_d	(	gmx_simd_double_t	ax,
		gmx_simd_double_t	ay,
		gmx_simd_double_t	az,
		gmx_simd_double_t	bx,
		gmx_simd_double_t	by,
		gmx_simd_double_t	bz
	)

inlinestatic

SIMD double inner product of multiple double vectors.

For normal usage you should always call the real-precision gmx_simd_iprod_r.

Parameters

ax	X components of first vectors
ay	Y components of first vectors
az	Z components of first vectors
bx	X components of second vectors
by	Y components of second vectors
bz	Z components of second vectors

Returns: Element i will be res[i] = ax[i]*bx[i]+ay[i]*by[i]+az[i]*bz[i].

Note: The SIMD part is that we calculate many scalar products in one call.

static gmx_simd_float_t gmx_simdcall gmx_simd_iprod_f	(	gmx_simd_float_t	ax,
		gmx_simd_float_t	ay,
		gmx_simd_float_t	az,
		gmx_simd_float_t	bx,
		gmx_simd_float_t	by,
		gmx_simd_float_t	bz
	)

inlinestatic

SIMD float inner product of multiple float vectors.

For normal usage you should always call the real-precision gmx_simd_iprod_r.

Parameters

ax	X components of first vectors
ay	Y components of first vectors
az	Z components of first vectors
bx	X components of second vectors
by	Y components of second vectors
bz	Z components of second vectors

Returns: Element i will be res[i] = ax[i]*bx[i]+ay[i]*by[i]+az[i]*bz[i].

Note: The SIMD part is that we calculate many scalar products in one call.

static gmx_simd_double_t gmx_simd_load1_d ( const double * m )

inlinestatic

Set all SIMD variable elements to double pointed to by m (unaligned).

Parameters

m	Pointer to single value in memory.

Returns: SIMD variable with all elements set to *m.

static gmx_simd_float_t gmx_simd_load1_f ( const float * m )

inlinestatic

Set all SIMD variable elements to float pointed to by m (unaligned).

Parameters

m	Pointer to single value in memory.

Returns: SIMD variable with all elements set to *m.

static gmx_simd_double_t gmx_simd_load_d ( const double * m )

inlinestatic

Load GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.

Parameters

m	Pointer to memory aligned to the SIMD width.

Returns: SIMD variable with data loaded.

static gmx_simd_dint32_t gmx_simd_load_di ( const gmx_int32_t * m )

inlinestatic

Load aligned SIMD integer data, width corresponds to gmx_simd_double_t.

You should typically call the real-precision gmx_simd_load_i.

Parameters

m	Pointer to memory, aligned to integer SIMD width.

Returns: SIMD integer variable.

static gmx_simd_float_t gmx_simd_load_f ( const float * m )

inlinestatic

Load GMX_SIMD_FLOAT_WIDTH numbers from aligned memory.

Parameters

m	Pointer to memory aligned to the SIMD width.

Returns: SIMD variable with data loaded.

static gmx_simd_fint32_t gmx_simd_load_fi ( const gmx_int32_t * m )

inlinestatic

Load aligned SIMD integer data, width corresponds to gmx_simd_float_t.

You should typically call the real-precision gmx_simd_load_i.

Parameters

m	Pointer to memory, aligned to integer SIMD width.

Returns: SIMD integer variable.

static gmx_simd_double_t gmx_simdcall gmx_simd_log_d ( gmx_simd_double_t x )

inlinestatic

SIMD double log(x). This is the natural logarithm.

You should normally call the real-precision routine gmx_simd_log_r.

Parameters

x	Argument, should be >0.

Returns: The natural logarithm of x. Undefined if argument is invalid.

static gmx_simd_float_t gmx_simdcall gmx_simd_log_f ( gmx_simd_float_t x )

inlinestatic

SIMD float log(x). This is the natural logarithm.

You should normally call the real-precision routine gmx_simd_log_r.

Parameters

x	Argument, should be >0.

Returns: The natural logarithm of x. Undefined if argument is invalid.

static gmx_simd_double_t gmx_simdcall gmx_simd_log_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD log(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_log_singleaccuracy_r.

Parameters

x	Argument, should be >0.

Returns: The natural logarithm of x. Undefined if argument is invalid.

static gmx_simd_double_t gmx_simd_max_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Set each SIMD element to the largest from two variables.

You should typically call the real-precision gmx_simd_max_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: max(a,b) for each element.

static gmx_simd_float_t gmx_simd_max_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Set each SIMD element to the largest from two variables.

You should typically call the real-precision gmx_simd_max_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: max(a,b) for each element.

static gmx_simd_double_t gmx_simd_min_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Set each SIMD element to the smallest from two variables.

You should typically call the real-precision gmx_simd_min_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: min(a,b) for each element.

static gmx_simd_float_t gmx_simd_min_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Set each SIMD element to the smallest from two variables.

You should typically call the real-precision gmx_simd_min_r.

Parameters

a	Any floating-point value
b	Any floating-point value

Returns: min(a,b) for each element.

static gmx_simd_double_t gmx_simd_mul_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Multiply two SIMD variables.

You should typically call the real-precision gmx_simd_mul_r.

Parameters

a	factor1
b	factor2

Returns: a*b.

static gmx_simd_dint32_t gmx_simd_mul_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Multiply SIMD integers, corresponding to double precision.

You should typically call the real-precision gmx_simd_mul_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	factor1
b	factor2

Returns: a*b.

Note: Only the low 32 bits are retained, so this can overflow.

static gmx_simd_float_t gmx_simd_mul_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Multiply two SIMD variables.

You should typically call the real-precision gmx_simd_mul_r.

Parameters

a	factor1
b	factor2

Returns: a*b.

static gmx_simd_fint32_t gmx_simd_mul_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Multiply SIMD integers.

You should typically call the real-precision gmx_simd_mul_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	factor1
b	factor2

Returns: a*b.

Note: Only the low 32 bits are retained, so this can overflow.

static gmx_simd_double_t gmx_simdcall gmx_simd_norm2_d	(	gmx_simd_double_t	ax,
		gmx_simd_double_t	ay,
		gmx_simd_double_t	az
	)

inlinestatic

SIMD double norm squared of multiple vectors.

For normal usage you should always call the real-precision gmx_simd_norm2_r.

Parameters

ax	X components of vectors
ay	Y components of vectors
az	Z components of vectors

Returns: Element i will be res[i] = ax[i]*ax[i]+ay[i]*ay[i]+az[i]*az[i].

Note: This corresponds to the scalar product of the vector with itself, but the compiler might be able to optimize it better with identical vectors.

static gmx_simd_float_t gmx_simdcall gmx_simd_norm2_f	(	gmx_simd_float_t	ax,
		gmx_simd_float_t	ay,
		gmx_simd_float_t	az
	)

inlinestatic

SIMD float norm squared of multiple vectors.

For normal usage you should always call the real-precision gmx_simd_norm2_r.

Parameters

ax	X components of vectors
ay	Y components of vectors
az	Z components of vectors

Returns: Element i will be res[i] = ax[i]*ax[i]+ay[i]*ay[i]+az[i]*az[i].

Note: This corresponds to the scalar product of the vector with itself, but the compiler might be able to optimize it better with identical vectors.

static gmx_simd_double_t gmx_simd_or_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Bitwise or for SIMD double. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_or_r.

Parameters

a	data1
b	data2

Returns: data1 | data2

static gmx_simd_dbool_t gmx_simd_or_db	(	gmx_simd_dbool_t	a,
		gmx_simd_dbool_t	b
	)

inlinestatic

Logical or on double precision SIMD booleans.

You should typically call the real-precision gmx_simd_or_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a or b is true.

Note that this is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_or_ib

static gmx_simd_dint32_t gmx_simd_or_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Integer bitwise or for SIMD variables.

You should typically call the real-precision gmx_simd_or_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a | b (bitwise or)

static gmx_simd_dibool_t gmx_simd_or_dib	(	gmx_simd_dibool_t	a,
		gmx_simd_dibool_t	b
	)

inlinestatic

Logical OR on gmx_simd_dibool_t.

You should typically call the real-precision gmx_simd_or_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD boolean 1
b	SIMD boolean 2

Returns: True for elements where either a or b (or both) is true.

static gmx_simd_float_t gmx_simd_or_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Bitwise or for SIMD float. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_or_r.

Parameters

a	data1
b	data2

Returns: data1 | data2

static gmx_simd_fbool_t gmx_simd_or_fb	(	gmx_simd_fbool_t	a,
		gmx_simd_fbool_t	b
	)

inlinestatic

Logical or on single precision SIMD booleans.

You should typically call the real-precision gmx_simd_or_b.

Parameters

a	logical vars 1
b	logical vars 2

Returns: For each element, the result boolean is true if a or b is true.

Note that this is not necessarily a bitwise operation - the storage format of booleans is implementation-dependent.

See Also: gmx_simd_or_ib

static gmx_simd_fint32_t gmx_simd_or_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Integer SIMD bitwise or.

You should typically call the real-precision gmx_simd_or_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a | b (bitwise or)

static gmx_simd_fibool_t gmx_simd_or_fib	(	gmx_simd_fibool_t	a,
		gmx_simd_fibool_t	b
	)

inlinestatic

Logical OR on gmx_simd_fibool_t.

You should typically call the real-precision gmx_simd_or_ib.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	SIMD boolean 1
b	SIMD boolean 2

Returns: True for elements where either a or b (or both) is true.

static gmx_simd_double_t gmx_simdcall gmx_simd_pmecorrF_d ( gmx_simd_double_t z2 )

inlinestatic

Calculate the force correction due to PME analytically for SIMD double.

You should normally call the real-precision routine gmx_simd_pmecorrF_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb force - see below for details.

This routine is meant to enable analytical evaluation of the direct-space PME electrostatic force to avoid tables.

The direct-space potential should be $\mbox{erfc}(\beta r)/r$ , but there are some problems evaluating that:

First, the error function is difficult (read: expensive) to approximate accurately for intermediate to large arguments, and this happens already in ranges of $(\beta r)$ that occur in simulations. Second, we now try to avoid calculating potentials in Gromacs but use forces directly.

We can simplify things slightly by noting that the PME part is really a correction to the normal Coulomb force since $\mbox{erfc}(z)=1-\mbox{erf}(z)$ , i.e.

$V = \frac{1}{r} - \frac{\mbox{erf}(\beta r)}{r}$

The first term we already have from the inverse square root, so that we can leave out of this routine.

For pme tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm, the argument $\beta r$ will be in the range 0.15 to ~4, which is the range used for the minimax fit. Use your favorite plotting program to realize how well-behaved $\frac{\mbox{erf}(z)}{z}$ is in this range!

We approximate $f(z)=\mbox{erf}(z)/z$ with a rational minimax polynomial. However, it turns out it is more efficient to approximate $f(z)/z$ and then only use even powers. This is another minor optimization, since we actually want $f(z)/z$ , because it is going to be multiplied by the vector between the two atoms to get the vectorial force. The fastest flops are the ones we can avoid calculating!

So, here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=(\beta r)^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{2 \exp(-z^2)}{\sqrt{\pi} z^2}-\frac{\mbox{erf}(z)}{z^3}$
Multiply the entire expression by $\beta^3$ . This will get you

$\frac{2 \beta^3 \exp(-z^2)}{\sqrt{\pi} z^2} - \frac{\beta^3 \mbox{erf}(z)}{z^3}$

or, switching back to $r$ (since $z=r \beta$ ):

$\frac{2 \beta \exp(-r^2 \beta^2)}{\sqrt{\pi} r^2} - \frac{\mbox{erf}(r \beta)}{r^3}$

With a bit of math exercise you should be able to confirm that this is exactly

$\frac{\frac{d}{dr}\left( \frac{\mbox{erf}(\beta r)}{r} \right)}{r}$
Add the result to $r^{-3}$ , multiply by the product of the charges, and you have your force (divided by $r$ ). A final multiplication with the vector connecting the two particles and you have your vectorial force to add to the particles.

This approximation achieves an error slightly lower than 1e-6 in single precision and 1e-11 in double precision for arguments smaller than 16 ( $\beta r \leq 4$ ); when added to $1/r$ the error will be insignificant. For $\beta r \geq 7206$ the return value can be inf or NaN.

static gmx_simd_float_t gmx_simdcall gmx_simd_pmecorrF_f ( gmx_simd_float_t z2 )

inlinestatic

Calculate the force correction due to PME analytically in SIMD float.

You should normally call the real-precision routine gmx_simd_pmecorrF_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb force - see below for details.

This routine is meant to enable analytical evaluation of the direct-space PME electrostatic force to avoid tables.

The direct-space potential should be $\mbox{erfc}(\beta r)/r$ , but there are some problems evaluating that:

First, the error function is difficult (read: expensive) to approximate accurately for intermediate to large arguments, and this happens already in ranges of $(\beta r)$ that occur in simulations. Second, we now try to avoid calculating potentials in Gromacs but use forces directly.

We can simplify things slightly by noting that the PME part is really a correction to the normal Coulomb force since $\mbox{erfc}(z)=1-\mbox{erf}(z)$ , i.e.

$V = \frac{1}{r} - \frac{\mbox{erf}(\beta r)}{r}$

The first term we already have from the inverse square root, so that we can leave out of this routine.

For pme tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm, the argument $\beta r$ will be in the range 0.15 to ~4, which is the range used for the minimax fit. Use your favorite plotting program to realize how well-behaved $\frac{\mbox{erf}(z)}{z}$ is in this range!

We approximate $f(z)=\mbox{erf}(z)/z$ with a rational minimax polynomial. However, it turns out it is more efficient to approximate $f(z)/z$ and then only use even powers. This is another minor optimization, since we actually want $f(z)/z$ , because it is going to be multiplied by the vector between the two atoms to get the vectorial force. The fastest flops are the ones we can avoid calculating!

So, here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=(\beta r)^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{2 \exp(-z^2)}{\sqrt{\pi} z^2}-\frac{\mbox{erf}(z)}{z^3}$
Multiply the entire expression by $\beta^3$ . This will get you

$\frac{2 \beta^3 \exp(-z^2)}{\sqrt{\pi} z^2} - \frac{\beta^3 \mbox{erf}(z)}{z^3}$

or, switching back to $r$ (since $z=r \beta$ ):

$\frac{2 \beta \exp(-r^2 \beta^2)}{\sqrt{\pi} r^2} - \frac{\mbox{erf}(r \beta)}{r^3}$

With a bit of math exercise you should be able to confirm that this is exactly

$\frac{\frac{d}{dr}\left( \frac{\mbox{erf}(\beta r)}{r} \right)}{r}$
Add the result to $r^{-3}$ , multiply by the product of the charges, and you have your force (divided by $r$ ). A final multiplication with the vector connecting the two particles and you have your vectorial force to add to the particles.

This approximation achieves an error slightly lower than 1e-6 in single precision and 1e-11 in double precision for arguments smaller than 16 ( $\beta r \leq 4$ ); when added to $1/r$ the error will be insignificant. For $\beta r \geq 7206$ the return value can be inf or NaN.

static gmx_simd_double_t gmx_simdcall gmx_simd_pmecorrF_singleaccuracy_d ( gmx_simd_double_t z2 )

static

Analytical PME force correction, double SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_pmecorrF_singleaccuracy_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb force - see below for details.

This routine is meant to enable analytical evaluation of the direct-space PME electrostatic force to avoid tables.

The direct-space potential should be $\mbox{erfc}(\beta r)/r$ , but there are some problems evaluating that:

First, the error function is difficult (read: expensive) to approximate accurately for intermediate to large arguments, and this happens already in ranges of $(\beta r)$ that occur in simulations. Second, we now try to avoid calculating potentials in Gromacs but use forces directly.

We can simplify things slightly by noting that the PME part is really a correction to the normal Coulomb force since $\mbox{erfc}(z)=1-\mbox{erf}(z)$ , i.e.

$V = \frac{1}{r} - \frac{\mbox{erf}(\beta r)}{r}$

The first term we already have from the inverse square root, so that we can leave out of this routine.

For pme tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm, the argument $\beta r$ will be in the range 0.15 to ~4. Use your favorite plotting program to realize how well-behaved $\frac{\mbox{erf}(z)}{z}$ is in this range!

We approximate $f(z)=\mbox{erf}(z)/z$ with a rational minimax polynomial. However, it turns out it is more efficient to approximate $f(z)/z$ and then only use even powers. This is another minor optimization, since we actually want $f(z)/z$ , because it is going to be multiplied by the vector between the two atoms to get the vectorial force. The fastest flops are the ones we can avoid calculating!

So, here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=(\beta r)^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{2 \exp(-z^2)}{\sqrt{\pi} z^2}-\frac{\mbox{erf}(z)}{z^3}$
Multiply the entire expression by $\beta^3$ . This will get you

$\frac{2 \beta^3 \exp(-z^2)}{\sqrt{\pi} z^2} - \frac{\beta^3 \mbox{erf}(z)}{z^3}$

or, switching back to $r$ (since $z=r \beta$ ):

$\frac{2 \beta \exp(-r^2 \beta^2)}{\sqrt{\pi} r^2} - \frac{\mbox{erf}(r \beta)}{r^3}$

With a bit of math exercise you should be able to confirm that this is exactly

$\frac{\frac{d}{dr}\left( \frac{\mbox{erf}(\beta r)}{r} \right)}{r}$
Add the result to $r^{-3}$ , multiply by the product of the charges, and you have your force (divided by $r$ ). A final multiplication with the vector connecting the two particles and you have your vectorial force to add to the particles.

This approximation achieves an error slightly lower than 1e-6; when added to $1/r$ the error will be insignificant.

static gmx_simd_double_t gmx_simdcall gmx_simd_pmecorrV_d ( gmx_simd_double_t z2 )

inlinestatic

Calculate the potential correction due to PME analytically for SIMD double.

You should normally call the real-precision routine gmx_simd_pmecorrV_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb potential - see below for details.

See gmx_simd_pmecorrF_f for details about the approximation.

This routine calculates $\mbox{erf}(z)/z$ , although you should provide $z^2$ as the input argument.

Here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=\beta^2*r^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{\mbox{erf}(z)}{z}$
Multiply the entire expression by $\beta$ and switch back to $r$ (since $z=r \beta$ ):

$\frac{\mbox{erf}(r \beta)}{r}$
Subtract the result from $1/r$ , multiply by the product of the charges, and you have your potential.

This approximation achieves an error slightly lower than 1e-6 in single precision and 4e-11 in double precision for arguments smaller than 16 ( $0.15 \leq \beta r \leq 4$ ); for $\beta r \leq 0.15$ the error can be twice as high; when added to $1/r$ the error will be insignificant. For $\beta r \geq 7142$ the return value can be inf or NaN.

static gmx_simd_float_t gmx_simdcall gmx_simd_pmecorrV_f ( gmx_simd_float_t z2 )

inlinestatic

Calculate the potential correction due to PME analytically in SIMD float.

You should normally call the real-precision routine gmx_simd_pmecorrV_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb potential - see below for details.

See gmx_simd_pmecorrF_f for details about the approximation.

This routine calculates $\mbox{erf}(z)/z$ , although you should provide $z^2$ as the input argument.

Here's how it should be used:

Calculate $r^2$ .
Multiply by $\beta^2$ , so you get $z^2=\beta^2*r^2$ .
Evaluate this routine with $z^2$ as the argument.
The return value is the expression:

$\frac{\mbox{erf}(z)}{z}$
Multiply the entire expression by $\beta$ and switch back to $r$ (since $z=r \beta$ ):

$\frac{\mbox{erf}(r \beta)}{r}$
Subtract the result from $1/r$ , multiply by the product of the charges, and you have your potential.

This approximation achieves an error slightly lower than 1e-6 in single precision and 4e-11 in double precision for arguments smaller than 16 ( $0.15 \leq \beta r \leq 4$ ); for $\beta r \leq 0.15$ the error can be twice as high; when added to $1/r$ the error will be insignificant. For $\beta r \geq 7142$ the return value can be inf or NaN.

static gmx_simd_double_t gmx_simdcall gmx_simd_pmecorrV_singleaccuracy_d ( gmx_simd_double_t z2 )

static

Analytical PME potential correction, double SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_pmecorrV_singleaccuracy_r.

Parameters

z2	$(r \beta)^2$ - see below for details.

Returns: Correction factor to coulomb potential - see below for details.

See gmx_simd_pmecorrF_f for details about the approximation.

This routine calculates $\mbox{erf}(z)/z$ , although you should provide $z^2$ as the input argument.

Here's how it should be used:

Calculate $r^2$.
Multiply by $\beta^2$ , so you get $z^2=\beta^2*r^2$ .
Evaluate this routine with z^2 as the argument.
The return value is the expression:

$\frac{\mbox{erf}(z)}{z}$
Multiply the entire expression by $\beta$ and switch back to $r$ (since $z=r \beta$):

$\frac{\mbox{erf}(r \beta)}{r}$
Subtract the result from $1/r$, multiply by the product of the charges, and you have your potential.

This approximation achieves an error slightly lower than 1e-6; when added to $1/r$ the error will be insignificant.

static gmx_simd_double_t gmx_simd_rcp_d ( gmx_simd_double_t x )

inlinestatic

SIMD 1.0/x lookup.

You should typically call the real-precision gmx_simd_rcp_r.

This is a low-level instruction that should only be called from routines implementing the reciprocal in simd_math.h.

Parameters

x	Argument, x!=0

Returns: Approximation of 1/x, accuracy is GMX_SIMD_RCP_BITS.

static gmx_simd_float_t gmx_simd_rcp_f ( gmx_simd_float_t x )

inlinestatic

SIMD 1.0/x lookup.

You should typically call the real-precision gmx_simd_rcp_r.

This is a low-level instruction that should only be called from routines implementing the reciprocal in simd_math.h.

Parameters

x	Argument, x!=0

Returns: Approximation of 1/x, accuracy is GMX_SIMD_RCP_BITS.

static gmx_simd_double_t gmx_simdcall gmx_simd_rcp_iter_d	(	gmx_simd_double_t	lu,
		gmx_simd_double_t	x
	)

inlinestatic

Perform one Newton-Raphson iteration to improve 1/x for SIMD double.

This is a low-level routine that should only be used by SIMD math routines that evaluate the reciprocal.

Parameters

lu	Approximation of 1/x, typically obtained from lookup.
x	The reference (starting) value x for which we want 1/x.

Returns: An improved approximation with roughly twice as many bits of accuracy.

static gmx_simd_float_t gmx_simdcall gmx_simd_rcp_iter_f	(	gmx_simd_float_t	lu,
		gmx_simd_float_t	x
	)

inlinestatic

Perform one Newton-Raphson iteration to improve 1/x for SIMD float.

This is a low-level routine that should only be used by SIMD math routines that evaluate the reciprocal.

Parameters

lu	Approximation of 1/x, typically obtained from lookup.
x	The reference (starting) value x for which we want 1/x.

Returns: An improved approximation with roughly twice as many bits of accuracy.

static double gmx_simd_reduce_d ( gmx_simd_double_t a )

inlinestatic

Return sum of all elements in SIMD double variable.

You should typically call the real-precision gmx_simd_reduce_r.

Parameters

a	SIMD variable to reduce/sum.

Returns: The sum of all elements in the argument variable.

static float gmx_simd_reduce_f ( gmx_simd_float_t a )

inlinestatic

Return sum of all elements in SIMD float variable.

You should typically call the real-precision gmx_simd_reduce_r.

Parameters

a	SIMD variable to reduce/sum.

Returns: The sum of all elements in the argument variable.

static gmx_simd_double_t gmx_simd_round_d ( gmx_simd_double_t a )

inlinestatic

Round to nearest integer value (in double floating-point format).

You should typically call the real-precision gmx_simd_round_r.

Parameters

a	Any floating-point value

Returns: The nearest integer, represented in floating-point format.

Note: The reference implementation rounds exact half-way cases away from zero, whereas most SIMD intrinsics will round to nearest even. This could be fixed by using rint/rintf, but the bigger problem is that MSVC does not support full C99, and none of the round or rint functions are defined. It's much easier to approximately implement round() than rint(), so we do that and hope we never get bitten in testing. (Thanks, Microsoft.)

static gmx_simd_float_t gmx_simd_round_f ( gmx_simd_float_t a )

inlinestatic

Round to nearest integer value (in floating-point format).

You should typically call the real-precision gmx_simd_round_r.

Parameters

a	Any floating-point value

Returns: The nearest integer, represented in floating-point format.

Note: The reference implementation rounds exact half-way cases away from zero, whereas most SIMD intrinsics will round to nearest even. This could be fixed by using rint/rintf, but the bigger problem is that MSVC does not support full C99, and none of the round or rint functions are defined. It's much easier to approximately implement round() than rint(), so we do that and hope we never get bitten in testing. (Thanks, Microsoft.)

static gmx_simd_double_t gmx_simd_rsqrt_d ( gmx_simd_double_t x )

inlinestatic

SIMD 1.0/sqrt(x) lookup.

You should typically call the real-precision gmx_simd_rsqrt_r.

This is a low-level instruction that should only be called from routines implementing the inverse square root in simd_math.h.

Parameters

x	Argument, x>0

Returns: Approximation of 1/sqrt(x), accuracy is GMX_SIMD_RSQRT_BITS.

static gmx_simd_float_t gmx_simd_rsqrt_f ( gmx_simd_float_t x )

inlinestatic

SIMD 1.0/sqrt(x) lookup.

You should typically call the real-precision gmx_simd_rsqrt_r.

This is a low-level instruction that should only be called from routines implementing the inverse square root in simd_math.h.

Parameters

x	Argument, x>0

Returns: Approximation of 1/sqrt(x), accuracy is GMX_SIMD_RSQRT_BITS.

static gmx_simd_double_t gmx_simdcall gmx_simd_rsqrt_iter_d	(	gmx_simd_double_t	lu,
		gmx_simd_double_t	x
	)

inlinestatic

Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD double.

This is a low-level routine that should only be used by SIMD math routines that evaluate the inverse square root.

Parameters

lu	Approximation of 1/sqrt(x), typically obtained from lookup.
x	The reference (starting) value x for which we want 1/sqrt(x).

Returns: An improved approximation with roughly twice as many bits of accuracy.

static gmx_simd_float_t gmx_simdcall gmx_simd_rsqrt_iter_f	(	gmx_simd_float_t	lu,
		gmx_simd_float_t	x
	)

inlinestatic

Perform one Newton-Raphson iteration to improve 1/sqrt(x) for SIMD float.

This is a low-level routine that should only be used by SIMD math routines that evaluate the inverse square root.

Parameters

lu	Approximation of 1/sqrt(x), typically obtained from lookup.
x	The reference (starting) value x for which we want 1/sqrt(x).

Returns: An improved approximation with roughly twice as many bits of accuracy.

static gmx_simd_double_t gmx_simd_set1_d ( double r )

inlinestatic

Set all SIMD double variable elements to the value r.

Parameters

r	floating-point constant

Returns: SIMD variable with all elements set to r.

static gmx_simd_dint32_t gmx_simd_set1_di ( gmx_int32_t b )

inlinestatic

Set SIMD from integer, width corresponds to gmx_simd_double_t.

You should typically call the real-precision gmx_simd_set1_i.

Parameters

b	integer value to set variable to.

Returns: SIMD variable with all elements set to b.

static gmx_simd_float_t gmx_simd_set1_f ( float r )

inlinestatic

Set all SIMD float variable elements to the value r.

Parameters

r	floating-point constant

Returns: SIMD variable with all elements set to r.

static gmx_simd_fint32_t gmx_simd_set1_fi ( gmx_int32_t b )

inlinestatic

Set SIMD from integer, width corresponds to gmx_simd_float_t.

You should typically call the real-precision gmx_simd_set1_i.

Parameters

b	integer value to set variable to.

Returns: SIMD variable with all elements set to b.

static gmx_simd_double_t gmx_simd_set_exponent_d ( gmx_simd_double_t a )

inlinestatic

Set (integer) exponent from double precision floating-point SIMD.

You should typically call the real-precision gmx_simd_set_exponent_r.

Parameters

a	A floating point value that will not overflow as 2^a.

Returns: 2^(round(a)).

The input is rounded to the nearest integer, the exponent bias is added to this integer, and the bits are shifted to the IEEE754 exponent part of the number.

Note: The argument will be rounded to nearest integer since that is what we need for the exponential functions, and this integer x will be set as the exponent so the new floating-point number will be 2^x.

static gmx_simd_float_t gmx_simd_set_exponent_f ( gmx_simd_float_t a )

inlinestatic

Set (integer) exponent from single precision floating-point SIMD.

You should typically call the real-precision gmx_simd_set_exponent_r.

Parameters

a	A floating point value that will not overflow as 2^a.

Returns: 2^(round(a)).

The input is rounded to the nearest integer, the exponent bias is added to this integer, and the bits are shifted to the IEEE754 exponent part of the number.

Note: The argument will be rounded to nearest integer since that is what we need for the exponential functions, and this integer x will be set as the exponent so the new floating-point number will be 2^x.

static gmx_simd_double_t gmx_simd_setzero_d ( )

inlinestatic

Set all SIMD double variable elements to 0.0.

Returns: The value 0.0 in all elements of a SIMD variable.

static gmx_simd_dint32_t gmx_simd_setzero_di ( )

inlinestatic

Set all SIMD variable elements to 0, width corresponds to gmx_simd_double_t.

You should typically call the real-precision gmx_simd_setzero_i.

Returns: SIMD integer variable with all bits set to zero.

static gmx_simd_float_t gmx_simd_setzero_f ( )

inlinestatic

Set all SIMD float variable elements to 0.0f.

Returns: The value 0.0 in all elements of a SIMD variable.

static gmx_simd_fint32_t gmx_simd_setzero_fi ( )

inlinestatic

Set all SIMD variable elements to 0, width corresponds to gmx_simd_float_t.

You should typically call the real-precision gmx_simd_setzero_i.

Returns: SIMD integer variable with all bits set to zero.

static gmx_simd_double_t gmx_simdcall gmx_simd_sin_d ( gmx_simd_double_t x )

inlinestatic

SIMD double sin(x).

You should normally call the real-precision routine gmx_simd_sin_r.

Parameters

x	The argument to evaluate sin for

Returns: Sin(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

static gmx_simd_float_t gmx_simdcall gmx_simd_sin_f ( gmx_simd_float_t x )

inlinestatic

SIMD float sin(x).

You should normally call the real-precision routine gmx_simd_sin_r.

Parameters

x	The argument to evaluate sin for

Returns: Sin(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

static gmx_simd_double_t gmx_simdcall gmx_simd_sin_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD sin(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_sin_singleaccuracy_r.

Parameters

x	The argument to evaluate sin for

Returns: Sin(x)

Attention: Do NOT call both sin & cos if you need both results, since each of them will then call gmx_simd_sincos_r and waste a factor 2 in performance.

static void gmx_simdcall gmx_simd_sincos_d	(	gmx_simd_double_t	x,
		gmx_simd_double_t *	sinval,
		gmx_simd_double_t *	cosval
	)

inlinestatic

SIMD double sin & cos.

You should normally call the real-precision routine gmx_simd_sincos_r.

Parameters

	x	The argument to evaluate sin/cos for
[out]	sinval	Sin(x)
[out]	cosval	Cos(x)

This version achieves close to machine precision, but for very large magnitudes of the argument we inherently begin to lose accuracy due to the argument reduction, despite using extended precision arithmetics internally.

static void gmx_simdcall gmx_simd_sincos_f	(	gmx_simd_float_t	x,
		gmx_simd_float_t *	sinval,
		gmx_simd_float_t *	cosval
	)

inlinestatic

SIMD float sin & cos.

You should normally call the real-precision routine gmx_simd_sincos_r.

Parameters

	x	The argument to evaluate sin/cos for
[out]	sinval	Sin(x)
[out]	cosval	Cos(x)

This version achieves close to machine precision, but for very large magnitudes of the argument we inherently begin to lose accuracy due to the argument reduction, despite using extended precision arithmetics internally.

static void gmx_simdcall gmx_simd_sincos_singleaccuracy_d	(	gmx_simd_double_t	x,
		gmx_simd_double_t *	sinval,
		gmx_simd_double_t *	cosval
	)

inlinestatic

SIMD sin & cos. Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_sincos_singleaccuracy_r.

Parameters

	x	The argument to evaluate sin/cos for
[out]	sinval	Sin(x)
[out]	cosval	Cos(x)

static gmx_simd_dint32_t gmx_simd_slli_di	(	gmx_simd_dint32_t	a,
		int	n
	)

inlinestatic

SIMD integer shift left, based on immediate value.

You should typically call the real-precision gmx_simd_slli_i.

Logical shift. Each element is shifted (independently) up to 32 positions left, while zeros are shifted in from the right. Only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	integer data to shift
n	number of positions to shift left. n<=32.

Returns: shifted values

static gmx_simd_fint32_t gmx_simd_slli_fi	(	gmx_simd_fint32_t	a,
		int	n
	)

inlinestatic

SIMD integer shift left logical, based on immediate value.

You should typically call the real-precision gmx_simd_slli_i.

Logical shift. Each element is shifted (independently) up to 32 positions left, while zeros are shifted in from the right. Only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	integer data to shift
n	number of positions to shift left. n<=32.

Returns: shifted values

static gmx_simd_double_t gmx_simdcall gmx_simd_sqrt_d ( gmx_simd_double_t x )

inlinestatic

Calculate sqrt(x) correctly for SIMD doubles, including argument 0.0.

You should normally call the real-precision routine gmx_simd_sqrt_r.

Parameters

x	Argument that must be >=0.

Returns: sqrt(x). If x=0, the result will correctly be set to 0. The result is undefined if the input value is negative.

static gmx_simd_float_t gmx_simdcall gmx_simd_sqrt_f ( gmx_simd_float_t x )

inlinestatic

Calculate sqrt(x) correctly for SIMD floats, including argument 0.0.

You should normally call the real-precision routine gmx_simd_sqrt_r.

Parameters

x	Argument that must be >=0.

Returns: sqrt(x). If x=0, the result will correctly be set to 0. The result is undefined if the input value is negative.

static gmx_simd_double_t gmx_simdcall gmx_simd_sqrt_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

Calculate sqrt(x) (correct for 0.0) for SIMD double, single accuracy.

You should normally call the real-precision routine gmx_simd_sqrt_singleaccuracy_r.

Parameters

x	Argument that must be >=0.

Returns: sqrt(x). If x=0, the result will correctly be set to 0. The result is undefined if the input value is negative.

static gmx_simd_dint32_t gmx_simd_srli_di	(	gmx_simd_dint32_t	a,
		int	n
	)

inlinestatic

SIMD integer shift right, based on immediate value.

You should typically call the real-precision gmx_simd_srli_i.

Logical shift. Each element is shifted (independently) up to 32 positions right, while zeros are shifted in from the left. Only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	integer data to shift
n	number of positions to shift right. n<=32.

Returns: shifted values

static gmx_simd_fint32_t gmx_simd_srli_fi	(	gmx_simd_fint32_t	a,
		int	n
	)

inlinestatic

SIMD integer shift right logical, based on immediate value.

You should typically call the real-precision gmx_simd_srli_i.

Logical shift. Each element is shifted (independently) up to 32 positions right, while zeros are shifted in from the left. Only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	integer data to shift
n	number of positions to shift right. n<=32.

Returns: shifted values

static void gmx_simd_store_d	(	double *	m,
		gmx_simd_double_t	a
	)

inlinestatic

Store the contents of the SIMD double variable a to aligned memory m.

Parameters

[out]	m	Pointer to memory, aligned to SIMD width.
	a	SIMD variable to store

static gmx_simd_dint32_t gmx_simd_store_di	(	gmx_int32_t *	m,
		gmx_simd_dint32_t	a
	)

inlinestatic

Store aligned SIMD integer data, width corresponds to gmx_simd_double_t.

You should typically call the real-precision gmx_simd_store_i.

Parameters

m	Memory aligned to integer SIMD width.
a	SIMD variable to store.

static void gmx_simd_store_f	(	float *	m,
		gmx_simd_float_t	a
	)

inlinestatic

Store the contents of the SIMD float variable a to aligned memory m.

Parameters

[out]	m	Pointer to memory, aligned to SIMD width.
	a	SIMD variable to store

static gmx_simd_fint32_t gmx_simd_store_fi	(	int *	m,
		gmx_simd_fint32_t	a
	)

inlinestatic

Store aligned SIMD integer data, width corresponds to gmx_simd_float_t.

You should typically call the real-precision gmx_simd_store_i.

Parameters

m	Memory aligned to integer SIMD width.
a	SIMD variable to store.

static gmx_simd_double_t gmx_simd_sub_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Subtract two double SIMD variables.

You should typically call the real-precision gmx_simd_sub_r.

Parameters

a	term1
b	term2

Returns: a-b

static gmx_simd_dint32_t gmx_simd_sub_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Subtract SIMD integers, corresponding to double precision.

You should typically call the real-precision gmx_simd_sub_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	term1
b	term2

Returns: a-b

static gmx_simd_float_t gmx_simd_sub_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Subtract two SIMD variables.

You should typically call the real-precision gmx_simd_sub_r.

Parameters

a	term1
b	term2

Returns: a-b

static gmx_simd_fint32_t gmx_simd_sub_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Subtract SIMD integers.

You should typically call the real-precision gmx_simd_sub_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) or GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.

Parameters

a	term1
b	term2

Returns: a-b

static gmx_simd_double_t gmx_simdcall gmx_simd_sum4_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b,
		gmx_simd_double_t	c,
		gmx_simd_double_t	d
	)

inlinestatic

SIMD utility function to sum a+b+c+d for SIMD doubles.

You should normally call the real-precision routine gmx_simd_sum4_r.

Parameters

a	term 1 (multiple values)
b	term 2 (multiple values)
c	term 3 (multiple values)
d	term 4 (multiple values)

Returns: sum of terms 1-4 (multiple values)

static gmx_simd_float_t gmx_simdcall gmx_simd_sum4_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b,
		gmx_simd_float_t	c,
		gmx_simd_float_t	d
	)

inlinestatic

SIMD float utility to sum a+b+c+d.

You should normally call the real-precision routine gmx_simd_sum4_r.

Parameters

a	term 1 (multiple values)
b	term 2 (multiple values)
c	term 3 (multiple values)
d	term 4 (multiple values)

Returns: sum of terms 1-4 (multiple values)

static gmx_simd_double_t gmx_simdcall gmx_simd_tan_d ( gmx_simd_double_t x )

inlinestatic

SIMD double tan(x).

You should normally call the real-precision routine gmx_simd_tan_r.

Parameters

x	The argument to evaluate tan for

Returns: Tan(x)

static gmx_simd_float_t gmx_simdcall gmx_simd_tan_f ( gmx_simd_float_t x )

inlinestatic

SIMD float tan(x).

You should normally call the real-precision routine gmx_simd_tan_r.

Parameters

x	The argument to evaluate tan for

Returns: Tan(x)

static gmx_simd_double_t gmx_simdcall gmx_simd_tan_singleaccuracy_d ( gmx_simd_double_t x )

inlinestatic

SIMD tan(x). Double precision SIMD data, single accuracy.

You should normally call the real-precision routine gmx_simd_tan_singleaccuracy_r.

Parameters

x	The argument to evaluate tan for

Returns: Tan(x)

static gmx_simd_double_t gmx_simd_trunc_d ( gmx_simd_double_t a )

inlinestatic

Truncate SIMD, i.e. round towards zero - common hardware instruction.

You should typically call the real-precision gmx_simd_trunc_r.

Parameters

a	Any floating-point value

Returns: Integer rounded towards zero, represented in floating-point format.

Note: This is truncation towards zero, not floor(). The reason for this is that truncation is virtually always present as a dedicated hardware instruction, but floor() frequently isn't.

static gmx_simd_float_t gmx_simd_trunc_f ( gmx_simd_float_t a )

inlinestatic

Truncate SIMD, i.e. round towards zero - common hardware instruction.

You should typically call the real-precision gmx_simd_trunc_r.

Parameters

a	Any floating-point value

Returns: Integer rounded towards zero, represented in floating-point format.

Note: This is truncation towards zero, not floor(). The reason for this is that truncation is virtually always present as a dedicated hardware instruction, but floor() frequently isn't.

static gmx_simd_double_t gmx_simd_xor_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Bitwise xor for SIMD double. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_xor_r.

Parameters

a	data1
b	data2

Returns: data1 ^ data2

static gmx_simd_dint32_t gmx_simd_xor_di	(	gmx_simd_dint32_t	a,
		gmx_simd_dint32_t	b
	)

inlinestatic

Integer bitwise xor for SIMD variables.

You should typically call the real-precision gmx_simd_xor_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a ^ b (bitwise xor)

static gmx_simd_float_t gmx_simd_xor_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Bitwise xor for SIMD float. Supported with GMX_SIMD_HAVE_LOGICAL.

You should typically call the real-precision gmx_simd_xor_r.

Parameters

a	data1
b	data2

Returns: data1 ^ data2

static gmx_simd_fint32_t gmx_simd_xor_fi	(	gmx_simd_fint32_t	a,
		gmx_simd_fint32_t	b
	)

inlinestatic

Integer SIMD bitwise xor.

You should typically call the real-precision gmx_simd_xor_i.

This routine is only available if GMX_SIMD_HAVE_FINT32_LOGICAL (single) or GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.

Parameters

a	first integer SIMD
b	second integer SIMD

Returns: a ^ b (bitwise xor)

static gmx_simd_double_t gmx_simdcall gmx_simd_xor_sign_d	(	gmx_simd_double_t	a,
		gmx_simd_double_t	b
	)

inlinestatic

Return -a if b is negative, SIMD double.

You should normally call the real-precision routine gmx_simd_xor_sign_r.

Parameters

a	Values to set sign for
b	Values used to set sign

Returns: if b is negative, the sign of a will be changed.

This is equivalent to doing an xor operation on a with the sign bit of b, with the exception that negative zero is not considered to be negative on architectures where GMX_SIMD_HAVE_LOGICAL is not set.

static gmx_simd_float_t gmx_simdcall gmx_simd_xor_sign_f	(	gmx_simd_float_t	a,
		gmx_simd_float_t	b
	)

inlinestatic

Return -a if b is negative, SIMD float.

You should normally call the real-precision routine gmx_simd_xor_sign_r.

Parameters

a	Values to set sign for
b	Values used to set sign

Returns: if b is negative, the sign of a will be changed.

This is equivalent to doing an xor operation on a with the sign bit of b, with the exception that negative zero is not considered to be negative on architectures where GMX_SIMD_HAVE_LOGICAL is not set.

Description

SIMD implementation capability definitions

SIMD implementation load/store operations for single precision floating point

SIMD implementation load/store operations for double precision floating point

SIMD implementation load/store operations for integers (corresponding to float)

SIMD implementation load/store operations for integers (corresponding to double)

SIMD implementation single precision floating-point bitwise logical operations

SIMD implementation single precision floating-point arithmetics

SIMD implementation single precision floating-point comparisons, boolean, selection.

SIMD implementation double precision floating-point bitwise logical operations

SIMD implementation double precision floating-point arithmetics

SIMD implementation double precision floating-point comparisons, boolean, selection.

SIMD implementation integer (corresponding to float) bitwise logical operations

SIMD implementation integer (corresponding to float) arithmetics

SIMD implementation integer (corresponding to float) comparisons, boolean, selection

SIMD implementation integer (corresponding to double) bitwise logical operations

SIMD implementation integer (corresponding to double) arithmetics

SIMD implementation integer (corresponding to double) comparisons, boolean, selection

SIMD implementation conversion operations

SIMD4. Constant width-4 SIMD types and instructions

SIMD predefined macros to describe high-level capabilities

SIMD memory alignment operations

SIMD data types

SIMD load/store operations on gmx_simd_real_t

SIMD load/store operations on gmx_simd_int32_t

SIMD floating-point logical operations on gmx_simd_real_t

SIMD floating-point arithmetic operations on gmx_simd_real_t

SIMD comparison, boolean, and select operations for gmx_simd_real_t

SIMD integer logical operations on gmx_simd_int32_t

SIMD integer arithmetic operations on gmx_simd_int32_t

SIMD integer comparison, booleans, and selection on gmx_simd_int32_t

SIMD conversion operations

SIMD4 - constant width-four SIMD datatypes

Single precision SIMD math functions

Double precision SIMD math functions

SIMD math functions for double prec. data, single prec. accuracy

SIMD4 math functions

Real-precision SIMD math functions

Classes

Macros

Functions

Directories

Files

Macro Definition Documentation

Function Documentation