/* vfp.h (from vecLib-380.6) * Copyright 1999-2012 Apple Inc. All rights reserved. * * Overview: * vfp.h provides math library operations for SIMD vectors. These functions * are intended for use as replacements for calls to the system math library * in hand-vectorized code. * * If you are not writing vector code, but are looking for high-performance * math library operations, consult vForce.h instead, which provides math * library operations on arrays of floating-point data. * * Compatibility: * These routines operate on SIMD vectors, and are compatible with the types * declared in the headers for both SSE (Intel) and NEON (ARM) intrinsics. * * Bugs: * For bug reports or feature requests use * http://developer.apple.com/bugreporter/ */ #ifndef __VFP__ #define __VFP__ #include "vecLibTypes.h" #include #include #ifdef __cplusplus extern "C" { #endif /* Rounding Functions * * Each lane of the result vector contains the value in the corresponding * lane of the input vector rounded to an integral value in the specified * direction: * * Function Rounding Direction * -------- ------------------------ * vceilf toward +infinity * vfloorf toward -infinity * vintf toward zero * vnintf to nearest, ties to even * * When SSE4.1 code generation is enabled on Intel architectures, single- * instruction implementations of these operations are inlined instead of * making an external function call. 
*/ #if defined __SSE4_1__ #include #define __VFP_INLINE_ATTR__ __attribute__((__always_inline__, __nodebug__)) static __inline__ vFloat __VFP_INLINE_ATTR__ vceilf(vFloat __vfp_a) { return _mm_ceil_ps(__vfp_a); } static __inline__ vFloat __VFP_INLINE_ATTR__ vfloorf(vFloat __vfp_a) { return _mm_floor_ps(__vfp_a); } static __inline__ vFloat __VFP_INLINE_ATTR__ vintf(vFloat __vfp_a) { return _mm_round_ps(__vfp_a, _MM_FROUND_TRUNC); } static __inline__ vFloat __VFP_INLINE_ATTR__ vnintf(vFloat __vfp_a) { return _mm_round_ps(__vfp_a, _MM_FROUND_NINT); } #else extern vFloat vceilf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); extern vFloat vfloorf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); extern vFloat vintf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); extern vFloat vnintf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); #endif /* Exponential and Logarithmic Functions * * Each lane of the result contains the result of the specified operation * applied to the corresponding lane of the input vector: * * Function Lanewise Operation * -------- ---------------------------- * vexpf base-e exponential function. * vexpm1f e**x - 1, computed in such a way as to be more * accurate than calling vexpf and then subtracting 1 * when the argument is close to zero. * vlogf natural logarithm. * vlog10f base-ten logarithm. * vlog1pf natural logarithm of (1+x), computed in such a way as * to be more accurate than adding 1 and calling vlogf * when the argument is close to zero. 
*/ extern vFloat vexpf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vexpm1f(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vlogf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vlog10f(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); extern vFloat vlog1pf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Scaling Functions * * These functions can be used to efficiently rescale floating-point * computations when necessary: * * Function Lanewise Operation * -------- ---------------------------- * vlogbf extracts the exponent of its argument as a signed * integral value. Subnormal arguments are treated as * though they were first normalized. Thus: * 1 <= x * 2**(-logbf(x)) < 2 * vscalbf efficiently computes x * 2**n, where x is the first * argument and n is the second. */ extern vFloat vlogbf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vscalbf(vFloat, vSInt32) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Power Functions * * vpowf raises the first argument to the power specified by the second * argument, and returns the result. Edge cases are as specified for the * pow( ) function in the math library. vipowf also raises the first * argument to the power specified by the second argument, but the second * argument to vipowf is an integer, not a floating-point number. */ extern vFloat vpowf(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vipowf(vFloat, vSInt32) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Trigonometric and Hyperbolic Functions * * These functions compute lanewise trigonometric and hyperbolic functions * and their inverses. All inputs to the trigonometric functions, and * results from their inverses, are interpreted as angles measured in radians. * * Function Result * -------- ---------------------------- * vsinf sine of the argument. * vcosf cosine of the argument. 
 *      vsincosf    returns the cosine of the first argument, and stores
 *                  the sine of the first argument to the destination
 *                  specified by the second argument.  This address must
 *                  be a valid pointer and must be 16-byte aligned.
 *      vtanf       tangent of the argument.
 *
 *      vasinf      arcsine of the argument, in the range [-pi/2, pi/2].
 *      vacosf      arccosine of the argument, in the range [0, pi].
 *      vatanf      arctangent of the argument, in the range [-pi/2, pi/2].
 *      vatan2f     arctangent of the first argument divided by the
 *                  second argument, using the sign of both arguments to
 *                  determine in which quadrant the result lies.  The
 *                  result is in the range [-pi, pi], and is the signed
 *                  angle from the positive x axis to the point
 *                  (second argument, first argument).
 *
 *      vsinhf      hyperbolic sine of the argument.
 *      vcoshf      hyperbolic cosine of the argument.
 *      vtanhf      hyperbolic tangent of the argument.
 *
 *      vasinhf     inverse hyperbolic sine of the argument.
 *      vacoshf     inverse hyperbolic cosine of the argument.
 *      vatanhf     inverse hyperbolic tangent of the argument.
*/ extern vFloat vsinf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vcosf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vsincosf(vFloat, vFloat *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); extern vFloat vtanf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vasinf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vacosf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vatanf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vatan2f(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vsinhf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vcoshf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vtanhf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vasinhf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vacoshf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vatanhf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Arithmetic Functions * * vrecf, vsqrtf, and vrsqrtf provide lane-wise reciprocal, square-root, and * reciprocal square-root operations, respectively. Each lane in the result * of vdivf contains the corresponding lane of the first argument divided by * the corresponding lane of the second argument. */ extern vFloat vrecf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); extern vFloat vsqrtf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vrsqrtf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vdivf(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Remainder Functions * * These functions compute various forms of the remainder from division of the * first argument by the second argument. 
If we call the first argument x and * the second argument y, then the behavior of these functions is as follows: * * vfmodf returns the value r = x - qy, where q is an integer such that r has * the same sign as x and satisfies |r| < |y|, if y is not zero. * * vremainderf performs the remainder operation defined in the IEEE-754 * standard. It returns the value r = x - qy, where q is the integer value * closest to the exact value of x/y. If there are two integers closest to * x/y, then the one which is even is used. Thus, |r| <= |y|/2. * * vremquof returns the same remainder as vremainderf, and also stores the * 7 low-order bits of q to the address pointed to by the third argument. * This must be a valid pointer, and must have 16-byte alignment. */ extern vFloat vfmodf(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vremainderf(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vremquof(vFloat, vFloat, vUInt32 *) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Floating-point Utility Functions * * These functions provide vector versions of common utility operations * for working with floating-point data: * * Function Lanewise Operation * -------- ---------------------------- * vfabf absolute value * vcopysignf returns a floating-point value with the magnitude of * the first operand and the sign of the second operand. * vsignbitf non-zero if and only if the signbit of the argument is * set. (Note that this applies to NaNs, zeros, and * infinities as well, and so is not the same as x < 0.) * vnextafterf returns the floating-point value adjacent to the * first operand in the direction of the second operand. * vclassifyf returns the value of the FP_xxxx macro (defined in * ) corresponding to the "class" of the argument. * e.g. if the argument is infinity, the result is * FP_INFINITE; if the argument is zero, the result is * FP_ZERO. Consult for further details. 
*/ extern vFloat vcopysignf(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vUInt32 vsignbitf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vnextafterf(vFloat, vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vUInt32 vclassifyf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); extern vFloat vfabf(vFloat) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* Generalized Table Lookup * * This function provides a gather operation (table lookup). Each lane of the * result vector contains the value found in a table at index specified by * the corresponding lane of the first argument. The table base address is * specified by the second argument. Note please that the indices are * *signed* 32-bit integers. */ extern vUInt32 vtablelookup(vSInt32, uint32_t *) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); #ifdef __cplusplus } #endif #endif /* __VFP__ */