/* File: vecLib/vectorOps.h Contains: vector and matrix functions for AltiVec Version: vecLib-380.6 Copyright: © 1999-2012 by Apple Computer, Inc., all rights reserved. Bugs?: For bug reports, consult the following page on the World Wide Web: http://developer.apple.com/bugreporter/ */ #ifndef __VECTOROPS__ #define __VECTOROPS__ #include /* #ifndef __VECLIBTYPES__ #include #endif */ #include "vecLibTypes.h" #include #if PRAGMA_ONCE #pragma once #endif #ifdef __cplusplus extern "C" { #endif /* ------------------------------------------------------------------------------------- This section is a collection of Basic Linear Algebra Subprograms (BLAS), which use AltiVec technology for their implementations. The functions are grouped into three categories (called levels), as follows: 1) Vector-scalar linear algebra subprograms 2) Matrix-vector linear algebra subprograms 3) Matrix operations Following is a list of subprograms and a short description of each one. ------------------------------------------------------------------------------------- */ #if defined _AltiVecPIMLanguageExtensionsAreEnabled || defined __SSE__ /* ------------------------------------------------------------------------------------- Level 1 ------------------------------------------------------------------------------------- */ /************************************************** vIsamax finds the position of the first vector element having the largest magnitude. count length of vector x (count is a multiple of 4) x array of floats **************************************************/ /* * vIsamax() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern int32_t vIsamax( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************** vIsamin finds the position of the first vector element having minimum absolute value. count length of vector x (count is a multiple of 4) x array of floats **************************************************/ /* * vIsamin() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern int32_t vIsamin( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************** vIsmax finds the position of the first vector element having maximum value. count length of vector x (count is a multiple of 4) x array of floats **************************************************/ /* * vIsmax() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern int32_t vIsmax( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************** vIsmin finds the position of the first vector element having minimum value. count length of vector x (count is a multiple of 4) x array of floats **************************************************/ /* * vIsmin() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern int32_t vIsmin( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************** vSasum finds the sum of the magnitudes of the elements in a vector. count length of vector x (count is a multiple of 4) x array of floats **************************************************/ /* * vSasum() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern float vSasum( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************** vSsum is the vector version of sasum but without the absolute value. It takes the value of each element of the array and adds them together. multiple of 4) x array of floats **************************************************/ /* * vSsum() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern float vSsum( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************** vSaxpy multiplies a vector x, by a scalar and adds it to a vector y and stores the result in y n number of floats in x (n is a multiple of 4) alpha scalar number is single-precision floating-point x array of vFloats y array of vFloats, where the the result is stored **************************************************/ /* * vSaxpy() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSaxpy( int32_t n, float alpha, const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vScopy copies a vector x, into another vector y. n mumber of floats in x and y (n is a multiple of 4) x array of vFloats y array of vFloats *************************************************************/ /* * vScopy() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vScopy( int32_t n, const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSdot finds the dot product of two vectors. n mumber of floats in x and y (n is a multiple of 4) x array of vFloats y array of vFloats *************************************************************/ /* * vSdot() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern float vSdot( int32_t n, const vFloat x[], const vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSnaxpy computes saxpy "n" times. n number of saxpyV computations to be performed and the number of elements in vector A (n is a multiple of 4) m number of floats in each vector x(i) or y(i) a array of vFloats containing scalars a(i) x matrix containing arrays of vector- floats x(i) y matrix containing vectors y(i) *************************************************************/ /* * vSnaxpy() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSnaxpy( int32_t n, int32_t m, const vFloat a[], const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSndot computes the dot products "n" times. n number of dot product computations to be performed and the number of elements in vector S m number of elements in vectors x(i) and y(i) for each dot product computation (m is a multiple of 4) s array of floats. Depending on the value of "isw" different computations/ are performed and the results are stored in the array S isw indicates the type of computation to perform. if isw=1, S(i) <-- x(i) y(i) if isw=2, S(i) <-- - x(i) y(i) if isw=3, S(i) <-- S(i) + x(i) y(i)/ if isw=4, S(i) <-- S(i) - x(i) y(i)/ x matrix containing arrays x(i) y matrix containing arrays y(i) *************************************************************/ /* * vSndot() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSndot( int32_t n, int32_t m, float s[], int32_t isw, const vFloat x[], const vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSnrm2 finds the Euclidean length of a vector with scaling of input to avoid destructive underflow and overflow. count length of vector (multiple of 4) x array of vFloats *************************************************************/ /* * vSnrm2() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern float vSnrm2( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSnorm2 finds the Euclidean length of a vector with no scaling of input. count length of vector (multiple of 4) x array of vFloats *************************************************************/ /* * vSnorm2() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern float vSnorm2( int32_t count, const vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSrot applies a plane rotation. n number of points to be rotated, also number of elements in x and y (n is a multiple of 4) x array of vFloats. It is a vector of length n, containing x(i) coordinates of points to be rotated y array of vFloats. It is a vector of length n, containing y(i) coordinates of points to be rotated c cosine of angle of rotation s sine of angle of rotation *************************************************************/ /* * vSrot() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSrot( int32_t n, vFloat x[], vFloat y[], float c, float s) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSscal multiplies a vector x, by a scalar and stores the result in the vector x. n number of floats in x (n is a multiple of 4) alpha scalar number is single-precision floating-point x array of vFloats *************************************************************/ /* * vSscal() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSscal( int32_t n, float alpha, vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSswap interchanges the elements of vectors x and y n number of floats in x and y (n is a multiple of 4) x array of vFloats y array of vFloats *************************************************************/ /* * vSswap() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSswap( int32_t n, vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSyax multiplies a vector x, by a scalar and stores the result in a vector y. n number of floats in x (n is a multiple of 4) alpha scalar number is single-precision floating-point x array of vFloats y array of vFloats *************************************************************/ /* * vSyax() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSyax( int32_t n, float alpha, const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSzaxpy multiplies a vector x, by a scalar and adds it to a vector y and stores the result in vector Z. n number of floats in x (n is a multiple of 4) alpha scalar number is single-precision floating-point x array of vFloats y array of vFloats Z array of vFloats, where the is stored *************************************************************/ /* * vSzaxpy() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSzaxpy( int32_t n, float alpha, const vFloat x[], const vFloat yY[], vFloat z[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* ------------------------------------------------------------------------------------- Level 2 ------------------------------------------------------------------------------------- */ /************************************************************* vSgemv multiplies an array of vFloats y by a scalar beta, and takes the result and adds it to the product of a scalar alpha multiplied by a matrix A multiplied by a vector x. The above result is stored in array y. Futhermore, the same function also performs the above calculation/ with the transpose of matrix A, instead of matrix A. In this function argument "forma" distinguishes between the above two cases. forma indicates the form of matrix A to use in the computation, where: If forma = "n", Matrix A is used If forma = "T", Transpose of Matrix A is used m number of rows in matrix A and depending on value of forma if forma = "n", it is the length of vector y if forma = "T", it is the length of vector x. m is a multiple of 4 n number of columns in matrix A and depending on value of forma if forma = "n", it is the length of vector x if forma = "T", it is the length of vector y. m is a multiple of 4 alpha is a scaling constant A is an m by n matrix. Its elements are vFloats x is an array of vFloats beta is a scaling constant y is an array of vFloats *************************************************************/ /* * vSgemv() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgemv( char forma, int32_t m, int32_t n, float alpha, const vFloat a[], const vFloat x[], float beta, vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgemx adds an array of vFloats y to the product of an scalar alpha by a mtrix A multiplied by an array of vFloats x. It then stores the result in the vector y. m number of rows in matrix A and the length of vector y. m is a multiple of 4 n number of columns in matrix A and the length of vector x. m is a multiple of 4 alpha is a scaling constant a is an m by n matrix. Its elements are vFloats x is an array of vFloats y is an array of vFloats *************************************************************/ /* * vSgemx() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgemx( int32_t m, int32_t n, float alpha, const vFloat a[], const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgemtx takes the transpose of a mtrix A and multiplies it by an array x. It then multiplies the result by a scalar alpha. Finally adds the above result to an array y and stores the result in array y. m number of rows in matrix A and the length of vector x. m is a multiple of 4 n number of columns in matrix A and the length of vector y. m is a multiple of 4 alpha is a scaling constant a is an m by n matrix. Its elements are vFloats x is an array of vFloats y is an array of vFloats *************************************************************/ /* * vSgemtx() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgemtx( int32_t m, int32_t n, float alpha, const vFloat a[], const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* ------------------------------------------------------------------------------------- Level 3 ------------------------------------------------------------------------------------- */ /************************************************************* vSgeadd performs matrix addition for general matrices or their transposes. height height of the matrix (it is multiple of 4) width width of the matrix (it is multiple of 4) A matrix A, and depending on forma: if forma='N', A is used in the computation, and A has m rows and n columns if forma='T', A(T) is used in the computation, and A has n rows and m columns forma indicates the form of matrix A to use in the computation, where: if forma='N', A is used in the computation if forma='T', A(T) is used in the computation b matrix b, and depending on formb: if formb='N', b is used in the computation, and b has m rows and n columns if formb='T', b(T) is used in the computation, and b has n rows and m columns formb indicates the form of matrix b to use in the computation, where: if forma='N', b is used in the computation if forma='T', b(T) is used in the computation c is an m by n matrix c, containing the reults of the computation *************************************************************/ /* * vSgeadd() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgeadd( int32_t height, int32_t width, const vFloat a[], char forma, const vFloat b[], char formb, vFloat c[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgesub performs matrix subtraction for general matrices or their transposes. height height of the matrix (it is multiple of 4) width width of the matrix (it is multiple of 4) A matrix A, and depending on forma: if forma='N', A is used in the computation, and A has m rows and n columns if forma='T', A(T) is used in the computation, and A has n rows and m columns forma indicates the form of matrix A to use in the computation, where: if forma='N', A is used in the computation if forma='T', A(T) is used in the computation b matrix b, and depending on formb: if formb='N', b is used in the computation, and b has m rows and n columns if formb='T', b(T) is used in the computation, and b has n rows and m columns formb indicates the form of matrix b to use in the computation, where: if forma='N', b is used in the computation if forma='T', b(T) is used in the computation c is an m by n matrix c, containing the reults of the computation *************************************************************/ /* * vSgesub() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgesub( int32_t height, int32_t width, const vFloat a[], char forma, const vFloat b[], char formb, vFloat c[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgemul performs matrix multiplication for general matrices or their transposes. l height of the matrix A (it is multiple of 4) m width of matrix A (it is multiple of 4) n width of matrix b (it is multiple of 4) A matrix A, and depending on forma: if forma='N', A is used in the computation, and A has l rows and m columns if forma='T', A(T) is used in the computation, and A has m rows and l columns forma indicates the form of matrix A to use in the computation, where: if forma='N', A is used in the computation if forma='T', A(T) is used in the computation b matrix b, and depending on formb: if formb='N', b is used in the computation, and b has m rows and n columns if formb='T', b(T) is used in the computation, and b has n rows and m columns formb indicates the form of matrix b to use in the computation, where: if forma='N', b is used in the computation if forma='T', b(T) is used in the computation matrix is the matrix containing the results of the computation *************************************************************/ /* * vSgemul() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgemul( int32_t l, int32_t m, int32_t n, const vFloat a[], char forma, const vFloat b[], char formb, vFloat matrix[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgemm performs combined matrix multiplication and addition for general matrices or their transposes. l number of rows in matrix c (it is multiple of 4) m has the following meaning: if forma='N', it is the number of columns in matrix A if forma='T', it is the number of rows in matrix A. In addition if formb='N', it is the number of rows in matrix b if formb='T', it is the number of columns in matrix b n columns in matrix c A matrix A, and depending on forma: if forma='N', A is used in the computation, and A has l rows and m columns if forma='T', A(T) is used in the computation, and A has m rows and l columns forma indicates the form of matrix A to use in the computation, where: if forma='N', A is used in the computation if forma='T', A(T) is used in the computation b matrix b, and depending on formb: if formb='N', b is used in the computation, and b has m rows and n columns if formb='T', b(T) is used in the computation, and b has n rows and m columns formb indicates the form of matrix b to use in the computation, where: if forma='N', b is used in the computation if forma='T', b(T) is used in the computation alpha is a scalar beta is a scalar matrix is the l by n matrix *************************************************************/ /* * vSgemm() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgemm( int32_t l, int32_t m, int32_t n, const vFloat a[], char forma, const vFloat b[], char formb, vFloat c[], float alpha, float beta, vFloat matrix[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgetmi performs general matrix transpose (in place). size is the number of rows and columns in matrix x *************************************************************/ /* * vSgetmi() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgetmi( int32_t size, vFloat x[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /************************************************************* vSgetmo performs general matrix transpose (out-of-place). height is the height of the matrix width is the width of the matrix x array of vFloats y array of vFloats *************************************************************/ /* * vSgetmo() * * Availability: * Mac OS X: in version 10.0 and later in vecLib.framework * CarbonLib: not in Carbon, but vecLib is compatible with CarbonLib * Non-Carbon CFM: in vecLib 1.0 and later */ extern void vSgetmo( int32_t height, int32_t width, const vFloat x[], vFloat y[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); /* vsGevv puts the outer product of vectors A and B into matrix M. Input: l is the number of elements in A and the number of rows in M. n is the number of elements in B and the number of columns in N. A is a vector of L floating-point numbers. B is a vector of M floating-point numbers. M is an array with space for L*N floating-point numbers. Output: For 0 <= i < l and 0 <= j < n, C[i*n + j] = A[i] * B[j]. Note: In the comments above, array elements are floating-point objects, in spite of the fact that the arrays are passed to the routine as pointers to vFloat. For example, if A contains four floating-point numbers, l is 4, even though A contains only one vFloat object. Availability: Mac OS X: In version 10.0 and later in vecLib.framework. CarbonLib: Not in Carbon, but vecLib is compatible with CarbonLib. Non-Carbon CFM: In vecLib 1.0 and later. */ extern void vSgevv( int32_t l, int32_t n, const vFloat A[], const vFloat B[], vFloat M[]) __OSX_AVAILABLE_STARTING(__MAC_10_0, __IPHONE_NA); #endif // defined _AltiVecPIMLanguageExtensionsAreEnabled || defined __SSE__ #ifdef __cplusplus } #endif #endif /* __VECTOROPS__ */