Crypto++
8.2
Free C++ class library of cryptographic schemes
|
Support functions for PowerPC and vector operations. More...
Go to the source code of this file.
Typedefs | |
typedef __vector unsigned char | uint8x16_p |
Vector of 8-bit elements. More... | |
typedef __vector unsigned short | uint16x8_p |
Vector of 16-bit elements. More... | |
typedef __vector unsigned int | uint32x4_p |
Vector of 32-bit elements. More... | |
typedef __vector unsigned long long | uint64x2_p |
Vector of 64-bit elements. More... | |
Functions | |
uint32x4_p | VecZero () |
The 0 vector. More... | |
uint32x4_p | VecOne () |
The 1 vector. More... | |
template<class T > | |
T | VecReverse (const T data) |
Reverse bytes in a vector. More... | |
LOAD OPERATIONS | |
uint32x4_p | VecLoad_ALTIVEC (const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad_ALTIVEC (int off, const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad (const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad (int off, const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad (const word32 src[4]) |
Loads a vector from a word array. More... | |
uint32x4_p | VecLoad (int off, const word32 src[4]) |
Loads a vector from a word array. More... | |
uint64x2_p | VecLoad (const word64 src[2]) |
Loads a vector from a word array. More... | |
uint64x2_p | VecLoad (int off, const word64 src[2]) |
Loads a vector from a word array. More... | |
uint32x4_p | VecLoadAligned (const byte src[16]) |
Loads a vector from an aligned byte array. More... | |
uint32x4_p | VecLoadAligned (int off, const byte src[16]) |
Loads a vector from an aligned byte array. More... | |
uint32x4_p | VecLoadBE (const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoadBE (int off, const byte src[16]) |
Loads a vector from a byte array. More... | |
STORE OPERATIONS | |
template<class T > | |
void | VecStore_ALTIVEC (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore_ALTIVEC (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore (const T data, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStore (const T data, int off, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStore (const T data, word64 dest[2]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStore (const T data, int off, word64 dest[2]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStoreBE (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStoreBE (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStoreBE (const T data, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStoreBE (const T data, int off, word32 dest[4]) |
Stores a vector to a word array. More... | |
LOGICAL OPERATIONS | |
template<class T1 , class T2 > | |
T1 | VecAnd (const T1 vec1, const T2 vec2) |
AND two vectors. More... | |
template<class T1 , class T2 > | |
T1 | VecOr (const T1 vec1, const T2 vec2) |
OR two vectors. More... | |
template<class T1 , class T2 > | |
T1 | VecXor (const T1 vec1, const T2 vec2) |
XOR two vectors. More... | |
ARITHMETIC OPERATIONS | |
template<class T1 , class T2 > | |
T1 | VecAdd (const T1 vec1, const T2 vec2) |
Add two vectors. More... | |
template<class T1 , class T2 > | |
T1 | VecSub (const T1 vec1, const T2 vec2) |
Subtract two vectors. More... | |
uint32x4_p | VecAdd64 (const uint32x4_p &vec1, const uint32x4_p &vec2) |
Add two vectors. More... | |
OTHER OPERATIONS | |
template<class T1 , class T2 > | |
T1 | VecPermute (const T1 vec, const T2 mask) |
Permutes a vector. More... | |
template<class T1 , class T2 > | |
T1 | VecPermute (const T1 vec1, const T1 vec2, const T2 mask) |
Permutes two vectors. More... | |
template<unsigned int C, class T > | |
T | VecShiftLeftOctet (const T vec) |
Shift a vector left. More... | |
template<unsigned int C, class T > | |
T | VecShiftRightOctet (const T vec) |
Shift a vector right. More... | |
template<unsigned int C, class T > | |
T | VecRotateLeftOctet (const T vec) |
Rotate a vector left. More... | |
template<unsigned int C, class T > | |
T | VecRotateRightOctet (const T vec) |
Rotate a vector right. More... | |
template<unsigned int C> | |
uint32x4_p | VecRotateLeft (const uint32x4_p vec) |
Rotate a packed vector left. More... | |
template<unsigned int C> | |
uint32x4_p | VecShiftLeft (const uint32x4_p vec) |
Shift a packed vector left. More... | |
template<class T > | |
T | VecMergeHigh (const T vec1, const T vec2) |
Merge two vectors. More... | |
template<class T > | |
T | VecMergeLow (const T vec1, const T vec2) |
Merge two vectors. More... | |
template<unsigned int C> | |
uint64x2_p | VecRotateLeft (const uint64x2_p vec) |
Rotate a packed vector left. More... | |
template<unsigned int C> | |
uint64x2_p | VecShiftLeft (const uint64x2_p vec) |
Shift a packed vector left. More... | |
template<unsigned int C> | |
uint32x4_p | VecRotateRight (const uint32x4_p vec) |
Rotate a packed vector right. More... | |
template<unsigned int C> | |
uint32x4_p | VecShiftRight (const uint32x4_p vec) |
Shift a packed vector right. More... | |
template<unsigned int C> | |
uint64x2_p | VecRotateRight (const uint64x2_p vec) |
Rotate a packed vector right. More... | |
template<unsigned int C> | |
uint64x2_p | VecShiftRight (const uint64x2_p vec) |
Shift a packed vector right. More... | |
template<class T > | |
T | VecSwapWords (const T vec) |
Exchange high and low double words. More... | |
template<class T > | |
T | VecGetLow (const T val) |
Extract a dword from a vector. More... | |
template<class T > | |
T | VecGetHigh (const T val) |
Extract a dword from a vector. More... | |
template<class T1 , class T2 > | |
bool | VecEqual (const T1 vec1, const T2 vec2) |
Compare two vectors. More... | |
template<class T1 , class T2 > | |
bool | VecNotEqual (const T1 vec1, const T2 vec2) |
Compare two vectors. More... | |
POLYNOMIAL MULTIPLICATION | |
uint32x4_p | VecPolyMultiply (const uint32x4_p &a, const uint32x4_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecPolyMultiply (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecPolyMultiply00LE (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecPolyMultiply01LE (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecPolyMultiply10LE (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecPolyMultiply11LE (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
AES ENCRYPTION | |
template<class T1 , class T2 > | |
T1 | VecEncrypt (const T1 state, const T2 key) |
One round of AES encryption. More... | |
template<class T1 , class T2 > | |
T1 | VecEncryptLast (const T1 state, const T2 key) |
Final round of AES encryption. More... | |
template<class T1 , class T2 > | |
T1 | VecDecrypt (const T1 state, const T2 key) |
One round of AES decryption. More... | |
template<class T1 , class T2 > | |
T1 | VecDecryptLast (const T1 state, const T2 key) |
Final round of AES decryption. More... | |
SHA DIGESTS | |
template<int func, int fmask, class T > | |
T | VecSHA256 (const T vec) |
SHA256 Sigma functions. More... | |
template<int func, int fmask, class T > | |
T | VecSHA512 (const T vec) |
SHA512 Sigma functions. More... | |
Support functions for PowerPC and vector operations.
This header provides an agnostic interface into Clang, GCC and IBM XL C/C++ compilers modulo their different built-in functions for accessing vector instructions.
The abstractions are necessary to support back to GCC 4.8 and XLC 11 and 12. GCC 4.8 and 4.9 are still popular, and they are the default compilers for GCC112, GCC118 and others on the compile farm. Older IBM XL C/C++ compilers also need the abstractions due to lack of vec_xl
and vec_xst
support on some platforms. Modern compilers provide best support and don't need many of the hacks below.
The library is tested with the following PowerPC machines and compilers. GCC110, GCC111, GCC112, GCC119 and GCC135 are provided by the GCC Compile Farm
12 machines are used for testing because the three compilers form five profiles. The profiles are listed below.
The LLVM front-end makes it tricky to write portable code because LLVM pretends to be other compilers but cannot consume other compilers' builtins. When using XLC with -qxlcompatmacros the compiler pretends to be GCC, Clang and XLC all at once but it can only consume its variety of builtins.
At Crypto++ 8.0 the various Vector{FuncName}
were renamed to Vec{FuncName}
. For example, VectorAnd
was changed to VecAnd
. The name change helped consolidate two slightly different implementations.
Definition in file ppc_simd.h.
typedef __vector unsigned char uint8x16_p |
Vector of 8-bit elements.
Definition at line 119 of file ppc_simd.h.
typedef __vector unsigned short uint16x8_p |
Vector of 16-bit elements.
Definition at line 124 of file ppc_simd.h.
typedef __vector unsigned int uint32x4_p |
Vector of 32-bit elements.
Definition at line 129 of file ppc_simd.h.
typedef __vector unsigned long long uint64x2_p |
Vector of 64-bit elements.
uint64x2_p is available on POWER7 and above. Some supporting functions, like 64-bit vec_add
(vaddudm
), did not arrive until POWER8.
Definition at line 139 of file ppc_simd.h.
|
inline |
The 0 vector.
Definition at line 145 of file ppc_simd.h.
|
inline |
The 1 vector.
Definition at line 154 of file ppc_simd.h.
|
inline |
Reverse bytes in a vector.
T | vector type |
data | the vector |
VecReverse() reverses the bytes in a vector
Definition at line 169 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
Loads a vector in native endian format from a byte array.
VecLoad_ALTIVEC() uses vec_ld
if the effective address of src
is aligned. If unaligned it uses vec_lvsl
, vec_ld
, vec_perm
and src
. The fixups using vec_lvsl
and vec_perm
are relatively expensive so you should provide aligned memory addresses.
Definition at line 193 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
off | offset into the src byte array |
Loads a vector in native endian format from a byte array.
VecLoad_ALTIVEC() uses vec_ld
if the effective address of src
is aligned. If unaligned it uses vec_lvsl
, vec_ld
, vec_perm
and src
.
The fixups using vec_lvsl
and vec_perm
are relatively expensive so you should provide aligned memory addresses.
Definition at line 223 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
VecLoad() loads a vector in from a byte array.
VecLoad() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 253 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
off | offset into the byte array |
VecLoad() loads a vector in from a byte array.
VecLoad() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 281 of file ppc_simd.h.
|
inline |
Loads a vector from a word array.
src | the word array |
VecLoad() loads a vector in from a word array.
VecLoad() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 308 of file ppc_simd.h.
|
inline |
Loads a vector from a word array.
src | the word array |
off | offset into the word array |
VecLoad() loads a vector in from a word array.
VecLoad() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 326 of file ppc_simd.h.
|
inline |
Loads a vector from a word array.
src | the word array |
VecLoad() loads a vector in from a word array.
VecLoad() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
VecLoad() with 64-bit elements is available on POWER7 and above.
Definition at line 346 of file ppc_simd.h.
|
inline |
Loads a vector from a word array.
src | the word array |
off | offset into the word array |
VecLoad() loads a vector in from a word array.
VecLoad() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
VecLoad() with 64-bit elements is available on POWER8 and above.
Definition at line 365 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned byte array.
src | the byte array |
VecLoadAligned() loads a vector in from an aligned byte array.
VecLoadAligned() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. Altivec's vec_ld
is used if POWER7 is not available. The effective address of src
must be aligned.
Definition at line 383 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned byte array.
src | the byte array |
off | offset into the byte array |
VecLoadAligned() loads a vector in from an aligned byte array.
VecLoadAligned() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. Altivec's vec_ld
is used if POWER7 is not available. The effective address of src
must be aligned.
Definition at line 411 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
VecLoadBE() loads a vector in from a byte array. VecLoadBE will reverse all bytes in the array on a little endian system.
VecLoadBE() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 440 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
off | offset into the src byte array |
VecLoadBE() loads a vector in from a byte array. VecLoadBE will reverse all bytes in the array on a little endian system.
VecLoadBE() uses POWER7's vec_xl
or vec_vsx_ld
if available. The instructions do not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 481 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array |
VecStore_ALTIVEC() stores a vector to a byte array.
VecStore_ALTIVEC() uses vec_st
if the effective address of dest
is aligned, and uses vec_ste
otherwise. vec_ste
is relatively expensive so you should provide aligned memory addresses.
VecStore_ALTIVEC() is used automatically when POWER7 or above and unaligned loads are not available.
Definition at line 528 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | the byte offset into the array |
dest | the byte array |
VecStore_ALTIVEC() stores a vector to a byte array.
VecStore_ALTIVEC() uses vec_st
if the effective address of dest
is aligned, and uses vec_ste
otherwise. vec_ste
is relatively expensive so you should provide aligned memory addresses.
VecStore_ALTIVEC() is used automatically when POWER7 or above and unaligned loads are not available.
Definition at line 567 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array |
VecStore() stores a vector to a byte array.
VecStore() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 605 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | the byte offset into the array |
dest | the byte array |
VecStore() stores a vector to a byte array.
VecStore() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 636 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array |
VecStore() stores a vector to a word array.
VecStore() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 666 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | the byte offset into the array |
dest | the word array |
VecStore() stores a vector to a word array.
VecStore() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 687 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array |
VecStore() stores a vector to a word array.
VecStore() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
VecStore() with 64-bit elements is available on POWER8 and above.
Definition at line 708 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | the byte offset into the array |
dest | the word array |
VecStore() stores a vector to a word array.
VecStore() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
VecStore() with 64-bit elements is available on POWER8 and above.
Definition at line 730 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array |
VecStoreBE() stores a vector to a byte array. VecStoreBE will reverse all bytes in the array on a little endian system.
VecStoreBE() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 751 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | offset into the dest byte array |
dest | the byte array |
VecStoreBE() stores a vector to a byte array. VecStoreBE will reverse all bytes in the array on a little endian system.
VecStoreBE() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 795 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array |
VecStoreBE() stores a vector to a word array. VecStoreBE will reverse all bytes in the array on a little endian system.
VecStoreBE() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 838 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | offset into the dest word array |
dest | the word array |
VecStoreBE() stores a vector to a word array. VecStoreBE will reverse all bytes in the array on a little endian system.
VecStoreBE() uses POWER7's vec_xst
or vec_vsx_st
if available. The instructions do not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses.
Definition at line 860 of file ppc_simd.h.
|
inline |
AND two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecAnd() returns a new vector from vec1 and vec2. The return vector is the same type as vec1.
Definition at line 882 of file ppc_simd.h.
|
inline |
OR two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecOr() returns a new vector from vec1 and vec2. The return vector is the same type as vec1.
Definition at line 899 of file ppc_simd.h.
|
inline |
XOR two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecXor() returns a new vector from vec1 and vec2. The return vector is the same type as vec1.
Definition at line 916 of file ppc_simd.h.
|
inline |
Add two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecAdd() returns a new vector from vec1 and vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1.
Definition at line 939 of file ppc_simd.h.
|
inline |
Subtract two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecSub() returns a new vector from vec1 and vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1.
Definition at line 956 of file ppc_simd.h.
|
inline |
Add two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecAdd64() returns a new vector from vec1 and vec2. vec1 and vec2 are added as if uint64x2_p vectors. On POWER7 and below VecAdd64() manages the carries from two elements in a uint32x4_p vector.
Definition at line 974 of file ppc_simd.h.
|
inline |
Permutes a vector.
T1 | vector type |
T2 | vector type |
vec | the vector |
mask | vector mask |
VecPermute() returns a new vector from vec based on mask. mask is an uint8x16_p type vector. The return vector is the same type as vec.
Definition at line 1010 of file ppc_simd.h.
|
inline |
Permutes two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
mask | vector mask |
VecPermute() returns a new vector from vec1 and vec2 based on mask. mask is an uint8x16_p type vector. The return vector is the same type as vec1.
Definition at line 1029 of file ppc_simd.h.
|
inline |
Shift a vector left.
C | shift byte count |
T | vector type |
vec | the vector |
VecShiftLeftOctet() returns a new vector after shifting the concatenation of the zero vector and the source vector by the specified number of bytes. The return vector is the same type as vec.
On big endian machines VecShiftLeftOctet() is vec_sld(a, z, c)
. On little endian machines VecShiftLeftOctet() is translated to vec_sld(z, a, 16-c)
. You should always call the function as if on a big endian machine as shown below.
uint8x16_p x = VecLoad(ptr); uint8x16_p y = VecShiftLeftOctet<12>(x);
Definition at line 1056 of file ppc_simd.h.
|
inline |
Shift a vector right.
C | shift byte count |
T | vector type |
vec | the vector |
VecShiftRightOctet() returns a new vector after shifting the concatenation of the zero vector and the source vector by the specified number of bytes. The return vector is the same type as vec.
On big endian machines VecShiftRightOctet() is vec_sld(a, z, c)
. On little endian machines VecShiftRightOctet() is translated to vec_sld(z, a, 16-c)
. You should always call the function as if on a big endian machine as shown below.
uint8x16_p x = VecLoad(ptr); uint8x16_p y = VecShiftRightOctet<12>(x);
Definition at line 1103 of file ppc_simd.h.
|
inline |
Rotate a vector left.
C | shift byte count |
T | vector type |
vec | the vector |
VecRotateLeftOctet() returns a new vector after rotating the concatenation of the source vector with itself by the specified number of bytes. The return vector is the same type as vec.
Definition at line 1142 of file ppc_simd.h.
|
inline |
Rotate a vector right.
C | shift byte count |
T | vector type |
vec | the vector |
VecRotateRightOctet() returns a new vector after rotating the concatenation of the source vector with itself by the specified number of bytes. The return vector is the same type as vec.
Definition at line 1167 of file ppc_simd.h.
|
inline |
Rotate a packed vector left.
C | shift bit count |
vec | the vector |
VecRotateLeft() rotates each element in a packed vector by bit count.
Definition at line 1187 of file ppc_simd.h.
|
inline |
Shift a packed vector left.
C | shift bit count |
vec | the vector |
VecShiftLeft() shifts each element in a packed vector by bit count.
Definition at line 1202 of file ppc_simd.h.
|
inline |
Merge two vectors.
T | vector type |
vec1 | the first vector |
vec2 | the second vector |
Definition at line 1217 of file ppc_simd.h.
|
inline |
Merge two vectors.
T | vector type |
vec1 | the first vector |
vec2 | the second vector |
Definition at line 1231 of file ppc_simd.h.
|
inline |
Rotate a packed vector left.
C | shift bit count |
vec | the vector |
VecRotateLeft() rotates each element in a packed vector by bit count.
VecRotateLeft() with 64-bit elements is available on POWER8 and above.
Definition at line 1248 of file ppc_simd.h.
|
inline |
Shift a packed vector left.
C | shift bit count |
vec | the vector |
VecShiftLeft() shifts each element in a packed vector by bit count.
VecShiftLeft() with 64-bit elements is available on POWER8 and above.
Definition at line 1264 of file ppc_simd.h.
|
inline |
Rotate a packed vector right.
C | shift bit count |
vec | the vector |
VecRotateRight() rotates each element in a packed vector by bit count.
Definition at line 1281 of file ppc_simd.h.
|
inline |
Shift a packed vector right.
C | shift bit count |
vec | the vector |
VecShiftRight() shifts each element in a packed vector by bit count.
Definition at line 1296 of file ppc_simd.h.
|
inline |
Rotate a packed vector right.
C | shift bit count |
vec | the vector |
VecRotateRight() rotates each element in a packed vector by bit count.
VecRotateRight() with 64-bit elements is available on POWER8 and above.
Definition at line 1314 of file ppc_simd.h.
|
inline |
Shift a packed vector right.
C | shift bit count |
vec | the vector |
VecShiftRight() shifts each element in a packed vector by bit count.
VecShiftRight() with 64-bit elements is available on POWER8 and above.
Definition at line 1330 of file ppc_simd.h.
|
inline |
Exchange high and low double words.
T | vector type |
vec | the vector |
Definition at line 1346 of file ppc_simd.h.
|
inline |
Extract a dword from a vector.
T | vector type |
val | the vector |
VecGetLow() extracts the low dword from a vector. The low dword is composed of the least significant bits and occupies bytes 8 through 15 when viewed as a big endian array. The return vector is the same type as the original vector and padded with 0's in the most significant bit positions.
Definition at line 1363 of file ppc_simd.h.
|
inline |
Extract a dword from a vector.
T | vector type |
val | the vector |
VecGetHigh() extracts the high dword from a vector. The high dword is composed of the most significant bits and occupies bytes 0 through 7 when viewed as a big endian array. The return vector is the same type as the original vector and padded with 0's in the most significant bit positions.
Definition at line 1385 of file ppc_simd.h.
|
inline |
Compare two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecEqual() performs a bitwise compare. The vector element types do not matter.
Definition at line 1407 of file ppc_simd.h.
|
inline |
Compare two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecNotEqual() performs a bitwise compare. The vector element types do not matter.
Definition at line 1424 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
VecPolyMultiply() performs polynomial multiplication. POWER8 polynomial multiplication multiplies the high and low terms, and then XOR's the high and low products. That is, the result is ah*bh XOR al*bl
. This behavior differs from Intel polynomial multiplication. To obtain a single product without the XOR, set one of the high or low terms to 0. For example, setting ah=0
results in 0*bh XOR al*bl = al*bl
.
Definition at line 1452 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
VecPolyMultiply() performs polynomial multiplication. POWER8 polynomial multiplication multiplies the high and low terms, and then XOR's the high and low products. That is, the result is ah*bh XOR al*bl
. This behavior differs from Intel polynomial multiplication. To obtain a single product without the XOR, set one of the high or low terms to 0. For example, setting ah=0
results in 0*bh XOR al*bl = al*bl
.
Definition at line 1477 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
VecPolyMultiply00LE() performs polynomial multiplication and presents the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x00)
. The 0x00
indicates the low 64-bits of a
and b
are multiplied.
Definition at line 1501 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
VecPolyMultiply01LE() performs polynomial multiplication and presents the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x01)
. The 0x01
indicates the low 64-bits of a
and high 64-bits of b
are multiplied.
Definition at line 1523 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
VecPolyMultiply10LE() performs polynomial multiplication and presents the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x10)
. The 0x10
indicates the high 64-bits of a
and low 64-bits of b
are multiplied.
Definition at line 1545 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
VecPolyMultiply11LE() performs polynomial multiplication and presents the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x11)
. The 0x11
indicates the high 64-bits of a
and b
are multiplied.
Definition at line 1567 of file ppc_simd.h.
|
inline |
One round of AES encryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector |
VecEncrypt() performs one round of AES encryption of state using subkey key. The return vector is the same type as state.
VecEncrypt() is available on POWER8 and above.
Definition at line 1593 of file ppc_simd.h.
|
inline |
Final round of AES encryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector |
VecEncryptLast() performs the final round of AES encryption of state using subkey key. The return vector is the same type as state.
VecEncryptLast() is available on POWER8 and above.
Definition at line 1618 of file ppc_simd.h.
|
inline |
One round of AES decryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector |
VecDecrypt() performs one round of AES decryption of state using subkey key. The return vector is the same type as state.
VecDecrypt() is available on POWER8 and above.
Definition at line 1643 of file ppc_simd.h.
|
inline |
Final round of AES decryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector |
VecDecryptLast() performs the final round of AES decryption of state using subkey key. The return vector is the same type as state.
VecDecryptLast() is available on POWER8 and above.
Definition at line 1668 of file ppc_simd.h.
|
inline |
SHA256 Sigma functions.
func | function |
fmask | function mask |
T | vector type |
vec | the block to transform |
VecSHA256() selects sigma0, sigma1, Sigma0, Sigma1 based on func and fmask. The return vector is the same type as vec.
VecSHA256() is available on POWER8 and above.
Definition at line 1698 of file ppc_simd.h.
|
inline |
SHA512 Sigma functions.
func | function |
fmask | function mask |
T | vector type |
vec | the block to transform |
VecSHA512() selects sigma0, sigma1, Sigma0, Sigma1 based on func and fmask. The return vector is the same type as vec.
VecSHA512() is available on POWER8 and above.
Definition at line 1723 of file ppc_simd.h.