#ifndef CRYPTOPP_PPC_CRYPTO_H
#define CRYPTOPP_PPC_CRYPTO_H

#include "config.h"
#include "misc.h"

#if defined(__ALTIVEC__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
#endif
// XL C++ on AIX does not define VSX or CRYPTO and does not provide
// an option to set them, so set them here for the code below.
#ifndef CRYPTOPP_DISABLE_POWER7
# if defined(_AIX) && defined(_ARCH_PWR7) && defined(__xlC__)
#  define __VSX__ 1
# endif
#endif

#ifndef CRYPTOPP_DISABLE_POWER8
# if defined(_AIX) && defined(_ARCH_PWR8) && defined(__xlC__)
#  define __CRYPTO__ 1
# endif
#endif
// Cast array to const vector pointer.
#define CONST_V8_CAST(x)   ((unsigned char*)(x))
#define CONST_V32_CAST(x)  ((unsigned int*)(x))
#define CONST_V64_CAST(x)  ((unsigned long long*)(x))

// Cast array to non-const vector pointer.
#define NCONST_V8_CAST(x)  ((unsigned char*)(x))
#define NCONST_V32_CAST(x) ((unsigned int*)(x))
#define NCONST_V64_CAST(x) ((unsigned long long*)(x))
#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wdeprecated"
#endif

NAMESPACE_BEGIN(CryptoPP)
#if defined(__ALTIVEC__) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

// Vector of 8-bit elements.
typedef __vector unsigned char uint8x16_p;
// Vector of 16-bit elements.
typedef __vector unsigned short uint16x8_p;
// Vector of 32-bit elements.
typedef __vector unsigned int uint32x4_p;

// Vector of 64-bit elements. Requires VSX (POWER7) or POWER8.
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
typedef __vector unsigned long long uint64x2_p;
#endif
// Returns the all-zeros vector.
inline uint32x4_p VecZero()
{
    const uint32x4_p v = {0,0,0,0};
    return v;
}

// Returns a vector with 1 in each 32-bit element.
inline uint32x4_p VecOne()
{
    const uint32x4_p v = {1,1,1,1};
    return v;
}
// Reverses the 16 bytes of a vector.
template <class T>
inline T VecReverse(const T data)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
    return (T)vec_perm(data, data, mask);
#else
    const uint8x16_p mask = {0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15};
    return (T)vec_perm(data, data, mask);
#endif
}
// Reverses the bytes of a vector on little-endian machines; no-op on big-endian.
template <class T>
inline T VecReverseLE(const T data)
{
#if defined(CRYPTOPP_LITTLE_ENDIAN)
    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
    return (T)vec_perm(data, data, mask);
#else
    return data;
#endif
}
// Reverses the bytes of a vector on big-endian machines; no-op on little-endian.
template <class T>
inline T VecReverseBE(const T data)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
    return (T)vec_perm(data, data, mask);
#else
    return data;
#endif
}
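// Usage sketch (editor's addition, not part of the original header):
// on little-endian POWER, VecReverseLE byte-reverses a vector while
// VecReverseBE is a no-op; the roles swap on big-endian. "ptr" is a
// hypothetical 16-byte buffer.
//
//   const uint32x4_p v = VecLoad(ptr);     // native byte order
//   const uint32x4_p w = VecReverseLE(v);  // reversed on LE, unchanged on BE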
// Loads a vector from a possibly unaligned byte array using only
// POWER4/Altivec instructions.
inline uint32x4_p VecLoad_ALTIVEC(const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    if (addr % 16 == 0)
    {
        return (uint32x4_p)vec_ld(0, CONST_V8_CAST(src));
    }
    else
    {
        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
        const uint8x16_p perm = vec_lvsl(0, CONST_V8_CAST(src));
        const uint8x16_p low = vec_ld(0, CONST_V8_CAST(src));
        const uint8x16_p high = vec_ld(15, CONST_V8_CAST(src));
        return (uint32x4_p)vec_perm(low, high, perm);
    }
}
// Loads a vector from src+off using only POWER4/Altivec instructions.
inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    if (addr % 16 == 0)
    {
        return (uint32x4_p)vec_ld(off, CONST_V8_CAST(src));
    }
    else
    {
        const uint8x16_p perm = vec_lvsl(off, CONST_V8_CAST(src));
        const uint8x16_p low = vec_ld(off, CONST_V8_CAST(src));
        const uint8x16_p high = vec_ld(off+15, CONST_V8_CAST(src));
        return (uint32x4_p)vec_perm(low, high, perm);
    }
}
// Loads a vector from a byte array; the address may be unaligned.
inline uint32x4_p VecLoad(const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(0, CONST_V8_CAST(src));
#else
    return (uint32x4_p)VecLoad_ALTIVEC(CONST_V8_CAST(src));
#endif
}
inline uint32x4_p VecLoad(int off, const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(off, CONST_V8_CAST(src));
#else
    return (uint32x4_p)VecLoad_ALTIVEC(off, CONST_V8_CAST(src));
#endif
}
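// Usage sketch (editor's addition): the offset overloads avoid pointer
// arithmetic at call sites when walking a message buffer. "msg" is a
// hypothetical byte array of at least 64 bytes.
//
//   const uint32x4_p b0 = VecLoad( 0, msg);
//   const uint32x4_p b1 = VecLoad(16, msg);
//   const uint32x4_p b2 = VecLoad(32, msg);
//   const uint32x4_p b3 = VecLoad(48, msg);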
inline uint32x4_p VecLoad(const word32 src[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(0, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint32x4_p)vec_xl(0, CONST_V32_CAST(src));
#else
    return (uint32x4_p)VecLoad_ALTIVEC(CONST_V8_CAST(src));
#endif
}
inline uint32x4_p VecLoad(int off, const word32 src[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(off, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint32x4_p)vec_xl(off, CONST_V32_CAST(src));
#else
    return (uint32x4_p)VecLoad_ALTIVEC(off, CONST_V8_CAST(src));
#endif
}
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

inline uint64x2_p VecLoad(const word64 src[2])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word64>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint64x2_p)vec_xl(0, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    // The 32-bit cast is not a typo; it works around a compiler quirk.
    return (uint64x2_p)vec_xl(0, CONST_V32_CAST(src));
#else
    return (uint64x2_p)VecLoad_ALTIVEC(CONST_V8_CAST(src));
#endif
}
inline uint64x2_p VecLoad(int off, const word64 src[2])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word64>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint64x2_p)vec_xl(off, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    // The 32-bit cast is not a typo; it works around a compiler quirk.
    return (uint64x2_p)vec_xl(off, CONST_V32_CAST(src));
#else
    return (uint64x2_p)VecLoad_ALTIVEC(off, CONST_V8_CAST(src));
#endif
}

#endif  // __VSX__ or _ARCH_PWR8
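// Usage sketch (editor's addition): the word64 overloads exist only
// when uint64x2_p is available, so call sites need the same guard.
//
// #if defined(__VSX__) || defined(_ARCH_PWR8)
//   const word64 k[2] = {1, 2};      // values are illustrative only
//   const uint64x2_p v = VecLoad(k);
// #endif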
// Loads a vector from a 16-byte aligned byte array.
inline uint32x4_p VecLoadAligned(const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_ASSERT(addr % 16 == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(0, CONST_V8_CAST(src));
#else
    return (uint32x4_p)vec_ld(0, CONST_V8_CAST(src));
#endif
}
inline uint32x4_p VecLoadAligned(int off, const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_ASSERT(addr % 16 == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(off, CONST_V8_CAST(src));
#else
    return (uint32x4_p)vec_ld(off, CONST_V8_CAST(src));
#endif
}
inline uint32x4_p VecLoadAligned(const word32 src[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_ASSERT(addr % 16 == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(0, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint32x4_p)vec_xl(0, CONST_V32_CAST(src));
#else
    return (uint32x4_p)vec_ld(0, CONST_V8_CAST(src));
#endif
}
inline uint32x4_p VecLoadAligned(int off, const word32 src[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_ASSERT(addr % 16 == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint32x4_p)vec_xl(off, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint32x4_p)vec_xl(off, CONST_V32_CAST(src));
#else
    return (uint32x4_p)vec_ld(off, CONST_V8_CAST(src));
#endif
}
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

inline uint64x2_p VecLoadAligned(const word64 src[2])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_ASSERT(addr % 16 == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint64x2_p)vec_xl(0, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint64x2_p)vec_xl(0, CONST_V64_CAST(src));
#else
    return (uint64x2_p)vec_ld(0, CONST_V8_CAST(src));
#endif
}
inline uint64x2_p VecLoadAligned(int off, const word64 src[2])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_ASSERT(addr % 16 == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    return (uint64x2_p)vec_xl(off, CONST_V8_CAST(src));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint64x2_p)vec_xl(off, CONST_V64_CAST(src));
#else
    return (uint64x2_p)vec_ld(off, CONST_V8_CAST(src));
#endif
}

#endif  // __VSX__ or _ARCH_PWR8
// Loads a vector from a byte array in big-endian element order,
// regardless of host endianness.
inline uint32x4_p VecLoadBE(const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    return (uint32x4_p)vec_xl_be(0, CONST_V8_CAST(src));
#elif defined(CRYPTOPP_BIG_ENDIAN)
    return (uint32x4_p)VecLoad_ALTIVEC(CONST_V8_CAST(src));
#else
    return (uint32x4_p)VecReverseLE(VecLoad_ALTIVEC(CONST_V8_CAST(src)));
#endif
}
inline uint32x4_p VecLoadBE(int off, const byte src[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(src)+off;
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    return (uint32x4_p)vec_xl_be(off, CONST_V8_CAST(src));
#elif defined(CRYPTOPP_BIG_ENDIAN)
    return (uint32x4_p)VecLoad_ALTIVEC(CONST_V8_CAST(addr));
#else
    return (uint32x4_p)VecReverseLE(VecLoad_ALTIVEC(CONST_V8_CAST(addr)));
#endif
}
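// Usage sketch (editor's addition): VecLoadBE reads a 16-byte block in
// big-endian (wire) order regardless of host endianness, which is the
// common need when loading SHA message blocks or AES input.
//
//   const uint32x4_p block = VecLoadBE(ptr);  // "ptr" is hypothetical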
// Stores a vector to a possibly unaligned byte array using only
// POWER4/Altivec instructions.
template <class T>
inline void VecStore_ALTIVEC(const T data, byte dest[16])
{
    uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    if (addr % 16 == 0)
    {
        vec_st((uint8x16_p)data, 0, NCONST_V8_CAST(addr));
    }
    else
    {
        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
        uint8x16_p perm = (uint8x16_p)vec_perm(data, data, vec_lvsr(0, NCONST_V8_CAST(addr)));
        vec_ste((uint8x16_p) perm,  0, (unsigned char*) NCONST_V8_CAST(addr));
        vec_ste((uint16x8_p) perm,  1, (unsigned short*)NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm,  3, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm,  4, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm,  8, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm, 12, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint16x8_p) perm, 14, (unsigned short*)NCONST_V8_CAST(addr));
        vec_ste((uint8x16_p) perm, 15, (unsigned char*) NCONST_V8_CAST(addr));
    }
}
// Stores a vector to dest+off using only POWER4/Altivec instructions.
template <class T>
inline void VecStore_ALTIVEC(const T data, int off, byte dest[16])
{
    uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    if (addr % 16 == 0)
    {
        vec_st((uint8x16_p)data, 0, NCONST_V8_CAST(addr));
    }
    else
    {
        uint8x16_p perm = (uint8x16_p)vec_perm(data, data, vec_lvsr(0, NCONST_V8_CAST(addr)));
        vec_ste((uint8x16_p) perm,  0, (unsigned char*) NCONST_V8_CAST(addr));
        vec_ste((uint16x8_p) perm,  1, (unsigned short*)NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm,  3, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm,  4, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm,  8, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint32x4_p) perm, 12, (unsigned int*)  NCONST_V8_CAST(addr));
        vec_ste((uint16x8_p) perm, 14, (unsigned short*)NCONST_V8_CAST(addr));
        vec_ste((uint8x16_p) perm, 15, (unsigned char*) NCONST_V8_CAST(addr));
    }
}
// Stores a vector to a byte array; the address may be unaligned.
template <class T>
inline void VecStore(const T data, byte dest[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#else
    VecStore_ALTIVEC((uint8x16_p)data, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStore(const T data, int off, byte dest[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#else
    VecStore_ALTIVEC((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#endif
}
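// Usage sketch (editor's addition): a round-trip load, modify, store.
// Unaligned effective addresses are handled by the fallbacks above.
//
//   uint32x4_p v = VecLoad(buf);   // "buf" is a hypothetical word32[4]
//   v = VecAdd(v, VecOne());       // add 1 to every 32-bit lane
//   VecStore(v, buf);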
template <class T>
inline void VecStore(const T data, word32 dest[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    vec_xst((uint32x4_p)data, 0, NCONST_V32_CAST(dest));
#else
    VecStore_ALTIVEC((uint8x16_p)data, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStore(const T data, int off, word32 dest[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    vec_xst((uint32x4_p)data, off, NCONST_V32_CAST(dest));
#else
    VecStore_ALTIVEC((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStore(const T data, word64 dest[2])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word64>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    // The 32-bit cast is not a typo; it works around a compiler quirk.
    vec_xst((uint32x4_p)data, 0, NCONST_V32_CAST(dest));
#else
    VecStore_ALTIVEC((uint8x16_p)data, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStore(const T data, int off, word64 dest[2])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word64>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    // The 32-bit cast is not a typo; it works around a compiler quirk.
    vec_xst((uint32x4_p)data, off, NCONST_V32_CAST(dest));
#else
    VecStore_ALTIVEC((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#endif
}
// Stores a vector to a 16-byte aligned byte array.
template <class T>
inline void VecStoreAligned(const T data, byte dest[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#else
    vec_st((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStoreAligned(const T data, int off, byte dest[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#else
    vec_st((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStoreAligned(const T data, word32 dest[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    vec_xst((uint32x4_p)data, 0, NCONST_V32_CAST(dest));
#else
    vec_st((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStoreAligned(const T data, int off, word32 dest[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
    vec_xst((uint32x4_p)data, off, NCONST_V32_CAST(dest));
#else
    vec_st((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#endif
}
// Stores a vector to a byte array in big-endian element order,
// regardless of host endianness.
template <class T>
inline void VecStoreBE(const T data, byte dest[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst_be((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#elif defined(CRYPTOPP_BIG_ENDIAN)
    VecStore((uint8x16_p)data, NCONST_V8_CAST(dest));
#else
    VecStore((uint8x16_p)VecReverseLE(data), NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStoreBE(const T data, int off, byte dest[16])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<byte>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst_be((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#elif defined(CRYPTOPP_BIG_ENDIAN)
    VecStore((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#else
    VecStore((uint8x16_p)VecReverseLE(data), off, NCONST_V8_CAST(dest));
#endif
}
template <class T>
inline void VecStoreBE(const T data, word32 dest[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst_be((uint8x16_p)data, 0, NCONST_V8_CAST(dest));
#elif defined(CRYPTOPP_BIG_ENDIAN)
    VecStore((uint32x4_p)data, NCONST_V32_CAST(dest));
#else
    VecStore((uint32x4_p)VecReverseLE(data), NCONST_V32_CAST(dest));
#endif
}
template <class T>
inline void VecStoreBE(const T data, int off, word32 dest[4])
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(dest)+off;
    CRYPTOPP_ASSERT(addr % GetAlignmentOf<word32>() == 0);
    CRYPTOPP_UNUSED(addr);

#if defined(_ARCH_PWR9)
    vec_xst_be((uint8x16_p)data, off, NCONST_V8_CAST(dest));
#elif defined(CRYPTOPP_BIG_ENDIAN)
    VecStore((uint32x4_p)data, off, NCONST_V32_CAST(dest));
#else
    VecStore((uint32x4_p)VecReverseLE(data), off, NCONST_V32_CAST(dest));
#endif
}
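// Usage sketch (editor's addition): a store that keeps data in
// big-endian format on disk or on the wire. "src" and "dest" are
// hypothetical word32[4] buffers.
//
//   uint32x4_p v = VecLoadBE(src);
//   v = VecAdd(v, VecOne());        // add 1 to every 32-bit lane
//   VecStoreBE(v, dest);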
template <class T1, class T2>
inline T1 VecAnd(const T1 vec1, const T2 vec2)
{
    return (T1)vec_and(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecOr(const T1 vec1, const T2 vec2)
{
    return (T1)vec_or(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecXor(const T1 vec1, const T2 vec2)
{
    return (T1)vec_xor(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecAdd(const T1 vec1, const T2 vec2)
{
    return (T1)vec_add(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecSub(const T1 vec1, const T2 vec2)
{
    return (T1)vec_sub(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecPermute(const T1 vec, const T2 mask)
{
    return (T1)vec_perm(vec, vec, (uint8x16_p)mask);
}
template <class T1, class T2>
inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask)
{
    return (T1)vec_perm(vec1, (T1)vec2, (uint8x16_p)mask);
}
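// Usage sketch (editor's addition): vec_perm selects bytes from the
// 32-byte concatenation of its inputs, so masks express arbitrary byte
// shuffles. This mask swaps the two 8-byte halves of "v":
//
//   const uint8x16_p m = {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7};
//   const uint32x4_p r = VecPermute(v, m);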
// Shifts a vector left by C octets, shifting in zeros.
template <unsigned int C, class T>
inline T VecShiftLeftOctet(const T vec)
{
    const T zero = {0};
#if defined(CRYPTOPP_BIG_ENDIAN)
    enum { R=C&0xf };
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, R);
#else
    enum { R=(16-C)&0xf };
    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, R);
#endif
}
// Shifts a vector right by C octets, shifting in zeros.
template <unsigned int C, class T>
inline T VecShiftRightOctet(const T vec)
{
    const T zero = {0};
#if defined(CRYPTOPP_BIG_ENDIAN)
    enum { R=(16-C)&0xf };
    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, R);
#else
    enum { R=C&0xf };
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, R);
#endif
}
// Rotates a vector left by C octets.
template <unsigned int C, class T>
inline T VecRotateLeftOctet(const T vec)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    enum { R=C&0xf };
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)vec, R);
#else
    enum { R=(16-C)&0xf };
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)vec, R);
#endif
}
// Rotates a vector right by C octets.
template <unsigned int C, class T>
inline T VecRotateRightOctet(const T vec)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    enum { R=(16-C)&0xf };
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)vec, R);
#else
    enum { R=C&0xf };
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)vec, R);
#endif
}
template<unsigned int C>
inline uint32x4_p VecRotateLeft(const uint32x4_p vec)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_rl(vec, m);
}
template<unsigned int C>
inline uint32x4_p VecRotateRight(const uint32x4_p vec)
{
    const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
    return vec_rl(vec, m);
}
template<unsigned int C>
inline uint32x4_p VecShiftLeft(const uint32x4_p vec)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_sl(vec, m);
}
template<unsigned int C>
inline uint32x4_p VecShiftRight(const uint32x4_p vec)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_sr(vec, m);
}
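// Usage sketch (editor's addition): a rotate compiles to a single
// vrlw, while composing it from shifts takes three operations. Both
// lines below produce the same result:
//
//   const uint32x4_p r1 = VecRotateLeft<7>(v);
//   const uint32x4_p r2 = VecOr(VecShiftLeft<7>(v), VecShiftRight<25>(v));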
#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

template<unsigned int C>
inline uint64x2_p VecRotateLeft(const uint64x2_p vec)
{
    const uint64x2_p m = {C, C};
    return vec_rl(vec, m);
}
template<unsigned int C>
inline uint64x2_p VecShiftLeft(const uint64x2_p vec)
{
    const uint64x2_p m = {C, C};
    return vec_sl(vec, m);
}
template<unsigned int C>
inline uint64x2_p VecRotateRight(const uint64x2_p vec)
{
    const uint64x2_p m = {64-C, 64-C};
    return vec_rl(vec, m);
}
template<unsigned int C>
inline uint64x2_p VecShiftRight(const uint64x2_p vec)
{
    const uint64x2_p m = {C, C};
    return vec_sr(vec, m);
}

#endif  // _ARCH_PWR8
template <class T>
inline T VecMergeLow(const T vec1, const T vec2)
{
    return vec_mergel(vec1, vec2);
}
template <class T>
inline T VecMergeHigh(const T vec1, const T vec2)
{
    return vec_mergeh(vec1, vec2);
}
// Broadcasts a 32-bit word to all elements.
inline uint32x4_p VecSplatWord(word32 val)
{
    // Avoid a spurious GCC warning.
    CRYPTOPP_UNUSED(val);

    // Apple Altivec and XL C++ do not offer vec_splats;
    // GCC offers vec_splats back to -mcpu=power4.
#if defined(_ARCH_PWR4) && defined(__GNUC__)
    return vec_splats(val);
#else
    const word32 x[4] = {val};
    return vec_splat(VecLoad(x),0);
#endif
}
template <unsigned int N>
inline uint32x4_p VecSplatElement(const uint32x4_p val)
{
    return vec_splat(val, N);
}
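// Usage sketch (editor's addition): broadcasting a constant and an
// existing element. The constant value is illustrative only.
//
//   const uint32x4_p k  = VecSplatWord(0x9e3779b9);
//   const uint32x4_p e2 = VecSplatElement<2>(state);  // "state" hypothetical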
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

// Broadcasts a 64-bit word to both elements; requires VSX or POWER8.
inline uint64x2_p VecSplatWord(word64 val)
{
    // The PPC64 ABI says so.
    return vec_splats((unsigned long long)val);
}
template <unsigned int N>
inline uint64x2_p VecSplatElement(const uint64x2_p val)
{
#if defined(__VSX__) || defined(_ARCH_PWR8)
    return vec_splat(val, N);
#else
    enum {E=N&1};
    if (E == 0)
    {
        const uint8x16_p m = {0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7};
        return vec_perm(val, val, m);
    }
    else
    {
        const uint8x16_p m = {8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15};
        return vec_perm(val, val, m);
    }
#endif
}

#endif  // __VSX__ or _ARCH_PWR8
// Returns the low 64 bits of a vector, zero-extended.
template <class T>
inline T VecGetLow(const T val)
{
#if defined(CRYPTOPP_BIG_ENDIAN) && (defined(__VSX__) || defined(_ARCH_PWR8))
    const T zero = {0};
    return (T)VecMergeLow((uint64x2_p)zero, (uint64x2_p)val);
#else
    return VecShiftRightOctet<8>(VecShiftLeftOctet<8>(val));
#endif
}
// Returns the high 64 bits of a vector, zero-extended.
template <class T>
inline T VecGetHigh(const T val)
{
#if defined(CRYPTOPP_BIG_ENDIAN) && (defined(__VSX__) || defined(_ARCH_PWR8))
    const T zero = {0};
    return (T)VecMergeHigh((uint64x2_p)zero, (uint64x2_p)val);
#else
    return VecShiftRightOctet<8>(val);
#endif
}
// Exchanges the high and low 64-bit halves of a vector.
template <class T>
inline T VecSwapWords(const T vec)
{
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)vec, 8);
}
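// Usage sketch (editor's addition): splitting a uint64x2_p into its
// 64-bit halves, each returned in the low dword position.
//
//   const uint64x2_p lo = VecGetLow(v);   // low 64 bits, zero-extended
//   const uint64x2_p hi = VecGetHigh(v);  // high 64 bits, zero-extended
//   const uint64x2_p sw = VecSwapWords(v);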
template <class T1, class T2>
inline bool VecEqual(const T1 vec1, const T2 vec2)
{
    return 1 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2);
}
template <class T1, class T2>
inline bool VecNotEqual(const T1 vec1, const T2 vec2)
{
    return 0 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2);
}
// Adds the packed 64-bit elements of two vectors. Below POWER8 the
// 64-bit add is emulated with 32-bit adds and a manual carry.
inline uint32x4_p VecAdd64(const uint32x4_p& vec1, const uint32x4_p& vec2)
{
    // 64-bit elements available at POWER8.
#if defined(_ARCH_PWR8) && !defined(CRYPTOPP_DEBUG)
    return (uint32x4_p)vec_add((uint64x2_p)vec1, (uint64x2_p)vec2);
#else
    // The carry mask selects carries for the dwords.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint32x4_p zero = {0, 0, 0, 0};
    const uint32x4_p mask = {0, 1, 0, 1};
#else
    const uint32x4_p zero = {0, 0, 0, 0};
    const uint32x4_p mask = {1, 0, 1, 0};
#endif

    uint32x4_p cy = vec_addc(vec1, vec2);
    uint32x4_p res = vec_add(vec1, vec2);
    cy = vec_and(mask, cy);
    cy = vec_sld (cy, zero, 4);
    return vec_add(res, cy);
#endif
}
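// Usage sketch (editor's addition): VecAdd64 performs two 64-bit lane
// additions even on POWER7/Altivec-only builds, where the values are
// carried in uint32x4_p and the carry is propagated manually as above.
//
//   const uint32x4_p sum = VecAdd64(x, y);  // "x" and "y" each hold
//                                           //   two 64-bit lanes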
#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
inline uint64x2_p VecAdd64(const uint64x2_p& vec1, const uint64x2_p& vec2)
{
    // 64-bit elements available at POWER8.
    const uint64x2_p res = vec_add(vec1, vec2);

#if defined(CRYPTOPP_DEBUG)
    // Verify the 32-bit emulation against the native 64-bit add.
    const uint32x4_p x = (uint32x4_p)vec1;
    const uint32x4_p y = (uint32x4_p)vec2;
    const uint32x4_p r = VecAdd64(x, y);
    CRYPTOPP_ASSERT(vec_all_eq((uint32x4_p)res, r) == 1);
#endif

    return res;
}
#endif  // _ARCH_PWR8
// Subtracts the packed 64-bit elements of two vectors. Below POWER8
// the 64-bit subtract is emulated with 32-bit subtracts and a manual
// borrow.
inline uint32x4_p VecSub64(const uint32x4_p& vec1, const uint32x4_p& vec2)
{
#if defined(_ARCH_PWR8) && !defined(CRYPTOPP_DEBUG)
    // 64-bit elements available at POWER8.
    return (uint32x4_p)vec_sub((uint64x2_p)vec1, (uint64x2_p)vec2);
#else
    // The borrow mask selects borrows for the dwords.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint32x4_p zero = {0, 0, 0, 0};
    const uint32x4_p mask = {0, 1, 0, 1};
#else
    const uint32x4_p zero = {0, 0, 0, 0};
    const uint32x4_p mask = {1, 0, 1, 0};
#endif

    uint32x4_p bw = vec_subc(vec1, vec2);
    uint32x4_p res = vec_sub(vec1, vec2);
    bw = vec_andc(mask, bw);
    bw = vec_sld (bw, zero, 4);
    return vec_sub(res, bw);
#endif
}
#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
inline uint64x2_p VecSub64(const uint64x2_p& vec1, const uint64x2_p& vec2)
{
    // 64-bit elements available at POWER8.
    const uint64x2_p res = vec_sub(vec1, vec2);

#if defined(CRYPTOPP_DEBUG)
    // Verify the 32-bit emulation against the native 64-bit subtract.
    const uint32x4_p x = (uint32x4_p)vec1;
    const uint32x4_p y = (uint32x4_p)vec2;
    const uint32x4_p r = VecSub64(x, y);
    CRYPTOPP_ASSERT(vec_all_eq((uint32x4_p)res, r) == 1);
#endif

    return res;
}
#endif  // _ARCH_PWR8
// Rotates the packed 64-bit elements left by C bits.
template<unsigned int C>
inline uint32x4_p VecRotateLeft64(const uint32x4_p vec)
{
#if defined(_ARCH_PWR8) && !defined(CRYPTOPP_DEBUG)
    // 64-bit elements available at POWER8.
    return (uint32x4_p)VecRotateLeft<C>((uint64x2_p)vec);
#else
    // C=0, 32 and 64 need special handling; that is S32 and S64 below.
    enum {S64=C&63, S32=C&31, BR=(S64>=32)};

    // Get the low bits, shift them to high bits.
    uint32x4_p t1 = VecShiftLeft<S32>(vec);
    // Get the high bits, shift them to low bits.
    uint32x4_p t2 = VecShiftRight<32-S32>(vec);

    if (S64 == 0)
    {
        const uint8x16_p m = {0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15};
        return VecPermute(vec, m);
    }
    else if (S64 == 32)
    {
        const uint8x16_p m = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
        return VecPermute(vec, m);
    }
    else if (BR)  // Big rotate amount?
    {
        const uint8x16_p m = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
        t1 = VecPermute(t1, m);
    }
    else  // Small rotate amount.
    {
        const uint8x16_p m = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
        t2 = VecPermute(t2, m);
    }

    return vec_or(t1, t2);
#endif
}
// Specialization: a rotate left by 8 is a single byte permute.
template<>
inline uint32x4_p VecRotateLeft64<8>(const uint32x4_p vec)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 };
    return VecPermute(vec, m);
#else
    const uint8x16_p m = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 };
    return VecPermute(vec, m);
#endif
}
#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
// Rotates the packed 64-bit elements left by C bits; requires POWER8.
template<unsigned int C>
inline uint64x2_p VecRotateLeft64(const uint64x2_p vec)
{
    const uint64x2_p res = VecRotateLeft<C>(vec);

#if defined(CRYPTOPP_DEBUG)
    // Verify the 32-bit emulation against the native 64-bit rotate.
    const uint32x4_p x = (uint32x4_p)vec;
    const uint32x4_p r = VecRotateLeft64<C>(x);
    CRYPTOPP_ASSERT(vec_all_eq((uint32x4_p)res, r) == 1);
#endif

    return res;
}
#endif  // _ARCH_PWR8
// Rotates the packed 64-bit elements right by C bits.
template<unsigned int C>
inline uint32x4_p VecRotateRight64(const uint32x4_p vec)
{
#if defined(_ARCH_PWR8) && !defined(CRYPTOPP_DEBUG)
    // 64-bit elements available at POWER8.
    return (uint32x4_p)VecRotateRight<C>((uint64x2_p)vec);
#else
    // C=0, 32 and 64 need special handling; that is S32 and S64 below.
    enum {S64=C&63, S32=C&31, BR=(S64>=32)};

    // Get the high bits, shift them to low bits.
    uint32x4_p t1 = VecShiftRight<S32>(vec);
    // Get the low bits, shift them to high bits.
    uint32x4_p t2 = VecShiftLeft<32-S32>(vec);

    if (S64 == 0)
    {
        const uint8x16_p m = {0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15};
        return VecPermute(vec, m);
    }
    else if (S64 == 32)
    {
        const uint8x16_p m = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
        return VecPermute(vec, m);
    }
    else if (BR)  // Big rotate amount?
    {
        const uint8x16_p m = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
        t1 = VecPermute(t1, m);
    }
    else  // Small rotate amount.
    {
        const uint8x16_p m = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
        t2 = VecPermute(t2, m);
    }

    return vec_or(t1, t2);
#endif
}
// Specialization: a rotate right by 8 is a single byte permute.
template<>
inline uint32x4_p VecRotateRight64<8>(const uint32x4_p vec)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 };
    return VecPermute(vec, m);
#else
    const uint8x16_p m = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 };
    return VecPermute(vec, m);
#endif
}
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
// Rotates the packed 64-bit elements right by C bits; requires POWER8.
template<unsigned int C>
inline uint64x2_p VecRotateRight64(const uint64x2_p vec)
{
    const uint64x2_p res = VecRotateRight<C>(vec);

#if defined(CRYPTOPP_DEBUG)
    // Verify the 32-bit emulation against the native 64-bit rotate.
    const uint32x4_p x = (uint32x4_p)vec;
    const uint32x4_p r = VecRotateRight64<C>(x);
    CRYPTOPP_ASSERT(vec_all_eq((uint32x4_p)res, r) == 1);
#endif

    return res;
}
#endif  // __VSX__ or _ARCH_PWR8
template <class T1, class T2>
inline T1 VecAnd64(const T1 vec1, const T2 vec2)
{
    return (T1)vec_and(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecOr64(const T1 vec1, const T2 vec2)
{
    return (T1)vec_or(vec1, (T1)vec2);
}
template <class T1, class T2>
inline T1 VecXor64(const T1 vec1, const T2 vec2)
{
    return (T1)vec_xor(vec1, (T1)vec2);
}
// Broadcasts a 64-bit value into a uint32x4_p.
inline uint32x4_p VecSplatWord64(word64 val)
{
#if defined(_ARCH_PWR8)
    // The PPC64 ABI says so.
    return (uint32x4_p)vec_splats((unsigned long long)val);
#else
    const word64 x[2] = {val,val};
    return (uint32x4_p)VecLoad((const word32*)x);
#endif
}
// Broadcasts the N-th 64-bit element of a uint32x4_p.
template <unsigned int N>
inline uint32x4_p VecSplatElement64(const uint32x4_p val)
{
#if defined(__VSX__) || defined(_ARCH_PWR8)
    return (uint32x4_p)vec_splat((uint64x2_p)val, N);
#else
    enum {E=N&1};
    if (E == 0)
    {
        const uint8x16_p m = {0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7};
        return (uint32x4_p)vec_perm(val, val, m);
    }
    else
    {
        const uint8x16_p m = {8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15};
        return (uint32x4_p)vec_perm(val, val, m);
    }
#endif
}
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
template <unsigned int N>
inline uint64x2_p VecSplatElement64(const uint64x2_p val)
{
    return vec_splat(val, N);
}
#endif  // __VSX__ or _ARCH_PWR8
#if (defined(_ARCH_PWR8) && defined(__CRYPTO__)) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

// Carryless multiplication of the packed 32-bit polynomials.
inline uint32x4_p VecPolyMultiply(const uint32x4_p& a, const uint32x4_p& b)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return __vpmsumw (a, b);
#elif defined(__clang__)
    return __builtin_altivec_crypto_vpmsumw (a, b);
#else
    return __builtin_crypto_vpmsumw (a, b);
#endif
}
// Carryless multiplication of the packed 64-bit polynomials.
inline uint64x2_p VecPolyMultiply(const uint64x2_p& a, const uint64x2_p& b)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return __vpmsumd (a, b);
#elif defined(__clang__)
    return __builtin_altivec_crypto_vpmsumd (a, b);
#else
    return __builtin_crypto_vpmsumd (a, b);
#endif
}
// Polynomial multiplication of the low halves, Intel selector "00".
inline uint64x2_p VecIntelMultiply00(const uint64x2_p& a, const uint64x2_p& b)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(VecGetHigh(a), VecGetHigh(b)));
#else
    return VecPolyMultiply(VecGetHigh(a), VecGetHigh(b));
#endif
}
// Polynomial multiplication, Intel selector "01".
inline uint64x2_p VecIntelMultiply01(const uint64x2_p& a, const uint64x2_p& b)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(a, VecGetHigh(b)));
#else
    return VecPolyMultiply(a, VecGetHigh(b));
#endif
}
// Polynomial multiplication, Intel selector "10".
inline uint64x2_p VecIntelMultiply10(const uint64x2_p& a, const uint64x2_p& b)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(VecGetHigh(a), b));
#else
    return VecPolyMultiply(VecGetHigh(a), b);
#endif
}
// Polynomial multiplication of the high halves, Intel selector "11".
inline uint64x2_p VecIntelMultiply11(const uint64x2_p& a, const uint64x2_p& b)
{
#if defined(CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(VecGetLow(a), b));
#else
    return VecPolyMultiply(VecGetLow(a), b);
#endif
}
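// Usage sketch (editor's addition): the VecIntelMultiply* helpers
// follow the selector convention of Intel's _mm_clmulepi64_si128,
// which lets GCM/GHASH code share structure across SSE and POWER8
// backends. The "00" form multiplies the low 64 bits of each operand:
//
//   const uint64x2_p r = VecIntelMultiply00(a, b);  // carryless a0*b0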
// One AES encryption round (AddRoundKey folded in).
template <class T1, class T2>
inline T1 VecEncrypt(const T1 state, const T2 key)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return (T1)__vcipher((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
    return (T1)__builtin_altivec_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
    return (T1)__builtin_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key);
#else
    CRYPTOPP_ASSERT(0);
#endif
}
// The final AES encryption round.
template <class T1, class T2>
inline T1 VecEncryptLast(const T1 state, const T2 key)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return (T1)__vcipherlast((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
    return (T1)__builtin_altivec_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
    return (T1)__builtin_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key);
#else
    CRYPTOPP_ASSERT(0);
#endif
}
// One AES decryption round.
template <class T1, class T2>
inline T1 VecDecrypt(const T1 state, const T2 key)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return (T1)__vncipher((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
    return (T1)__builtin_altivec_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
    return (T1)__builtin_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key);
#else
    CRYPTOPP_ASSERT(0);
#endif
}
// The final AES decryption round.
template <class T1, class T2>
inline T1 VecDecryptLast(const T1 state, const T2 key)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return (T1)__vncipherlast((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
    return (T1)__builtin_altivec_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
    return (T1)__builtin_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key);
#else
    CRYPTOPP_ASSERT(0);
#endif
}
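// Usage sketch (editor's addition): a full AES-128 encryption using
// the round primitives. "rk" is a hypothetical array of 11 round keys
// already laid out as the hardware expects.
//
//   uint8x16_p s = VecXor(block, rk[0]);
//   for (int i = 1; i < 10; ++i)
//       s = VecEncrypt(s, rk[i]);
//   s = VecEncryptLast(s, rk[10]);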
// SHA-256 sigma functions; func and fmask select the variant.
template <int func, int fmask, class T>
inline T VecSHA256(const T data)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return (T)__vshasigmaw((uint32x4_p)data, func, fmask);
#elif defined(__clang__)
    return (T)__builtin_altivec_crypto_vshasigmaw((uint32x4_p)data, func, fmask);
#elif defined(__GNUC__)
    return (T)__builtin_crypto_vshasigmaw((uint32x4_p)data, func, fmask);
#else
    CRYPTOPP_ASSERT(0);
#endif
}
// SHA-512 sigma functions; func and fmask select the variant.
template <int func, int fmask, class T>
inline T VecSHA512(const T data)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    return (T)__vshasigmad((uint64x2_p)data, func, fmask);
#elif defined(__clang__)
    return (T)__builtin_altivec_crypto_vshasigmad((uint64x2_p)data, func, fmask);
#elif defined(__GNUC__)
    return (T)__builtin_crypto_vshasigmad((uint64x2_p)data, func, fmask);
#else
    CRYPTOPP_ASSERT(0);
#endif
}

#endif  // __CRYPTO__
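// Usage sketch (editor's addition): func selects the small or capital
// sigma family and fmask selects which function applies to each lane,
// mirroring the vshasigmaw/vshasigmad operand encoding. A common
// selector choice for SHA-256 message scheduling:
//
//   const uint32x4_p s0 = VecSHA256<0,0>(x);    // sigma0 on all lanes
//   const uint32x4_p s1 = VecSHA256<0,0xf>(x);  // sigma1 on all lanes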
#endif  // __ALTIVEC__

NAMESPACE_END

#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
# pragma GCC diagnostic pop
#endif

#endif  // CRYPTOPP_PPC_CRYPTO_H