// speck128_simd.cpp - SSSE3, ARM NEON and Altivec kernels for SPECK-128

#include "pch.h"
#include "config.h"

#include "speck.h"
#include "misc.h"

#if (CRYPTOPP_SSSE3_AVAILABLE)
# include "adv_simd.h"
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
# if defined(__GNUC__)
#  include <x86intrin.h>
# endif
#endif
#if (CRYPTOPP_ARM_NEON_HEADER)
# include "adv_simd.h"
# include <arm_neon.h>
#endif
#if (CRYPTOPP_ARM_ACLE_HEADER)
# include <stdint.h>
# include <arm_acle.h>
#endif
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "adv_simd.h"
# include "ppc_simd.h"
#endif
// Squash MS LNK4221 and libtool warnings about an empty translation unit
extern const char SPECK128_SIMD_FNAME[] = __FILE__;

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word32;
using CryptoPP::word64;
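
// For orientation, one SPECK-128 round and its inverse in scalar pseudo-code,
// with 64-bit words x, y and round key k (ROTL/ROTR denote 64-bit rotations).
// Every SIMD kernel below computes exactly this recurrence, just on the x and
// y words of several blocks packed into vector registers:
//
//   encrypt round:  x = (ROTR(x, 8) + y) ^ k;
//                   y =  ROTL(y, 3) ^ x;
//
//   decrypt round:  y = ROTR(y ^ x, 3);
//                   x = ROTL((x ^ k) - y, 8);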
// ***************************** ARM NEON ***************************** //

#if (CRYPTOPP_ARM_NEON_AVAILABLE)

// vld1q_dup_u64 is missing from Microsoft's ARM A-32 toolchain
#if defined(_MSC_VER) && !defined(_M_ARM64)
inline uint64x2_t vld1q_dup_u64(const uint64_t* ptr)
{
    return vmovq_n_u64(*ptr);
}
#endif
template <class T>
inline T UnpackHigh64(const T& a, const T& b)
{
    const uint64x1_t x(vget_high_u64((uint64x2_t)a));
    const uint64x1_t y(vget_high_u64((uint64x2_t)b));
    return (T)vcombine_u64(x, y);
}

template <class T>
inline T UnpackLow64(const T& a, const T& b)
{
    const uint64x1_t x(vget_low_u64((uint64x2_t)a));
    const uint64x1_t y(vget_low_u64((uint64x2_t)b));
    return (T)vcombine_u64(x, y);
}
template <unsigned int R>
inline uint64x2_t RotateLeft64(const uint64x2_t& val)
{
    const uint64x2_t a(vshlq_n_u64(val, R));
    const uint64x2_t b(vshrq_n_u64(val, 64 - R));
    return vorrq_u64(a, b);
}

template <unsigned int R>
inline uint64x2_t RotateRight64(const uint64x2_t& val)
{
    const uint64x2_t a(vshlq_n_u64(val, 64 - R));
    const uint64x2_t b(vshrq_n_u64(val, R));
    return vorrq_u64(a, b);
}
#if defined(__aarch32__) || defined(__aarch64__)
// Rotation by 8 is byte-granular, so a table lookup beats two shifts and an OR
template <>
inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
{
    const uint8_t maskb[16] = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 };
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u64_u8(
        vqtbl1q_u8(vreinterpretq_u8_u64(val), mask));
}

template <>
inline uint64x2_t RotateRight64<8>(const uint64x2_t& val)
{
    const uint8_t maskb[16] = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 };
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u64_u8(
        vqtbl1q_u8(vreinterpretq_u8_u64(val), mask));
}
#endif
inline void SPECK128_Enc_Block(uint64x2_t &block0, uint64x2_t &block1,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);

    for (size_t i=0; i < static_cast<size_t>(rounds); ++i)
    {
        const uint64x2_t rk = vld1q_dup_u64(subkeys+i);

        x1 = RotateRight64<8>(x1);
        x1 = vaddq_u64(x1, y1);
        x1 = veorq_u64(x1, rk);
        y1 = RotateLeft64<3>(y1);
        y1 = veorq_u64(y1, x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow64(y1, x1);
    block1 = UnpackHigh64(y1, x1);
}
inline void SPECK128_Enc_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
    uint64x2_t &block2, uint64x2_t &block3, uint64x2_t &block4, uint64x2_t &block5,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);
    uint64x2_t x2 = UnpackHigh64(block2, block3);
    uint64x2_t y2 = UnpackLow64(block2, block3);
    uint64x2_t x3 = UnpackHigh64(block4, block5);
    uint64x2_t y3 = UnpackLow64(block4, block5);

    for (size_t i=0; i < static_cast<size_t>(rounds); ++i)
    {
        const uint64x2_t rk = vld1q_dup_u64(subkeys+i);

        x1 = RotateRight64<8>(x1);
        x2 = RotateRight64<8>(x2);
        x3 = RotateRight64<8>(x3);
        x1 = vaddq_u64(x1, y1);
        x2 = vaddq_u64(x2, y2);
        x3 = vaddq_u64(x3, y3);
        x1 = veorq_u64(x1, rk);
        x2 = veorq_u64(x2, rk);
        x3 = veorq_u64(x3, rk);
        y1 = RotateLeft64<3>(y1);
        y2 = RotateLeft64<3>(y2);
        y3 = RotateLeft64<3>(y3);
        y1 = veorq_u64(y1, x1);
        y2 = veorq_u64(y2, x2);
        y3 = veorq_u64(y3, x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow64(y1, x1);
    block1 = UnpackHigh64(y1, x1);
    block2 = UnpackLow64(y2, x2);
    block3 = UnpackHigh64(y2, x2);
    block4 = UnpackLow64(y3, x3);
    block5 = UnpackHigh64(y3, x3);
}
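
// SPECK128_Enc_6_Blocks (and the decryption counterpart below) interleave three
// independent two-block streams. The interleaving is a throughput choice: the
// streams have no data dependencies on one another, which should let the
// processor overlap the latencies of the rotate/add/xor chains.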
inline void SPECK128_Dec_Block(uint64x2_t &block0, uint64x2_t &block1,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint64x2_t rk = vld1q_dup_u64(subkeys+i);

        y1 = veorq_u64(y1, x1);
        y1 = RotateRight64<3>(y1);
        x1 = veorq_u64(x1, rk);
        x1 = vsubq_u64(x1, y1);
        x1 = RotateLeft64<8>(x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow64(y1, x1);
    block1 = UnpackHigh64(y1, x1);
}
inline void SPECK128_Dec_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
    uint64x2_t &block2, uint64x2_t &block3, uint64x2_t &block4, uint64x2_t &block5,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);
    uint64x2_t x2 = UnpackHigh64(block2, block3);
    uint64x2_t y2 = UnpackLow64(block2, block3);
    uint64x2_t x3 = UnpackHigh64(block4, block5);
    uint64x2_t y3 = UnpackLow64(block4, block5);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint64x2_t rk = vld1q_dup_u64(subkeys+i);

        y1 = veorq_u64(y1, x1);
        y2 = veorq_u64(y2, x2);
        y3 = veorq_u64(y3, x3);
        y1 = RotateRight64<3>(y1);
        y2 = RotateRight64<3>(y2);
        y3 = RotateRight64<3>(y3);
        x1 = veorq_u64(x1, rk);
        x2 = veorq_u64(x2, rk);
        x3 = veorq_u64(x3, rk);
        x1 = vsubq_u64(x1, y1);
        x2 = vsubq_u64(x2, y2);
        x3 = vsubq_u64(x3, y3);
        x1 = RotateLeft64<8>(x1);
        x2 = RotateLeft64<8>(x2);
        x3 = RotateLeft64<8>(x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow64(y1, x1);
    block1 = UnpackHigh64(y1, x1);
    block2 = UnpackLow64(y2, x2);
    block3 = UnpackHigh64(y2, x2);
    block4 = UnpackLow64(y3, x3);
    block5 = UnpackHigh64(y3, x3);
}

#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
// ***************************** SSSE3 ***************************** //

#if defined(CRYPTOPP_SSSE3_AVAILABLE)

// Pointer cast helpers for the SSE intrinsics
#ifndef CONST_M128_CAST
# define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
#endif

#ifndef DOUBLE_CAST
# define DOUBLE_CAST(x) ((double *)(void *)(x))
#endif

#ifndef CONST_DOUBLE_CAST
# define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
#endif
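
// CONST_M128_CAST feeds the full 128-bit aligned loads in the encryption
// kernels (subkeys+i*2), while CONST_DOUBLE_CAST feeds _mm_loaddup_pd in the
// decryption kernels (subkeys+i), which broadcasts a single 64-bit subkey into
// both lanes. The forward key table is evidently laid out with each round key
// stored twice, so the plain 128-bit load already acts as a broadcast.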
template <unsigned int R>
inline __m128i RotateLeft64(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi64(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
#endif
}

template <unsigned int R>
inline __m128i RotateRight64(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi64(val, 64-R);
#else
    return _mm_or_si128(
        _mm_slli_epi64(val, 64-R), _mm_srli_epi64(val, R));
#endif
}

// Rotation by 8 is byte-granular, so a byte shuffle beats two shifts and an OR
template <>
__m128i RotateLeft64<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi64(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
    return _mm_shuffle_epi8(val, mask);
#endif
}

template <>
__m128i RotateRight64<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi64(val, 64-8);
#else
    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
inline void SPECK128_Enc_Block(__m128i &block0, __m128i &block1,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);

    for (size_t i=0; i < static_cast<size_t>(rounds); ++i)
    {
        const __m128i rk = _mm_load_si128(CONST_M128_CAST(subkeys+i*2));

        x1 = RotateRight64<8>(x1);
        x1 = _mm_add_epi64(x1, y1);
        x1 = _mm_xor_si128(x1, rk);
        y1 = RotateLeft64<3>(y1);
        y1 = _mm_xor_si128(y1, x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi64(y1, x1);
    block1 = _mm_unpackhi_epi64(y1, x1);
}
inline void SPECK128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
    __m128i x2 = _mm_unpackhi_epi64(block2, block3);
    __m128i y2 = _mm_unpacklo_epi64(block2, block3);
    __m128i x3 = _mm_unpackhi_epi64(block4, block5);
    __m128i y3 = _mm_unpacklo_epi64(block4, block5);

    for (size_t i=0; i < static_cast<size_t>(rounds); ++i)
    {
        const __m128i rk = _mm_load_si128(CONST_M128_CAST(subkeys+i*2));

        x1 = RotateRight64<8>(x1);
        x2 = RotateRight64<8>(x2);
        x3 = RotateRight64<8>(x3);
        x1 = _mm_add_epi64(x1, y1);
        x2 = _mm_add_epi64(x2, y2);
        x3 = _mm_add_epi64(x3, y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        y1 = RotateLeft64<3>(y1);
        y2 = RotateLeft64<3>(y2);
        y3 = RotateLeft64<3>(y3);
        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi64(y1, x1);
    block1 = _mm_unpackhi_epi64(y1, x1);
    block2 = _mm_unpacklo_epi64(y2, x2);
    block3 = _mm_unpackhi_epi64(y2, x2);
    block4 = _mm_unpacklo_epi64(y3, x3);
    block5 = _mm_unpackhi_epi64(y3, x3);
}
inline void SPECK128_Dec_Block(__m128i &block0, __m128i &block1,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        // Broadcast the 64-bit subkey into both lanes
        const __m128i rk = _mm_castpd_si128(
            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i)));

        y1 = _mm_xor_si128(y1, x1);
        y1 = RotateRight64<3>(y1);
        x1 = _mm_xor_si128(x1, rk);
        x1 = _mm_sub_epi64(x1, y1);
        x1 = RotateLeft64<8>(x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi64(y1, x1);
    block1 = _mm_unpackhi_epi64(y1, x1);
}
inline void SPECK128_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word64 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
    __m128i x2 = _mm_unpackhi_epi64(block2, block3);
    __m128i y2 = _mm_unpacklo_epi64(block2, block3);
    __m128i x3 = _mm_unpackhi_epi64(block4, block5);
    __m128i y3 = _mm_unpacklo_epi64(block4, block5);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        // Broadcast the 64-bit subkey into both lanes
        const __m128i rk = _mm_castpd_si128(
            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i)));

        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
        y1 = RotateRight64<3>(y1);
        y2 = RotateRight64<3>(y2);
        y3 = RotateRight64<3>(y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        x1 = _mm_sub_epi64(x1, y1);
        x2 = _mm_sub_epi64(x2, y2);
        x3 = _mm_sub_epi64(x3, y3);
        x1 = RotateLeft64<8>(x1);
        x2 = RotateLeft64<8>(x2);
        x3 = RotateLeft64<8>(x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi64(y1, x1);
    block1 = _mm_unpackhi_epi64(y1, x1);
    block2 = _mm_unpacklo_epi64(y2, x2);
    block3 = _mm_unpackhi_epi64(y2, x2);
    block4 = _mm_unpacklo_epi64(y3, x3);
    block5 = _mm_unpackhi_epi64(y3, x3);
}

#endif  // CRYPTOPP_SSSE3_AVAILABLE
// ***************************** Altivec ***************************** //

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

using CryptoPP::uint8x16_p;
using CryptoPP::uint32x4_p;
#if defined(_ARCH_PWR8)
using CryptoPP::uint64x2_p;
#endif

using CryptoPP::VecAdd64;
using CryptoPP::VecSub64;
using CryptoPP::VecAnd64;
using CryptoPP::VecOr64;
using CryptoPP::VecXor64;
using CryptoPP::VecSplatWord64;
using CryptoPP::VecRotateLeft64;
using CryptoPP::VecRotateRight64;
using CryptoPP::VecLoad;
using CryptoPP::VecLoadAligned;
using CryptoPP::VecPermute;

#if defined(_ARCH_PWR8)
#define speck128_t uint64x2_p
#else
#define speck128_t uint32x4_p
#endif
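
// speck128_t keeps the kernels below generic across PowerPC targets. On POWER8
// and later the vectors are native 64x2 (uint64x2_p); on earlier Altivec
// targets they stay 32x4 and the VecAdd64/VecSub64/VecRotateLeft64 helpers
// from ppc_simd.h are relied on to emulate the 64-bit lane arithmetic.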
void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
    const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
#else
    const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
    const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
#endif

    // Unpack the two 64-bit words of the block into separate vectors
    speck128_t x1 = (speck128_t)VecPermute(block, block, m1);
    speck128_t y1 = (speck128_t)VecPermute(block, block, m2);

    for (size_t i=0; i < static_cast<size_t>(rounds); ++i)
    {
        const word32* ptr = reinterpret_cast<const word32*>(subkeys+i*2);
        const speck128_t rk = (speck128_t)VecLoadAligned(ptr);

        x1 = (speck128_t)VecRotateRight64<8>(x1);
        x1 = (speck128_t)VecAdd64(x1, y1);
        x1 = (speck128_t)VecXor64(x1, rk);

        y1 = (speck128_t)VecRotateLeft64<3>(y1);
        y1 = (speck128_t)VecXor64(y1, x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
#else
    const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
#endif

    // Repack the two words into the output block
    block = (uint32x4_p)VecPermute(x1, y1, m3);
}
void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
    const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
#else
    const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
    const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
#endif

    // Unpack the two 64-bit words of the block into separate vectors
    speck128_t x1 = (speck128_t)VecPermute(block, block, m1);
    speck128_t y1 = (speck128_t)VecPermute(block, block, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const speck128_t rk = (speck128_t)VecSplatWord64(subkeys[i]);

        y1 = (speck128_t)VecXor64(y1, x1);
        y1 = (speck128_t)VecRotateRight64<3>(y1);
        x1 = (speck128_t)VecXor64(x1, rk);
        x1 = (speck128_t)VecSub64(x1, y1);
        x1 = (speck128_t)VecRotateLeft64<8>(x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
#else
    const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
#endif

    // Repack the two words into the output block
    block = (uint32x4_p)VecPermute(x1, y1, m3);
}
void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word64 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
    const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
#else
    const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
    const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
#endif

    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    speck128_t x1 = (speck128_t)VecPermute(block0, block1, m1);
    speck128_t y1 = (speck128_t)VecPermute(block0, block1, m2);
    speck128_t x2 = (speck128_t)VecPermute(block2, block3, m1);
    speck128_t y2 = (speck128_t)VecPermute(block2, block3, m2);
    speck128_t x3 = (speck128_t)VecPermute(block4, block5, m1);
    speck128_t y3 = (speck128_t)VecPermute(block4, block5, m2);

    for (size_t i=0; i < static_cast<size_t>(rounds); ++i)
    {
        const word32* ptr = reinterpret_cast<const word32*>(subkeys+i*2);
        const speck128_t rk = (speck128_t)VecLoadAligned(ptr);

        x1 = (speck128_t)VecRotateRight64<8>(x1);
        x2 = (speck128_t)VecRotateRight64<8>(x2);
        x3 = (speck128_t)VecRotateRight64<8>(x3);
        x1 = (speck128_t)VecAdd64(x1, y1);
        x2 = (speck128_t)VecAdd64(x2, y2);
        x3 = (speck128_t)VecAdd64(x3, y3);
        x1 = (speck128_t)VecXor64(x1, rk);
        x2 = (speck128_t)VecXor64(x2, rk);
        x3 = (speck128_t)VecXor64(x3, rk);

        y1 = (speck128_t)VecRotateLeft64<3>(y1);
        y2 = (speck128_t)VecRotateLeft64<3>(y2);
        y3 = (speck128_t)VecRotateLeft64<3>(y3);
        y1 = (speck128_t)VecXor64(y1, x1);
        y2 = (speck128_t)VecXor64(y2, x2);
        y3 = (speck128_t)VecXor64(y3, x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
    const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
#else
    const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
    const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
#endif

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = (uint32x4_p)VecPermute(x1, y1, m3);
    block1 = (uint32x4_p)VecPermute(x1, y1, m4);
    block2 = (uint32x4_p)VecPermute(x2, y2, m3);
    block3 = (uint32x4_p)VecPermute(x2, y2, m4);
    block4 = (uint32x4_p)VecPermute(x3, y3, m3);
    block5 = (uint32x4_p)VecPermute(x3, y3, m4);
}
void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word64 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
    const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
#else
    const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
    const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
#endif

    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    speck128_t x1 = (speck128_t)VecPermute(block0, block1, m1);
    speck128_t y1 = (speck128_t)VecPermute(block0, block1, m2);
    speck128_t x2 = (speck128_t)VecPermute(block2, block3, m1);
    speck128_t y2 = (speck128_t)VecPermute(block2, block3, m2);
    speck128_t x3 = (speck128_t)VecPermute(block4, block5, m1);
    speck128_t y3 = (speck128_t)VecPermute(block4, block5, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const speck128_t rk = (speck128_t)VecSplatWord64(subkeys[i]);

        y1 = (speck128_t)VecXor64(y1, x1);
        y2 = (speck128_t)VecXor64(y2, x2);
        y3 = (speck128_t)VecXor64(y3, x3);
        y1 = (speck128_t)VecRotateRight64<3>(y1);
        y2 = (speck128_t)VecRotateRight64<3>(y2);
        y3 = (speck128_t)VecRotateRight64<3>(y3);

        x1 = (speck128_t)VecXor64(x1, rk);
        x2 = (speck128_t)VecXor64(x2, rk);
        x3 = (speck128_t)VecXor64(x3, rk);
        x1 = (speck128_t)VecSub64(x1, y1);
        x2 = (speck128_t)VecSub64(x2, y2);
        x3 = (speck128_t)VecSub64(x3, y3);
        x1 = (speck128_t)VecRotateLeft64<8>(x1);
        x2 = (speck128_t)VecRotateLeft64<8>(x2);
        x3 = (speck128_t)VecRotateLeft64<8>(x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
    const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
#else
    const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
    const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
#endif

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = (uint32x4_p)VecPermute(x1, y1, m3);
    block1 = (uint32x4_p)VecPermute(x1, y1, m4);
    block2 = (uint32x4_p)VecPermute(x2, y2, m3);
    block3 = (uint32x4_p)VecPermute(x2, y2, m4);
    block4 = (uint32x4_p)VecPermute(x3, y3, m3);
    block5 = (uint32x4_p)VecPermute(x3, y3, m4);
}

#endif  // CRYPTOPP_ALTIVEC_AVAILABLE
ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
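
// The exported functions below are the entry points declared in speck.h. They
// pass the single-block and 6-block kernels to the AdvancedProcessBlocks
// templates from adv_simd.h, which handle the outer loop: walking the input
// and output buffers, applying xorBlocks when requested, and honoring the
// counter and xor behavior selected through 'flags'.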
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
#if (CRYPTOPP_SSSE3_AVAILABLE)
size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_SSSE3_AVAILABLE
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
size_t SPECK128_Enc_AdvancedProcessBlocks_ALTIVEC(const word64* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x1_ALTIVEC(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK128_Dec_AdvancedProcessBlocks_ALTIVEC(const word64* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x1_ALTIVEC(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_ALTIVEC_AVAILABLE

NAMESPACE_END