#if (CRYPTOPP_SSSE3_AVAILABLE)
# include "adv_simd.h"
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if (CRYPTOPP_SSE41_AVAILABLE)
# include <smmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
#endif

#if defined(__AVX512F__)
# define CRYPTOPP_AVX512_ROTATE 1
# include <immintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include "adv_simd.h"
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <arm_acle.h>
#endif

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "adv_simd.h"
# include "ppc_simd.h"
#endif

extern const char SPECK64_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word32;
using CryptoPP::word64;
#if (CRYPTOPP_ARM_NEON_AVAILABLE)

template <class T>
inline T UnpackHigh32(const T& a, const T& b)
{
    const uint32x2_t x(vget_high_u32((uint32x4_t)a));
    const uint32x2_t y(vget_high_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}

template <class T>
inline T UnpackLow32(const T& a, const T& b)
{
    const uint32x2_t x(vget_low_u32((uint32x4_t)a));
    const uint32x2_t y(vget_low_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}
template <unsigned int R>
inline uint32x4_t RotateLeft32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, R));
    const uint32x4_t b(vshrq_n_u32(val, 32 - R));
    return vorrq_u32(a, b);
}

template <unsigned int R>
inline uint32x4_t RotateRight32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, 32 - R));
    const uint32x4_t b(vshrq_n_u32(val, R));
    return vorrq_u32(a, b);
}
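// Rotation of a 32-bit lane by 8 bits moves whole bytes, so the
// specializations below replace the shift-and-or sequence with a single
// byte-table lookup on Aarch32 and Aarch64.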
#if defined(__aarch32__) || defined(__aarch64__)
template <>
inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
    const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}

template <>
inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
#endif  // Aarch32 or Aarch64

inline void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = vaddq_u32(x1, y1);
        x1 = veorq_u32(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = veorq_u32(y1, x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
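// For reference, one SPECK64 encryption round on a single (x, y) word pair,
// written as illustrative scalar pseudocode (rotr32/rotl32 are assumed
// helpers, not part of this file):
//
//   x = (rotr32(x, 8) + y) ^ subkeys[i];
//   y = rotl32(y, 3) ^ x;
//
// The loop above applies the same round to four word pairs per vector pass.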
inline void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = veorq_u32(x1, rk);
        x1 = vsubq_u32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
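// The matching inverse round, again as illustrative scalar pseudocode:
//
//   y = rotr32(y ^ x, 3);
//   x = rotl32((x ^ subkeys[i]) - y, 8);
//
// which is why decryption walks the subkey schedule in reverse.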
inline void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = vaddq_u32(x1, y1);
        x2 = vaddq_u32(x2, y2);
        x3 = vaddq_u32(x3, y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
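// The 6-block routines carry three independent (x, y) vector pairs (twelve
// SPECK64 blocks) through the round loop. Interleaving three data streams
// gives the processor independent instructions to overlap, which helps hide
// the latency of the rotate, add and xor chain.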
inline void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        x1 = vsubq_u32(x1, y1);
        x2 = vsubq_u32(x2, y2);
        x3 = vsubq_u32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE

#if defined(CRYPTOPP_SSE41_AVAILABLE)

template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
#endif
}

template <unsigned int R>
inline __m128i RotateRight32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
#endif
}
template <>
__m128i RotateLeft32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}

template <>
__m128i RotateRight32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-8);
#else
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
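// _mm_roti_epi32 is an XOP instruction that rotates each lane in one step;
// the fallback paths use a byte shuffle (SSSE3) for the 8-bit case and a
// shift-and-or sequence for general rotation amounts.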
inline void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = _mm_add_epi32(x1, y1);
        x1 = _mm_xor_si128(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = _mm_xor_si128(y1, x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
inline void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = _mm_xor_si128(x1, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
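// SSE has no two-source shuffle over 32-bit integer lanes, so the block
// de-interleave above casts to __m128 and uses _mm_shuffle_ps to gather the
// x words (odd lanes) and y words (even lanes); _mm_unpacklo_epi32 and
// _mm_unpackhi_epi32 restore the original block layout afterwards.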
inline void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = _mm_add_epi32(x1, y1);
        x2 = _mm_add_epi32(x2, y2);
        x3 = _mm_add_epi32(x3, y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
inline void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x2 = _mm_sub_epi32(x2, y2);
        x3 = _mm_sub_epi32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
#endif  // CRYPTOPP_SSE41_AVAILABLE

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

using CryptoPP::uint8x16_p;
using CryptoPP::uint32x4_p;

using CryptoPP::VecAdd;
using CryptoPP::VecSub;
using CryptoPP::VecXor;
using CryptoPP::VecLoad;
using CryptoPP::VecPermute;

// Rotate left by bit count
template<unsigned int C>
inline uint32x4_p RotateLeft32(const uint32x4_p val)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_rl(val, m);
}

// Rotate right by bit count
template<unsigned int C>
inline uint32x4_p RotateRight32(const uint32x4_p val)
{
    const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
    return vec_rl(val, m);
}
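// AltiVec has a native per-element rotate (vec_rl), so unlike the NEON and
// SSE paths no byte-shuffle specialization is needed for the 8-bit rotate.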
void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Load the subkey and splat it to all four lanes with a permute
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x1 = VecAdd(x1, y1);
        x1 = VecXor(x1, rk);

        y1 = RotateLeft32<3>(y1);
        y1 = VecXor(y1, x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
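// Note on the round-key handling above: on POWER8 and later the 32-bit subkey
// is broadcast to all four lanes directly, while the plain AltiVec path loads
// it with VecLoad and replicates it with a permute. The endian-specific
// m1/m2 and m3/m4 masks perform the same de-interleave and re-interleave of
// block words that vuzpq_u32 and _mm_shuffle_ps do in the NEON and SSE code.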
void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Load the subkey and splat it to all four lanes with a permute
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        y1 = VecXor(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = VecXor(x1, rk);
        x1 = VecSub(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Load the subkey and splat it to all four lanes with a permute
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = VecAdd(x1, y1);
        x2 = VecAdd(x2, y2);
        x3 = VecAdd(x3, y3);
        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);

        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] => [A1 A3 B1 B3][A2 A4 B2 B4]
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Load the subkey and splat it to all four lanes with a permute
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);

        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);
        x1 = VecSub(x1, y1);
        x2 = VecSub(x2, y2);
        x3 = VecSub(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}

#endif  // CRYPTOPP_ALTIVEC_AVAILABLE

ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
#if defined(CRYPTOPP_SSE41_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_SSE41_AVAILABLE
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_ALTIVEC_AVAILABLE

NAMESPACE_END
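// A minimal usage sketch, illustrative only and not part of this file. These
// routines are normally reached through SPECK64's AdvancedProcessBlocks()
// dispatch rather than called directly. Assuming `rk` points to the round
// keys from SPECK64 key setup and `rounds` matches the keyed round count:
//
//   CryptoPP::byte in[16], out[16];  // two 8-byte SPECK64 blocks
//   // xorBlocks = NULLPTR and flags = 0: plain ECB-style processing
//   CryptoPP::SPECK64_Enc_AdvancedProcessBlocks_SSE41(
//       rk, rounds, in, NULLPTR, out, sizeof(in), 0);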