19 #if (CRYPTOPP_SSSE3_AVAILABLE) 21 # include <pmmintrin.h> 22 # include <tmmintrin.h> 26 # include <ammintrin.h> 29 #if defined(__AVX512F__) 30 # define CRYPTOPP_AVX512_ROTATE 1 31 # include <immintrin.h> 35 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 38 # include <arm_neon.h> 42 #if (CRYPTOPP_ARM_ACLE_AVAILABLE) 44 # include <arm_acle.h> 61 #undef CRYPTOPP_POWER8_AVAILABLE 62 #if defined(CRYPTOPP_POWER8_AVAILABLE) 68 extern const char LEA_SIMD_FNAME[] = __FILE__;
70 ANONYMOUS_NAMESPACE_BEGIN
72 using CryptoPP::word32;
76 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 78 inline uint32x4_t Xor(
const uint32x4_t& a,
const uint32x4_t& b)
80 return veorq_u32(a, b);
83 inline uint32x4_t Add(
const uint32x4_t& a,
const uint32x4_t& b)
85 return vaddq_u32(a, b);
88 inline uint32x4_t Sub(
const uint32x4_t& a,
const uint32x4_t& b)
90 return vsubq_u32(a, b);
93 template <
unsigned int R>
94 inline uint32x4_t RotateLeft(
const uint32x4_t& val)
96 const uint32x4_t a(vshlq_n_u32(val, R));
97 const uint32x4_t b(vshrq_n_u32(val, 32 - R));
98 return vorrq_u32(a, b);
101 template <
unsigned int R>
102 inline uint32x4_t RotateRight(
const uint32x4_t& val)
104 const uint32x4_t a(vshlq_n_u32(val, 32 - R));
105 const uint32x4_t b(vshrq_n_u32(val, R));
106 return vorrq_u32(a, b);
109 #if defined(__aarch32__) || defined(__aarch64__) 111 inline uint32x4_t RotateLeft<8>(
const uint32x4_t& val)
113 #if (CRYPTOPP_BIG_ENDIAN) 114 const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
115 const uint8x16_t mask = vld1q_u8(maskb);
117 const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
118 const uint8x16_t mask = vld1q_u8(maskb);
121 return vreinterpretq_u32_u8(
122 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
126 inline uint32x4_t RotateRight<8>(
const uint32x4_t& val)
128 #if (CRYPTOPP_BIG_ENDIAN) 129 const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
130 const uint8x16_t mask = vld1q_u8(maskb);
132 const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,14,12 };
133 const uint8x16_t mask = vld1q_u8(maskb);
136 return vreinterpretq_u32_u8(
137 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
141 uint32x4_t UnpackLow32(uint32x4_t a, uint32x4_t b)
143 uint32x2_t a1 = vget_low_u32(a);
144 uint32x2_t b1 = vget_low_u32(b);
145 uint32x2x2_t result = vzip_u32(a1, b1);
146 return vcombine_u32(result.val[0], result.val[1]);
149 uint32x4_t UnpackHigh32(uint32x4_t a, uint32x4_t b)
151 uint32x2_t a1 = vget_high_u32(a);
152 uint32x2_t b1 = vget_high_u32(b);
153 uint32x2x2_t result = vzip_u32(a1, b1);
154 return vcombine_u32(result.val[0], result.val[1]);
157 uint32x4_t UnpackLow64(uint32x4_t a, uint32x4_t b)
159 uint64x1_t a1 = vget_low_u64((uint64x2_t)a);
160 uint64x1_t b1 = vget_low_u64((uint64x2_t)b);
161 return (uint32x4_t)vcombine_u64(a1, b1);
164 uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
166 uint64x1_t a1 = vget_high_u64((uint64x2_t)a);
167 uint64x1_t b1 = vget_high_u64((uint64x2_t)b);
168 return (uint32x4_t)vcombine_u64(a1, b1);
171 template <
unsigned int IDX>
172 inline uint32x4_t LoadKey(
const word32 rkey[])
174 return vdupq_n_u32(rkey[IDX]);
177 template <
unsigned int IDX>
178 inline uint32x4_t UnpackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
182 return vmovq_n_u32(0);
186 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
188 const uint32x4_t r1 = UnpackLow32(a, b);
189 const uint32x4_t r2 = UnpackLow32(c, d);
190 return UnpackLow64(r1, r2);
194 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
196 const uint32x4_t r1 = UnpackLow32(a, b);
197 const uint32x4_t r2 = UnpackLow32(c, d);
198 return UnpackHigh64(r1, r2);
202 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
204 const uint32x4_t r1 = UnpackHigh32(a, b);
205 const uint32x4_t r2 = UnpackHigh32(c, d);
206 return UnpackLow64(r1, r2);
210 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
212 const uint32x4_t r1 = UnpackHigh32(a, b);
213 const uint32x4_t r2 = UnpackHigh32(c, d);
214 return UnpackHigh64(r1, r2);
217 template <
unsigned int IDX>
218 inline uint32x4_t UnpackNEON(
const uint32x4_t& v)
222 return vmovq_n_u32(0);
226 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& v)
229 return vdupq_n_u32(vgetq_lane_u32(v, 0));
233 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& v)
236 return vdupq_n_u32(vgetq_lane_u32(v, 1));
240 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& v)
243 return vdupq_n_u32(vgetq_lane_u32(v, 2));
247 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& v)
250 return vdupq_n_u32(vgetq_lane_u32(v, 3));
253 template <
unsigned int IDX>
254 inline uint32x4_t RepackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
256 return UnpackNEON<IDX>(a, b, c, d);
259 template <
unsigned int IDX>
260 inline uint32x4_t RepackNEON(
const uint32x4_t& v)
262 return UnpackNEON<IDX>(v);
265 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 269 #if (CRYPTOPP_SSSE3_AVAILABLE) 271 inline __m128i Xor(
const __m128i& a,
const __m128i& b)
273 return _mm_xor_si128(a, b);
276 inline __m128i Add(
const __m128i& a,
const __m128i& b)
278 return _mm_add_epi32(a, b);
281 inline __m128i Sub(
const __m128i& a,
const __m128i& b)
283 return _mm_sub_epi32(a, b);
286 template <
unsigned int R>
287 inline __m128i RotateLeft(
const __m128i& val)
290 return _mm_roti_epi32(val, R);
293 _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
297 template <
unsigned int R>
298 inline __m128i RotateRight(
const __m128i& val)
301 return _mm_roti_epi32(val, 32-R);
304 _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
310 inline __m128i RotateLeft<8>(
const __m128i& val)
313 return _mm_roti_epi32(val, 8);
315 const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
316 return _mm_shuffle_epi8(val, mask);
322 inline __m128i RotateRight<8>(
const __m128i& val)
325 return _mm_roti_epi32(val, 32-8);
327 const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
328 return _mm_shuffle_epi8(val, mask);
332 template <
unsigned int IDX>
333 inline __m128i LoadKey(
const word32 rkey[])
335 float rk; std::memcpy(&rk, rkey+IDX,
sizeof(rk));
336 return _mm_castps_si128(_mm_load_ps1(&rk));
339 template <
unsigned int IDX>
340 inline __m128i UnpackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
343 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
344 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
346 return _mm_setzero_si128();
350 inline __m128i UnpackXMM<0>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
353 const __m128i r1 = _mm_unpacklo_epi32(a, b);
354 const __m128i r2 = _mm_unpacklo_epi32(c, d);
355 return _mm_unpacklo_epi64(r1, r2);
359 inline __m128i UnpackXMM<1>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
362 const __m128i r1 = _mm_unpacklo_epi32(a, b);
363 const __m128i r2 = _mm_unpacklo_epi32(c, d);
364 return _mm_unpackhi_epi64(r1, r2);
368 inline __m128i UnpackXMM<2>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
371 const __m128i r1 = _mm_unpackhi_epi32(a, b);
372 const __m128i r2 = _mm_unpackhi_epi32(c, d);
373 return _mm_unpacklo_epi64(r1, r2);
377 inline __m128i UnpackXMM<3>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
380 const __m128i r1 = _mm_unpackhi_epi32(a, b);
381 const __m128i r2 = _mm_unpackhi_epi32(c, d);
382 return _mm_unpackhi_epi64(r1, r2);
385 template <
unsigned int IDX>
386 inline __m128i UnpackXMM(
const __m128i& v)
390 return _mm_setzero_si128();
394 inline __m128i UnpackXMM<0>(
const __m128i& v)
397 return _mm_shuffle_epi8(v, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
401 inline __m128i UnpackXMM<1>(
const __m128i& v)
404 return _mm_shuffle_epi8(v, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
408 inline __m128i UnpackXMM<2>(
const __m128i& v)
411 return _mm_shuffle_epi8(v, _mm_set_epi8(11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8));
415 inline __m128i UnpackXMM<3>(
const __m128i& v)
418 return _mm_shuffle_epi8(v, _mm_set_epi8(15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12));
421 template <
unsigned int IDX>
422 inline __m128i RepackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
424 return UnpackXMM<IDX>(a, b, c, d);
427 template <
unsigned int IDX>
428 inline __m128i RepackXMM(
const __m128i& v)
430 return UnpackXMM<IDX>(v);
// POWER8 rotate helper, parameterized on the rotation amount R.
// NOTE(review): the function signature and the definition of m are not
// visible here; presumably this is RotateLeft<R> with m holding R in every
// element - confirm.
433 #endif // CRYPTOPP_SSSE3_AVAILABLE 437 #if (CRYPTOPP_POWER8_AVAILABLE) 458 template <
unsigned int R>
// vec_rl rotates each element of val left by the matching element of m.
462 return vec_rl(val, m);
// POWER8 rotate helper that rotates left by 32-R in every element.
// NOTE(review): the function signature is not visible here; presumably this
// is RotateRight<R>, since a left rotate by 32-R equals a right rotate by R
// when the elements are 32 bits wide - confirm.
465 template <
unsigned int R>
468 const uint32x4_p m = {32-R, 32-R, 32-R, 32-R};
469 return vec_rl(val, m);
472 template <
unsigned int IDX>
473 inline uint32x4_p LoadKey(
const word32 rkey[])
475 return vec_splats(rkey[IDX]);
// POWER8 unpack/repack helpers.
// NOTE(review): the function signatures are not visible for any of the
// fragments below; the names in these comments are inferred from the calls
// to UnpackSIMD and from the NEON/SSSE3 counterparts - confirm.
//
// Four-vector form parameterized on IDX: marks a, b, c and d as unused.
478 template <
unsigned int IDX>
482 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
483 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
// Single-vector form parameterized on IDX. The four byte-index tables repeat
// bytes 0..3, 4..7, 8..11 and 12..15 respectively (each group listed in
// descending order); presumably one table per IDX specialization, used to
// broadcast one 32-bit word - confirm.
520 template <
unsigned int IDX>
532 const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0};
540 const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4};
548 const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8};
556 const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12};
// Four-vector wrapper that forwards to UnpackSIMD<IDX>(a, b, c, d).
560 template <
unsigned int IDX>
563 return UnpackSIMD<IDX>(a, b, c, d);
// Single-vector wrapper that forwards to UnpackSIMD<IDX>(v).
566 template <
unsigned int IDX>
569 return UnpackSIMD<IDX>(v);
572 #endif // CRYPTOPP_POWER8_AVAILABLE 576 #if (CRYPTOPP_ARM_NEON_AVAILABLE || CRYPTOPP_SSSE3_AVAILABLE) 579 inline void LEA_Encryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
581 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<4>(subkeys)), Xor(temp[3], LoadKey<5>(subkeys))));
582 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<2>(subkeys)), Xor(temp[2], LoadKey<3>(subkeys))));
583 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<0>(subkeys)), Xor(temp[1], LoadKey<1>(subkeys))));
584 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<10>(subkeys)), Xor(temp[0], LoadKey<11>(subkeys))));
585 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<8>(subkeys)), Xor(temp[3], LoadKey<9>(subkeys))));
586 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<6>(subkeys)), Xor(temp[2], LoadKey<7>(subkeys))));
587 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<16>(subkeys)), Xor(temp[1], LoadKey<17>(subkeys))));
588 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<14>(subkeys)), Xor(temp[0], LoadKey<15>(subkeys))));
589 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<12>(subkeys)), Xor(temp[3], LoadKey<13>(subkeys))));
590 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<22>(subkeys)), Xor(temp[2], LoadKey<23>(subkeys))));
591 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<20>(subkeys)), Xor(temp[1], LoadKey<21>(subkeys))));
592 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<18>(subkeys)), Xor(temp[0], LoadKey<19>(subkeys))));
594 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<28>(subkeys)), Xor(temp[3], LoadKey<29>(subkeys))));
595 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<26>(subkeys)), Xor(temp[2], LoadKey<27>(subkeys))));
596 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<24>(subkeys)), Xor(temp[1], LoadKey<25>(subkeys))));
597 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<34>(subkeys)), Xor(temp[0], LoadKey<35>(subkeys))));
598 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<32>(subkeys)), Xor(temp[3], LoadKey<33>(subkeys))));
599 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<30>(subkeys)), Xor(temp[2], LoadKey<31>(subkeys))));
600 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<40>(subkeys)), Xor(temp[1], LoadKey<41>(subkeys))));
601 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<38>(subkeys)), Xor(temp[0], LoadKey<39>(subkeys))));
602 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<36>(subkeys)), Xor(temp[3], LoadKey<37>(subkeys))));
603 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<46>(subkeys)), Xor(temp[2], LoadKey<47>(subkeys))));
604 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<44>(subkeys)), Xor(temp[1], LoadKey<45>(subkeys))));
605 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<42>(subkeys)), Xor(temp[0], LoadKey<43>(subkeys))));
607 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<52>(subkeys)), Xor(temp[3], LoadKey<53>(subkeys))));
608 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<50>(subkeys)), Xor(temp[2], LoadKey<51>(subkeys))));
609 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<48>(subkeys)), Xor(temp[1], LoadKey<49>(subkeys))));
610 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<58>(subkeys)), Xor(temp[0], LoadKey<59>(subkeys))));
611 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<56>(subkeys)), Xor(temp[3], LoadKey<57>(subkeys))));
612 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<54>(subkeys)), Xor(temp[2], LoadKey<55>(subkeys))));
613 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<64>(subkeys)), Xor(temp[1], LoadKey<65>(subkeys))));
614 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<62>(subkeys)), Xor(temp[0], LoadKey<63>(subkeys))));
615 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<60>(subkeys)), Xor(temp[3], LoadKey<61>(subkeys))));
616 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<70>(subkeys)), Xor(temp[2], LoadKey<71>(subkeys))));
617 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<68>(subkeys)), Xor(temp[1], LoadKey<69>(subkeys))));
618 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<66>(subkeys)), Xor(temp[0], LoadKey<67>(subkeys))));
620 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<76>(subkeys)), Xor(temp[3], LoadKey<77>(subkeys))));
621 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<74>(subkeys)), Xor(temp[2], LoadKey<75>(subkeys))));
622 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<72>(subkeys)), Xor(temp[1], LoadKey<73>(subkeys))));
623 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<82>(subkeys)), Xor(temp[0], LoadKey<83>(subkeys))));
624 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<80>(subkeys)), Xor(temp[3], LoadKey<81>(subkeys))));
625 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<78>(subkeys)), Xor(temp[2], LoadKey<79>(subkeys))));
626 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<88>(subkeys)), Xor(temp[1], LoadKey<89>(subkeys))));
627 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<86>(subkeys)), Xor(temp[0], LoadKey<87>(subkeys))));
628 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<84>(subkeys)), Xor(temp[3], LoadKey<85>(subkeys))));
629 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<94>(subkeys)), Xor(temp[2], LoadKey<95>(subkeys))));
630 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<92>(subkeys)), Xor(temp[1], LoadKey<93>(subkeys))));
631 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<90>(subkeys)), Xor(temp[0], LoadKey<91>(subkeys))));
633 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<100>(subkeys)), Xor(temp[3], LoadKey<101>(subkeys))));
634 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<98>(subkeys)), Xor(temp[2], LoadKey<99>(subkeys))));
635 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<96>(subkeys)), Xor(temp[1], LoadKey<97>(subkeys))));
636 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<106>(subkeys)), Xor(temp[0], LoadKey<107>(subkeys))));
637 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<104>(subkeys)), Xor(temp[3], LoadKey<105>(subkeys))));
638 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<102>(subkeys)), Xor(temp[2], LoadKey<103>(subkeys))));
639 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<112>(subkeys)), Xor(temp[1], LoadKey<113>(subkeys))));
640 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<110>(subkeys)), Xor(temp[0], LoadKey<111>(subkeys))));
641 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<108>(subkeys)), Xor(temp[3], LoadKey<109>(subkeys))));
642 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<118>(subkeys)), Xor(temp[2], LoadKey<119>(subkeys))));
643 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<116>(subkeys)), Xor(temp[1], LoadKey<117>(subkeys))));
644 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<114>(subkeys)), Xor(temp[0], LoadKey<115>(subkeys))));
646 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<124>(subkeys)), Xor(temp[3], LoadKey<125>(subkeys))));
647 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<122>(subkeys)), Xor(temp[2], LoadKey<123>(subkeys))));
648 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<120>(subkeys)), Xor(temp[1], LoadKey<121>(subkeys))));
649 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<130>(subkeys)), Xor(temp[0], LoadKey<131>(subkeys))));
650 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<128>(subkeys)), Xor(temp[3], LoadKey<129>(subkeys))));
651 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<126>(subkeys)), Xor(temp[2], LoadKey<127>(subkeys))));
652 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<136>(subkeys)), Xor(temp[1], LoadKey<137>(subkeys))));
653 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<134>(subkeys)), Xor(temp[0], LoadKey<135>(subkeys))));
654 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<132>(subkeys)), Xor(temp[3], LoadKey<133>(subkeys))));
655 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<142>(subkeys)), Xor(temp[2], LoadKey<143>(subkeys))));
656 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<140>(subkeys)), Xor(temp[1], LoadKey<141>(subkeys))));
657 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<138>(subkeys)), Xor(temp[0], LoadKey<139>(subkeys))));
661 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<148>(subkeys)), Xor(temp[3], LoadKey<149>(subkeys))));
662 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<146>(subkeys)), Xor(temp[2], LoadKey<147>(subkeys))));
663 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<144>(subkeys)), Xor(temp[1], LoadKey<145>(subkeys))));
664 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<154>(subkeys)), Xor(temp[0], LoadKey<155>(subkeys))));
665 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<152>(subkeys)), Xor(temp[3], LoadKey<153>(subkeys))));
666 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<150>(subkeys)), Xor(temp[2], LoadKey<151>(subkeys))));
667 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<160>(subkeys)), Xor(temp[1], LoadKey<161>(subkeys))));
668 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<158>(subkeys)), Xor(temp[0], LoadKey<159>(subkeys))));
669 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<156>(subkeys)), Xor(temp[3], LoadKey<157>(subkeys))));
670 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<166>(subkeys)), Xor(temp[2], LoadKey<167>(subkeys))));
671 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<164>(subkeys)), Xor(temp[1], LoadKey<165>(subkeys))));
672 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<162>(subkeys)), Xor(temp[0], LoadKey<163>(subkeys))));
677 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<172>(subkeys)), Xor(temp[3], LoadKey<173>(subkeys))));
678 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<170>(subkeys)), Xor(temp[2], LoadKey<171>(subkeys))));
679 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<168>(subkeys)), Xor(temp[1], LoadKey<169>(subkeys))));
680 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<178>(subkeys)), Xor(temp[0], LoadKey<179>(subkeys))));
681 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<176>(subkeys)), Xor(temp[3], LoadKey<177>(subkeys))));
682 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<174>(subkeys)), Xor(temp[2], LoadKey<175>(subkeys))));
683 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<184>(subkeys)), Xor(temp[1], LoadKey<185>(subkeys))));
684 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<182>(subkeys)), Xor(temp[0], LoadKey<183>(subkeys))));
685 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<180>(subkeys)), Xor(temp[3], LoadKey<181>(subkeys))));
686 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<190>(subkeys)), Xor(temp[2], LoadKey<191>(subkeys))));
687 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<188>(subkeys)), Xor(temp[1], LoadKey<189>(subkeys))));
688 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<186>(subkeys)), Xor(temp[0], LoadKey<187>(subkeys))));
695 inline void LEA_Decryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
699 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<186>(subkeys))), LoadKey<187>(subkeys));
700 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<188>(subkeys))), LoadKey<189>(subkeys));
701 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<190>(subkeys))), LoadKey<191>(subkeys));
702 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<180>(subkeys))), LoadKey<181>(subkeys));
703 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<182>(subkeys))), LoadKey<183>(subkeys));
704 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<184>(subkeys))), LoadKey<185>(subkeys));
705 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<174>(subkeys))), LoadKey<175>(subkeys));
706 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<176>(subkeys))), LoadKey<177>(subkeys));
707 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<178>(subkeys))), LoadKey<179>(subkeys));
708 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<168>(subkeys))), LoadKey<169>(subkeys));
709 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<170>(subkeys))), LoadKey<171>(subkeys));
710 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<172>(subkeys))), LoadKey<173>(subkeys));
715 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<162>(subkeys))), LoadKey<163>(subkeys));
716 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<164>(subkeys))), LoadKey<165>(subkeys));
717 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<166>(subkeys))), LoadKey<167>(subkeys));
718 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<156>(subkeys))), LoadKey<157>(subkeys));
719 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<158>(subkeys))), LoadKey<159>(subkeys));
720 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<160>(subkeys))), LoadKey<161>(subkeys));
721 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<150>(subkeys))), LoadKey<151>(subkeys));
722 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<152>(subkeys))), LoadKey<153>(subkeys));
723 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<154>(subkeys))), LoadKey<155>(subkeys));
724 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<144>(subkeys))), LoadKey<145>(subkeys));
725 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<146>(subkeys))), LoadKey<147>(subkeys));
726 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<148>(subkeys))), LoadKey<149>(subkeys));
729 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<138>(subkeys))), LoadKey<139>(subkeys));
730 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<140>(subkeys))), LoadKey<141>(subkeys));
731 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<142>(subkeys))), LoadKey<143>(subkeys));
732 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<132>(subkeys))), LoadKey<133>(subkeys));
733 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<134>(subkeys))), LoadKey<135>(subkeys));
734 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<136>(subkeys))), LoadKey<137>(subkeys));
735 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<126>(subkeys))), LoadKey<127>(subkeys));
736 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<128>(subkeys))), LoadKey<129>(subkeys));
737 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<130>(subkeys))), LoadKey<131>(subkeys));
738 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<120>(subkeys))), LoadKey<121>(subkeys));
739 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<122>(subkeys))), LoadKey<123>(subkeys));
740 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<124>(subkeys))), LoadKey<125>(subkeys));
742 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<114>(subkeys))), LoadKey<115>(subkeys));
743 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<116>(subkeys))), LoadKey<117>(subkeys));
744 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<118>(subkeys))), LoadKey<119>(subkeys));
745 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<108>(subkeys))), LoadKey<109>(subkeys));
746 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<110>(subkeys))), LoadKey<111>(subkeys));
747 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<112>(subkeys))), LoadKey<113>(subkeys));
748 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<102>(subkeys))), LoadKey<103>(subkeys));
749 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<104>(subkeys))), LoadKey<105>(subkeys));
750 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<106>(subkeys))), LoadKey<107>(subkeys));
751 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<96>(subkeys))), LoadKey<97>(subkeys));
752 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<98>(subkeys))), LoadKey<99>(subkeys));
753 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<100>(subkeys))), LoadKey<101>(subkeys));
755 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<90>(subkeys))), LoadKey<91>(subkeys));
756 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<92>(subkeys))), LoadKey<93>(subkeys));
757 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<94>(subkeys))), LoadKey<95>(subkeys));
758 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<84>(subkeys))), LoadKey<85>(subkeys));
759 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<86>(subkeys))), LoadKey<87>(subkeys));
760 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<88>(subkeys))), LoadKey<89>(subkeys));
761 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<78>(subkeys))), LoadKey<79>(subkeys));
762 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<80>(subkeys))), LoadKey<81>(subkeys));
763 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<82>(subkeys))), LoadKey<83>(subkeys));
764 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<72>(subkeys))), LoadKey<73>(subkeys));
765 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<74>(subkeys))), LoadKey<75>(subkeys));
766 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<76>(subkeys))), LoadKey<77>(subkeys));
768 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<66>(subkeys))), LoadKey<67>(subkeys));
769 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<68>(subkeys))), LoadKey<69>(subkeys));
770 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<70>(subkeys))), LoadKey<71>(subkeys));
771 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<60>(subkeys))), LoadKey<61>(subkeys));
772 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<62>(subkeys))), LoadKey<63>(subkeys));
773 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<64>(subkeys))), LoadKey<65>(subkeys));
774 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<54>(subkeys))), LoadKey<55>(subkeys));
775 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<56>(subkeys))), LoadKey<57>(subkeys));
776 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<58>(subkeys))), LoadKey<59>(subkeys));
777 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<48>(subkeys))), LoadKey<49>(subkeys));
778 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<50>(subkeys))), LoadKey<51>(subkeys));
779 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<52>(subkeys))), LoadKey<53>(subkeys));
781 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<42>(subkeys))), LoadKey<43>(subkeys));
782 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<44>(subkeys))), LoadKey<45>(subkeys));
783 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<46>(subkeys))), LoadKey<47>(subkeys));
784 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<36>(subkeys))), LoadKey<37>(subkeys));
785 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<38>(subkeys))), LoadKey<39>(subkeys));
786 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<40>(subkeys))), LoadKey<41>(subkeys));
787 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<30>(subkeys))), LoadKey<31>(subkeys));
788 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<32>(subkeys))), LoadKey<33>(subkeys));
789 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<34>(subkeys))), LoadKey<35>(subkeys));
790 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<24>(subkeys))), LoadKey<25>(subkeys));
791 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<26>(subkeys))), LoadKey<27>(subkeys));
792 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<28>(subkeys))), LoadKey<29>(subkeys));
794 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<18>(subkeys))), LoadKey<19>(subkeys));
795 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<20>(subkeys))), LoadKey<21>(subkeys));
796 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<22>(subkeys))), LoadKey<23>(subkeys));
797 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<12>(subkeys))), LoadKey<13>(subkeys));
798 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<14>(subkeys))), LoadKey<15>(subkeys));
799 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<16>(subkeys))), LoadKey<17>(subkeys));
800 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<6>(subkeys))), LoadKey<7>(subkeys));
801 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<8>(subkeys))), LoadKey<9>(subkeys));
802 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<10>(subkeys))), LoadKey<11>(subkeys));
803 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<0>(subkeys))), LoadKey<1>(subkeys));
804 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<2>(subkeys))), LoadKey<3>(subkeys));
805 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<4>(subkeys))), LoadKey<5>(subkeys));
808 #endif // LEA Encryption and Decryption 812 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 814 inline void LEA_Enc_Block(uint32x4_t &block0,
815 const word32 *subkeys,
unsigned int rounds)
818 temp[0] = UnpackNEON<0>(block0);
819 temp[1] = UnpackNEON<1>(block0);
820 temp[2] = UnpackNEON<2>(block0);
821 temp[3] = UnpackNEON<3>(block0);
823 LEA_Encryption(temp, subkeys, rounds);
825 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
828 inline void LEA_Dec_Block(uint32x4_t &block0,
829 const word32 *subkeys,
unsigned int rounds)
832 temp[0] = UnpackNEON<0>(block0);
833 temp[1] = UnpackNEON<1>(block0);
834 temp[2] = UnpackNEON<2>(block0);
835 temp[3] = UnpackNEON<3>(block0);
837 LEA_Decryption(temp, subkeys, rounds);
839 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
842 inline void LEA_Enc_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
843 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
846 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
847 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
848 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
849 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
851 LEA_Encryption(temp, subkeys, rounds);
853 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
854 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
855 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
856 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
859 inline void LEA_Dec_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
860 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
863 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
864 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
865 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
866 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
868 LEA_Decryption(temp, subkeys, rounds);
870 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
871 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
872 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
873 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
876 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 880 #if (CRYPTOPP_SSSE3_AVAILABLE) 882 inline void LEA_Enc_Block(__m128i &block0,
883 const word32 *subkeys,
unsigned int rounds)
886 temp[0] = UnpackXMM<0>(block0);
887 temp[1] = UnpackXMM<1>(block0);
888 temp[2] = UnpackXMM<2>(block0);
889 temp[3] = UnpackXMM<3>(block0);
891 LEA_Encryption(temp, subkeys, rounds);
893 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
896 inline void LEA_Dec_Block(__m128i &block0,
897 const word32 *subkeys,
unsigned int rounds)
900 temp[0] = UnpackXMM<0>(block0);
901 temp[1] = UnpackXMM<1>(block0);
902 temp[2] = UnpackXMM<2>(block0);
903 temp[3] = UnpackXMM<3>(block0);
905 LEA_Decryption(temp, subkeys, rounds);
907 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
910 inline void LEA_Enc_4_Blocks(__m128i &block0, __m128i &block1,
911 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
914 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
915 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
916 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
917 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
919 LEA_Encryption(temp, subkeys, rounds);
921 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
922 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
923 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
924 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
927 inline void LEA_Dec_4_Blocks(__m128i &block0, __m128i &block1,
928 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
931 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
932 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
933 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
934 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
936 LEA_Decryption(temp, subkeys, rounds);
938 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
939 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
940 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
941 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
944 #endif // CRYPTOPP_SSSE3_AVAILABLE 948 #if (CRYPTOPP_POWER8_AVAILABLE) 951 const word32 *subkeys,
unsigned int rounds)
954 temp[0] = UnpackSIMD<0>(block0);
955 temp[1] = UnpackSIMD<1>(block0);
956 temp[2] = UnpackSIMD<2>(block0);
957 temp[3] = UnpackSIMD<3>(block0);
959 LEA_Encryption(temp, subkeys, rounds);
961 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
965 const word32 *subkeys,
unsigned int rounds)
968 temp[0] = UnpackSIMD<0>(block0);
969 temp[1] = UnpackSIMD<1>(block0);
970 temp[2] = UnpackSIMD<2>(block0);
971 temp[3] = UnpackSIMD<3>(block0);
973 LEA_Decryption(temp, subkeys, rounds);
975 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
982 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
983 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
984 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
985 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
987 LEA_Encryption(temp, subkeys, rounds);
989 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
990 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
991 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
992 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
999 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
1000 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
1001 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
1002 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
1004 LEA_Decryption(temp, subkeys, rounds);
1006 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
1007 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
1008 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
1009 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
1012 #endif // CRYPTOPP_POWER8_AVAILABLE 1014 ANONYMOUS_NAMESPACE_END
1020 #if defined(CRYPTOPP_SSSE3_AVAILABLE) 1021 size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1022 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1024 return AdvancedProcessBlocks128_4x1_SSE(LEA_Enc_Block, LEA_Enc_4_Blocks,
1025 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1028 size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1029 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1031 return AdvancedProcessBlocks128_4x1_SSE(LEA_Dec_Block, LEA_Dec_4_Blocks,
1032 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1034 #endif // CRYPTOPP_SSSE3_AVAILABLE 1036 #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) 1037 size_t LEA_Enc_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1038 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1040 return AdvancedProcessBlocks128_4x1_NEON(LEA_Enc_Block, LEA_Enc_4_Blocks,
1041 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1044 size_t LEA_Dec_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1045 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1047 return AdvancedProcessBlocks128_4x1_NEON(LEA_Dec_Block, LEA_Dec_4_Blocks,
1048 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1050 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 1052 #if defined(CRYPTOPP_POWER8_AVAILABLE) 1053 size_t LEA_Enc_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1054 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1056 return AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Enc_Block, LEA_Enc_4_Blocks,
1057 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1060 size_t LEA_Dec_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1061 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1063 return AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Dec_Block, LEA_Dec_4_Blocks,
1064 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif  // CRYPTOPP_POWER8_AVAILABLE

// Cross-reference notes for items referenced by this file:
//
//   Utility functions for the Crypto++ library.
//
//   Classes for the LEA block cipher.
//
//   T1 VecSub(const T1 vec1, const T2 vec2)
//       Subtract two vectors.
//
//   Library configuration file.
//
//   T1 VecAdd(const T1 vec1, const T2 vec2)
//       Add two vectors.
//
//   T1 VecPermute(const T1 vec, const T2 mask)
//       Permutes a vector.
//
//   __vector unsigned int uint32x4_p
//       Vector of 32-bit elements.
//
//   Support functions for PowerPC and vector operations.
//
//   Template for AdvancedProcessBlocks and SIMD processing.
//
//   #define CRYPTOPP_ASSERT(exp)
//       Debugging and diagnostic assertion.
//
//   T1 VecXor(const T1 vec1, const T2 vec2)
//       XOR two vectors.
//
//   __vector unsigned long long uint64x2_p
//       Vector of 64-bit elements.
//
//   Crypto++ library namespace.
//
//   __vector unsigned char uint8x16_p
//       Vector of 8-bit elements.