// Storage for the Rijndael lookup tables and their "filled" flags.
// Depending on the configuration macros tested below, Te is declared as
// word64 entries (256+2 of them, 16-byte aligned, inside namespace rdtable,
// or a plain static array of 256) or as a 16-byte aligned word32[256*4];
// Td is word64[256] or a 16-byte aligned word32[256*4].
// NOTE(review): the integers fused into these lines look like line numbers
// from a rendered listing, and the gaps between them suggest that some source
// lines (e.g. #else / #endif) are absent from this copy - confirm against the
// upstream file before relying on the exact preprocessor structure.
84 #ifndef CRYPTOPP_IMPORTS 85 #ifndef CRYPTOPP_GENERATE_X64_MASM 94 #if (_MSC_VER >= 1910) 95 # ifndef CRYPTOPP_DEBUG 96 # pragma optimize("", off) 97 # pragma optimize("ts", on) 104 #if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) 105 # define CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 1 109 #define M128I_CAST(x) ((__m128i *)(void *)(x)) 110 #define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x)) 112 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 113 # if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) 114 namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
115 using namespace rdtable;
117 static word64 Te[256];
119 static word64 Td[256];
120 #else // Not CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 121 # if defined(CRYPTOPP_X64_MASM_AVAILABLE) 123 namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
125 CRYPTOPP_ALIGN_DATA(16) static word32 Te[256*4];
126 CRYPTOPP_ALIGN_DATA(16) static word32 Td[256*4];
// Both flags start out false. Presumably each is set once the matching table
// has been populated; the assignment is not visible in this copy - confirm.
127 #endif // CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 129 static volatile bool s_TeFilled =
false, s_TdFilled =
false;
// File-local helpers and constants, enclosed by ANONYMOUS_NAMESPACE_BEGIN/END
// and compiled only for the x86 / x32 / x64 targets tested below.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; gaps in the numbering suggest that braces and some statements are
// missing from this copy - confirm against the upstream file.
131 ANONYMOUS_NAMESPACE_BEGIN
// AliasedWithTable(begin, end): reduces begin, end, the address of Te and the
// address just past Te (Te + sizeof(Te)) modulo 4096, giving s0, s1, t0 and
// t1, and reports whether the [begin, end) offsets fall on the table offsets.
133 #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 158 static inline bool AliasedWithTable(
const byte *begin,
const byte *end)
160 ptrdiff_t s0 = uintptr_t(begin)%4096, s1 = uintptr_t(end)%4096;
161 ptrdiff_t t0 = uintptr_t(Te)%4096, t1 = (uintptr_t(Te)+
sizeof(Te))%4096;
// Two alternative results follow. The first treats the table offsets as one
// contiguous range starting at t0 and ending at t1; the second is a pure OR
// of four comparisons. The condition choosing between them is not visible
// here - presumably "t1 > t0" versus the wrapped-around case; confirm.
163 return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
165 return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);
// Data members: a word32 array of 4*12 sub-key words, 8 words of workspace,
// three const byte pointers, four size_t increments and three further size_t
// values. Presumably these are the fields of the Locals type whose size is
// used below (the struct header is not visible in this copy) - confirm.
170 word32 subkeys[4*12], workspace[8];
171 const byte *inBlocks, *inXorBlocks, *outXorBlocks;
173 size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement;
174 size_t regSpill, lengthAndCounterFlag, keysBegin;
// Buffer sizing constants: 4096 and 256 bytes, and their sum plus
// sizeof(Locals) as the total number of bytes to allocate.
177 const size_t s_aliasPageSize = 4096;
178 const size_t s_aliasBlockSize = 256;
179 const size_t s_sizeToAllocate = s_aliasPageSize + s_aliasBlockSize +
sizeof(Locals);
181 #endif // CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 183 ANONYMOUS_NAMESPACE_END
// Table-lookup helper macros.
//  - QUARTER_ROUND(L, T, t, a, b, c, d): XORs L(T, 3, ..), L(T, 2, ..),
//    L(T, 1, ..) of the successive low bytes of t into a, b and c, shifting t
//    right by 8 bits after each step (the step for d is not visible here).
//  - QUARTER_ROUND_LE / QUARTER_ROUND_LD: store one byte per step into
//    tempBlock[a..d]; LE reads byte 1 of a Te entry, LD reads byte
//    GetNativeByteOrder()*7 of a Td entry or, in the other variant, Sd[..].
//  - QUARTER_ROUND_E/_D use TL_M, QUARTER_ROUND_FE/_FD use TL_F; the
//    CRYPTOPP_LITTLE_ENDIAN variant passes a..d in reverse order.
//  - TL_F / TL_M: either an unaligned word32 read at a byte offset inside the
//    8-byte table entry x, or an indexed / rotated read of a word32 table.
//  - f2, f4, f8: shift x left by 1, 2, 3 bits and XOR in 0x11b once for each
//    bit that was shifted past bit 7; f3, f9, fb, fd, fe XOR these together.
// NOTE(review): the integers fused into these lines look like listing line
// numbers, and several #else / #endif lines appear to be missing, so which
// macro variant belongs to which branch cannot be read off this copy.
187 #define QUARTER_ROUND(L, T, t, a, b, c, d) \ 188 a ^= L(T, 3, byte(t)); t >>= 8;\ 189 b ^= L(T, 2, byte(t)); t >>= 8;\ 190 c ^= L(T, 1, byte(t)); t >>= 8;\ 193 #define QUARTER_ROUND_LE(t, a, b, c, d) \ 194 tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\ 195 tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\ 196 tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\ 197 tempBlock[d] = ((byte *)(Te+t))[1]; 199 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 200 #define QUARTER_ROUND_LD(t, a, b, c, d) \ 201 tempBlock[a] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\ 202 tempBlock[b] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\ 203 tempBlock[c] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\ 204 tempBlock[d] = ((byte *)(Td+t))[GetNativeByteOrder()*7]; 206 #define QUARTER_ROUND_LD(t, a, b, c, d) \ 207 tempBlock[a] = Sd[byte(t)]; t >>= 8;\ 208 tempBlock[b] = Sd[byte(t)]; t >>= 8;\ 209 tempBlock[c] = Sd[byte(t)]; t >>= 8;\ 210 tempBlock[d] = Sd[t]; 213 #define QUARTER_ROUND_E(t, a, b, c, d) QUARTER_ROUND(TL_M, Te, t, a, b, c, d) 214 #define QUARTER_ROUND_D(t, a, b, c, d) QUARTER_ROUND(TL_M, Td, t, a, b, c, d) 216 #if (CRYPTOPP_LITTLE_ENDIAN) 217 #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, d, c, b, a) 218 #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, d, c, b, a) 219 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 220 #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (6-i)%4+1)) 221 #define TL_M(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (i+3)%4+1)) 223 #define TL_F(T, i, x) rotrFixed(T[x], (3-i)*8) 224 #define TL_M(T, i, x) T[i*256 + x] 227 #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, a, b, c, d) 228 #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, a, b, c, d) 229 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 230 #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (4-i)%4)) 233 #define 
TL_F(T, i, x) rotrFixed(T[x], i*8) 234 #define TL_M(T, i, x) T[i*256 + x] 239 #define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) 240 #define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) 241 #define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) 243 #define f3(x) (f2(x) ^ x) 244 #define f9(x) (f8(x) ^ x) 245 #define fb(x) (f8(x) ^ f2(x) ^ x) 246 #define fd(x) (f8(x) ^ f4(x) ^ x) 247 #define fe(x) (f8(x) ^ f4(x) ^ f2(x)) 249 unsigned int Rijndael::Base::OptimalDataAlignment()
const 251 #if (CRYPTOPP_AESNI_AVAILABLE) 255 #if (CRYPTOPP_ARM_AES_AVAILABLE) 259 #if (CRYPTOGAMS_ARM_AES) 263 #if (CRYPTOPP_POWER8_AES_AVAILABLE) 270 void Rijndael::Base::FillEncTable()
// FillEncTable: fills the encryption table Te for i = 0..255.
// NOTE(review): the definition of x is not visible in this copy; presumably
// it is the forward S-box byte for i - confirm.
272 for (
int i=0; i<256; i++)
// Unaligned-access layout: y holds x in bits 8-15 and 16-23 and f2(x) in bits
// 24-31; the 64-bit entry is (y | f3(x)) in its upper half and y in its lower.
275 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 276 word32 y = word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
277 Te[i] = word64(y | f3(x))<<32 | y;
// Other layout: y additionally has f3(x) in its low byte and is rotated right
// by 8 bits on each of four inner iterations (the store into Te is not
// visible in this copy).
279 word32 y = f3(x) | word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
280 for (
int j=0; j<4; j++)
283 y = rotrConstant<8>(y);
// With the assembly implementation enabled, the two extra entries Te[256] and
// Te[257] are cleared to zero.
287 #if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) 288 Te[256] = Te[257] = 0;
// FillDecTable: fills the decryption table Td for i = 0..255.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; the definition of x, the store inside the inner loop and the
// closing lines are not visible in this copy. Presumably x is the inverse
// S-box byte for i - confirm against the upstream file.
293 void Rijndael::Base::FillDecTable()
295 for (
int i=0; i<256; i++)
// Unaligned-access layout: y holds fd(x), f9(x), fe(x) in bits 8-15, 16-23
// and 24-31; the 64-bit entry is (y | fb(x)) in its upper half and (y | x) in
// its lower half.
298 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 299 word32 y = word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
300 Td[i] = word64(y | fb(x))<<32 | y | x;
// Other layout: y additionally has fb(x) in its low byte and is rotated right
// by 8 bits on each of four inner iterations.
302 word32 y = fb(x) | word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;;
303 for (
int j=0; j<4; j++)
306 y = rotrConstant<8>(y);
// Forward declarations of routines that are defined outside this excerpt
// (all are declared extern). Each group is introduced by a feature-test
// macro; the matching #endif lines are not visible in this copy.
//
// AES-NI group: key setup from userKey/keyLen into rk, a second routine
// taking the key words and the round count, and bulk encrypt / decrypt
// routines taking (subkeys, rounds, inBlocks, xorBlocks, outBlocks, length,
// flags) and returning size_t.
313 #if (CRYPTOPP_AESNI_AVAILABLE) 314 extern void Rijndael_UncheckedSetKey_SSE4_AESNI(
const byte *userKey,
size_t keyLen, word32* rk);
315 extern void Rijndael_UncheckedSetKeyRev_AESNI(word32 *key,
unsigned int rounds);
317 extern size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(
const word32 *subkeys,
size_t rounds,
318 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags);
319 extern size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(
const word32 *subkeys,
size_t rounds,
320 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags);
// ARMv8 group: bulk encrypt / decrypt routines with the same parameter list.
323 #if (CRYPTOPP_ARM_AES_AVAILABLE) 324 extern size_t Rijndael_Enc_AdvancedProcessBlocks_ARMV8(
const word32 *subkeys,
size_t rounds,
325 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags);
326 extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(
const word32 *subkeys,
size_t rounds,
327 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags);
// CRYPTOGAMS group: C-linkage key setup (key length given as bitLen) and
// single 16-byte block encrypt / decrypt routines.
330 #if (CRYPTOGAMS_ARM_AES) 331 extern "C" int AES_set_encrypt_key(
const unsigned char *userKey,
const int bitLen, word32 *rkey);
332 extern "C" int AES_set_decrypt_key(
const unsigned char *userKey,
const int bitLen, word32 *rkey);
333 extern "C" void AES_encrypt(
const unsigned char in[16],
unsigned char out[16],
const word32 *rkey);
334 extern "C" void AES_decrypt(
const unsigned char in[16],
unsigned char out[16],
const word32 *rkey);
// POWER8 group: key setup that also receives a byte table Se, and bulk
// encrypt / decrypt routines with the common parameter list.
337 #if (CRYPTOPP_POWER8_AES_AVAILABLE) 338 extern void Rijndael_UncheckedSetKey_POWER8(
const byte* userKey,
size_t keyLen,
339 word32* rk,
const byte* Se);
341 extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(
const word32 *subkeys,
size_t rounds,
342 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags);
343 extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(
const word32 *subkeys,
size_t rounds,
344 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags);
// Thin wrappers around the C-linkage AES_* routines, compiled only when
// CRYPTOGAMS_ARM_AES is set.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; braces and possibly other lines are missing from this copy.
//
// CRYPTOGAMS_set_encrypt_key / CRYPTOGAMS_set_decrypt_key: forward userKey,
// bitLen and rkey unchanged and return the callee's int result.
347 #if (CRYPTOGAMS_ARM_AES) 348 int CRYPTOGAMS_set_encrypt_key(
const byte *userKey,
const int bitLen, word32 *rkey)
350 return AES_set_encrypt_key(userKey, bitLen, rkey);
352 int CRYPTOGAMS_set_decrypt_key(
const byte *userKey,
const int bitLen, word32 *rkey)
354 return AES_set_decrypt_key(userKey, bitLen, rkey);
// CRYPTOGAMS_encrypt: encrypts inBlock into outBlock with AES_encrypt, then
// XORs 16 bytes of xorBlock into outBlock via xorbuf.
// NOTE(review): the numbering skips a line just before the xorbuf call;
// presumably it guards against a null xorBlock - confirm.
356 void CRYPTOGAMS_encrypt(
const byte *inBlock,
const byte *xorBlock, byte *outBlock,
const word32 *rkey)
358 AES_encrypt(inBlock, outBlock, rkey);
360 xorbuf (outBlock, xorBlock, 16);
// CRYPTOGAMS_decrypt: same shape as above using AES_decrypt; the same
// caveat about the line missing before xorbuf applies.
362 void CRYPTOGAMS_decrypt(
const byte *inBlock,
const byte *xorBlock, byte *outBlock,
const word32 *rkey)
364 AES_decrypt(inBlock, outBlock, rkey);
366 xorbuf (outBlock, xorBlock, 16);
// AlgorithmProvider: only the signature and a row of feature-test directives
// survive in this copy; the returned strings are not visible.
// UncheckedSetKey (signature follows on the same fused line): builds the
// round-key schedule in m_key from userKey / keyLen; the NameValuePairs
// argument is unnamed and therefore unused.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; braces, runtime feature checks, returns and loop headers appear to
// be missing, so the control flow below is only partially visible.
370 std::string Rijndael::Base::AlgorithmProvider()
const 372 #if (CRYPTOPP_AESNI_AVAILABLE) 376 #if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) 380 #if (CRYPTOPP_ARM_AES_AVAILABLE) 384 #if (CRYPTOGAMS_ARM_AES) 388 #if (CRYPTOPP_POWER8_AES_AVAILABLE) 395 void Rijndael::Base::UncheckedSetKey(
const byte *userKey,
unsigned int keyLen,
const NameValuePairs &)
397 AssertValidKeyLength(keyLen);
// CRYPTOGAMS path: round count is keyLen/4 + 6, the key buffer is sized to
// 4*(15+1)+4 words, and the encrypt or decrypt key setup is chosen by
// IsForwardTransformation(); the key length is passed in bits (keyLen*8).
399 #if (CRYPTOGAMS_ARM_AES) 402 m_rounds = keyLen/4 + 6;
403 m_key.New(4*(15+1)+4);
405 if (IsForwardTransformation())
406 CRYPTOGAMS_set_encrypt_key(userKey, keyLen*8, m_key.begin());
408 CRYPTOGAMS_set_decrypt_key(userKey, keyLen*8, m_key.begin());
// x86 / x32 / x64: size m_aliasBlock to s_sizeToAllocate and set its mark
// to 0.
413 #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 414 m_aliasBlock.New(s_sizeToAllocate);
417 m_aliasBlock.SetMark(0);
// General sizing: keyLen/4 + 6 rounds and 4 words for each of the
// m_rounds + 1 round keys.
420 m_rounds = keyLen/4 + 6;
421 m_key.New(4*(m_rounds+1));
// AES-NI path: the schedule is produced by the AES-NI routine; for the
// inverse direction it is additionally passed to the "Rev" routine.
424 #if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE41_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)) 430 Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk);
431 if (!IsForwardTransformation())
432 Rijndael_UncheckedSetKeyRev_AESNI(m_key, m_rounds);
// POWER8 path: key setup routine that also receives the Se table.
438 #if CRYPTOPP_POWER8_AES_AVAILABLE 443 Rijndael_UncheckedSetKey_POWER8(userKey, keyLen, rk, Se);
// Portable key expansion. rc walks the rcon table. For each step, temp is
// the last word of the current key block; x substitutes its bytes through Se
// in the order 2, 1, 0, 3 (a one-byte rotation), and the next four words are
// chained XORs starting from rk[0] ^ x ^ *rc.
449 const word32 *rc = rcon;
454 temp = rk[keyLen/4-1];
455 word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
456 (word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
457 rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
458 rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
459 rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
460 rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
// Test for having reached the end of m_key; the statement it controls is not
// visible here (presumably it leaves the expansion loop - confirm).
462 if (rk + keyLen/4 + 4 == m_key.end())
// Extra words for longer keys: rk[10], rk[11] (presumably the 24-byte case,
// whose test is not visible) and, for 32-byte keys, rk[12..15], where rk[12]
// uses an Se substitution of temp without rotation.
467 rk[10] = rk[ 4] ^ rk[ 9];
468 rk[11] = rk[ 5] ^ rk[10];
470 else if (keyLen == 32)
473 rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
474 rk[13] = rk[ 5] ^ rk[12];
475 rk[14] = rk[ 6] ^ rk[13];
476 rk[15] = rk[ 7] ^ rk[14];
// Direction-specific post-processing; the forward branch body is not visible
// in this copy.
483 if (IsForwardTransformation())
// InverseMixColumn(x) XORs four Td lookups indexed through Se by the bytes of
// x. The loop walks i up from 4 and j down from 4*m_rounds-4, swapping
// 4-word groups while transforming both; the four statements after the loop
// transform the group at index i in place.
496 #define InverseMixColumn(x) \ 497 TL_M(Td, 0, Se[GETBYTE(x, 3)]) ^ TL_M(Td, 1, Se[GETBYTE(x, 2)]) ^ \ 498 TL_M(Td, 2, Se[GETBYTE(x, 1)]) ^ TL_M(Td, 3, Se[GETBYTE(x, 0)]) 501 for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
503 temp = InverseMixColumn(rk[i ]); rk[i ] = InverseMixColumn(rk[j ]); rk[j ] = temp;
504 temp = InverseMixColumn(rk[i + 1]); rk[i + 1] = InverseMixColumn(rk[j + 1]); rk[j + 1] = temp;
505 temp = InverseMixColumn(rk[i + 2]); rk[i + 2] = InverseMixColumn(rk[j + 2]); rk[j + 2] = temp;
506 temp = InverseMixColumn(rk[i + 3]); rk[i + 3] = InverseMixColumn(rk[j + 3]); rk[j + 3] = temp;
509 rk[i+0] = InverseMixColumn(rk[i+0]);
510 rk[i+1] = InverseMixColumn(rk[i+1]);
511 rk[i+2] = InverseMixColumn(rk[i+2]);
512 rk[i+3] = InverseMixColumn(rk[i+3]);
// Rijndael::Enc::ProcessAndXorBlock(inBlock, xorBlock, outBlock): encrypts
// one block. When an accelerated implementation is compiled in, the work is
// handed to AdvancedProcessBlocks with length 16 and flags 0 (or to
// CRYPTOGAMS_encrypt); otherwise the table-driven code further down runs.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; the runtime conditions, returns, braces and loop headers around
// these calls are not visible in this copy.
520 #if CRYPTOPP_AESNI_AVAILABLE 524 #if CRYPTOPP_ARM_AES_AVAILABLE 530 void Rijndael::Enc::ProcessAndXorBlock(
const byte *inBlock,
const byte *xorBlock, byte *outBlock)
const 532 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) || CRYPTOPP_AESNI_AVAILABLE 533 # if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) 539 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
544 #if (CRYPTOPP_ARM_AES_AVAILABLE) 547 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
552 #if (CRYPTOGAMS_ARM_AES) 555 CRYPTOGAMS_encrypt(inBlock, xorBlock, outBlock, m_key.begin());
560 #if (CRYPTOPP_POWER8_AES_AVAILABLE) 563 (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
// Table-driven path: load the input block into s0..s3 and point rk at the
// key schedule.
570 word32 s0, s1, s2, s3, t0, t1, t2, t3;
571 Block::Get(inBlock)(s0)(s1)(s2)(s3);
573 const word32 *rk = m_key;
// Read one word of Te every cacheLineSize bytes (over 2048 bytes for the
// 64-bit layout, 1024 otherwise), AND it into u and OR u into the state.
// NOTE(review): the declaration of u is not visible; presumably it is seeded
// from the volatile zero _u so the state is left unchanged and the reads only
// serve to touch the whole table before data-dependent lookups - confirm.
588 volatile word32 _u = 0;
590 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 591 for (i=0; i<2048; i+=cacheLineSize)
593 for (i=0; i<1024; i+=cacheLineSize)
595 u &= *(
const word32 *)(
const void *)(((
const byte *)Te)+i);
597 s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// First round, using the TL_F lookup form.
599 QUARTER_ROUND_FE(s3, t0, t1, t2, t3)
600 QUARTER_ROUND_FE(s2, t3, t0, t1, t2)
601 QUARTER_ROUND_FE(s1, t2, t3, t0, t1)
602 QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
// Middle rounds: r = m_rounds/2 - 1, with two rounds per pass - one seeded
// from rk[0..3] into s0..s3, one from rk[4..7] into t0..t3. The loop
// statement and the advance of rk are not visible in this copy.
605 unsigned int r = m_rounds/2 - 1;
608 s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
610 QUARTER_ROUND_E(t3, s0, s1, s2, s3)
611 QUARTER_ROUND_E(t2, s3, s0, s1, s2)
612 QUARTER_ROUND_E(t1, s2, s3, s0, s1)
613 QUARTER_ROUND_E(t0, s1, s2, s3, s0)
615 t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
617 QUARTER_ROUND_E(s3, t0, t1, t2, t3)
618 QUARTER_ROUND_E(s2, t3, t0, t1, t2)
619 QUARTER_ROUND_E(s1, t2, t3, t0, t1)
620 QUARTER_ROUND_E(s0, t1, t2, t3, t0)
// Final round: QUARTER_ROUND_LE writes single bytes into tempBlock, a byte
// view of tbw (declaration not visible), at the listed positions; the four
// words of tbw are then XORed with rk[0..3] and written through Block::Put
// together with xorBlock and outBlock.
626 byte *
const tempBlock = (byte *)tbw;
628 QUARTER_ROUND_LE(t2, 15, 2, 5, 8)
629 QUARTER_ROUND_LE(t1, 11, 14, 1, 4)
630 QUARTER_ROUND_LE(t0, 7, 10, 13, 0)
631 QUARTER_ROUND_LE(t3, 3, 6, 9, 12)
633 Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
// Rijndael::Dec::ProcessAndXorBlock(inBlock, xorBlock, outBlock): decrypts
// one block. When an accelerated implementation is compiled in, the work is
// handed to AdvancedProcessBlocks with length 16 and flags 0 (or to
// CRYPTOGAMS_decrypt); otherwise the table-driven code further down runs.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; the runtime conditions, returns, braces and loop headers around
// these calls are not visible in this copy.
636 void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock)
const 638 #if CRYPTOPP_AESNI_AVAILABLE 641 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
646 #if (CRYPTOPP_ARM_AES_AVAILABLE) 649 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
654 #if (CRYPTOGAMS_ARM_AES) 657 CRYPTOGAMS_decrypt(inBlock, xorBlock, outBlock, m_key.begin());
662 #if (CRYPTOPP_POWER8_AES_AVAILABLE) 665 (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
// Table-driven path: load the input block into s0..s3 and point rk at the
// key schedule.
672 word32 s0, s1, s2, s3, t0, t1, t2, t3;
673 Block::Get(inBlock)(s0)(s1)(s2)(s3);
675 const word32 *rk = m_key;
// Read one word of Td every cacheLineSize bytes (over 2048 bytes for the
// 64-bit layout, 1024 otherwise), AND it into u and OR u into the state.
// NOTE(review): the declaration of u is not visible; presumably it is seeded
// from the volatile zero _u so the state is left unchanged and the reads only
// serve to touch the whole table before data-dependent lookups - confirm.
690 volatile word32 _u = 0;
692 #if defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS) 693 for (i=0; i<2048; i+=cacheLineSize)
695 for (i=0; i<1024; i+=cacheLineSize)
697 u &= *(
const word32 *)(
const void *)(((
const byte *)Td)+i);
699 s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// First round, using the TL_F lookup form.
701 QUARTER_ROUND_FD(s3, t2, t1, t0, t3)
702 QUARTER_ROUND_FD(s2, t1, t0, t3, t2)
703 QUARTER_ROUND_FD(s1, t0, t3, t2, t1)
704 QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
// Middle rounds: r = m_rounds/2 - 1, with two rounds per pass - one seeded
// from rk[0..3] into s0..s3, one from rk[4..7] into t0..t3. The loop
// statement and the advance of rk are not visible in this copy.
707 unsigned int r = m_rounds/2 - 1;
710 s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
712 QUARTER_ROUND_D(t3, s2, s1, s0, s3)
713 QUARTER_ROUND_D(t2, s1, s0, s3, s2)
714 QUARTER_ROUND_D(t1, s0, s3, s2, s1)
715 QUARTER_ROUND_D(t0, s3, s2, s1, s0)
717 t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
719 QUARTER_ROUND_D(s3, t2, t1, t0, t3)
720 QUARTER_ROUND_D(s2, t1, t0, t3, t2)
721 QUARTER_ROUND_D(s1, t0, t3, t2, t1)
722 QUARTER_ROUND_D(s0, t3, t2, t1, t0)
// Without unaligned access the last round reads Sd (see QUARTER_ROUND_LD), so
// Sd is read in the same way first: one word every cacheLineSize bytes across
// 256 bytes, plus the word at offset 252, with u then ORed into t0..t3.
727 #if !(defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)) 732 for (i=0; i<256; i+=cacheLineSize)
733 u &= *(
const word32 *)(
const void *)(Sd+i);
734 u &= *(
const word32 *)(
const void *)(Sd+252);
735 t0 |= u; t1 |= u; t2 |= u; t3 |= u;
// Final round: QUARTER_ROUND_LD writes single bytes into tempBlock, a byte
// view of tbw (declaration not visible), at the listed positions; the four
// words of tbw are then XORed with rk[0..3] and written through Block::Put
// together with xorBlock and outBlock.
739 byte *
const tempBlock = (byte *)tbw;
741 QUARTER_ROUND_LD(t2, 7, 2, 13, 8)
742 QUARTER_ROUND_LD(t1, 3, 14, 9, 4)
743 QUARTER_ROUND_LD(t0, 15, 10, 5, 0)
744 QUARTER_ROUND_LD(t3, 11, 6, 1, 12)
746 Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
// Rijndael_Enc_AdvancedProcessBlocks_SSE2(locals, k): assembly routine
// written with the AS2 macro wrappers. The same text is also emitted as the
// MASM procedure Rijndael_Enc_AdvancedProcessBlocks when
// CRYPTOPP_GENERATE_X64_MASM is defined. locals points at the work area
// addressed through the L_* offsets below and k at the round keys.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; labels, jumps, most round code and several preprocessor lines are
// missing from this copy, so the notes below describe only what is visible.
751 #if CRYPTOPP_MSC_VERSION 752 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 755 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 757 #if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) 759 CRYPTOPP_NAKED
void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks_SSE2(
void *locals,
const word32 *k)
761 CRYPTOPP_UNUSED(locals); CRYPTOPP_UNUSED(k);
// Offsets into the work area. Two sets are defined: the CRYPTOPP_BOOL_X86 set
// (L_INDEX adds 768, pointer slots 4 bytes apart, an L_SP slot) and a second
// set (pointer slots 8 bytes apart). The shared part places the sub-keys at
// offset 0, L_KEY12 at 16*12, L_LASTROUND at 16*13 and L_INBLOCKS at 16*14.
// MXOR / MMOV XOR or load a dword from the table entry indexed by si.
763 #if CRYPTOPP_BOOL_X86 766 #define L_INDEX(i) (L_REG+768+i) 767 #define L_INXORBLOCKS L_INBLOCKS+4 768 #define L_OUTXORBLOCKS L_INBLOCKS+8 769 #define L_OUTBLOCKS L_INBLOCKS+12 770 #define L_INCREMENTS L_INDEX(16*15) 771 #define L_SP L_INDEX(16*16) 772 #define L_LENGTH L_INDEX(16*16+4) 773 #define L_KEYS_BEGIN L_INDEX(16*16+8) 778 #define MXOR(a,b,c) \ 780 AS2( movd mm7, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\ 781 AS2( pxor MM(a), mm7)\ 783 #define MMOV(a,b,c) \ 785 AS2( movd MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\ 790 #define L_INDEX(i) (L_REG+i) 791 #define L_INXORBLOCKS L_INBLOCKS+8 792 #define L_OUTXORBLOCKS L_INBLOCKS+16 793 #define L_OUTBLOCKS L_INBLOCKS+24 794 #define L_INCREMENTS L_INDEX(16*16) 795 #define L_LENGTH L_INDEX(16*18+8) 796 #define L_KEYS_BEGIN L_INDEX(16*19) 808 #define MXOR(a,b,c) \ 810 AS2( xor MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\ 812 #define MMOV(a,b,c) \ 814 AS2( mov MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\ 818 #define L_SUBKEYS L_INDEX(0) 819 #define L_SAVED_X L_SUBKEYS 820 #define L_KEY12 L_INDEX(16*12) 821 #define L_LASTROUND L_INDEX(16*13) 822 #define L_INBLOCKS L_INDEX(16*14) 823 #define MAP0TO4(i) (ASM_MOD(i+3,4)+1) 827 AS2( xor a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\ 831 AS2( mov a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\ 833 #ifdef CRYPTOPP_GENERATE_X64_MASM 835 Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
842 mov AS_REG_7, ?Te@rdtable@
CryptoPP@@3PA_KA
843 mov edi, DWORD PTR [?g_cacheLineSize@
CryptoPP@@3IA]
844 #elif defined(__GNUC__) 848 #
if CRYPTOPP_BOOL_X64
853 AS2( mov AS_REG_7, WORD_REG(si))
859 AS2( lea AS_REG_7, [Te])
860 AS2( mov edi, [g_cacheLineSize])
// x86 only: the current esp is stored at ecx+16*12+16*4 and esp is then
// pointed at ecx-768.
863 #
if CRYPTOPP_BOOL_X86
864 AS2( mov [ecx+16*12+16*4], esp)
865 AS2( lea esp, [ecx-768])
// Key copy: si starts at the value stored in L_KEYS_BEGIN. The 16 bytes at
// k + 16 + (si & 16) go to L_KEY12, and 16-byte chunks are copied into the
// sub-key area with si advancing by 16 and compared against 16*12 (the loop
// label and jump are not visible).
869 AS2( mov WORD_REG(si), [L_KEYS_BEGIN])
870 AS2( mov WORD_REG(ax), 16)
871 AS2( and WORD_REG(ax), WORD_REG(si))
872 AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)])
873 AS2( movdqa [L_KEY12], xmm3)
874 AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
875 AS2( sub WORD_REG(ax), WORD_REG(si))
877 AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(si)])
878 AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0)
879 AS2( add WORD_REG(si), 16)
880 AS2( cmp WORD_REG(si), 16*12)
// Load the key after the copied range into xmm4, the first 16 bytes of k into
// xmm1, and the words at k+16..k+28 into MM(1), ebx, ecx and edx.
886 AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(si)])
887 AS2( movdqa xmm1, [WORD_REG(dx)])
888 AS2( MOVD MM(1), [WORD_REG(dx)+4*4])
889 AS2( mov ebx, [WORD_REG(dx)+5*4])
890 AS2( mov ecx, [WORD_REG(dx)+6*4])
891 AS2( mov edx, [WORD_REG(dx)+7*4])
// Table touch: starting from ax = 0, a dword is read at AS_REG_7 + ax and ax
// is advanced by di, four times per pass; ax is compared against 2048. The
// loaded value in esi is overwritten each time, so only the reads matter.
894 AS2( xor WORD_REG(ax), WORD_REG(ax))
896 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
897 AS2( add WORD_REG(ax), WORD_REG(di))
898 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
899 AS2( add WORD_REG(ax), WORD_REG(di))
900 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
901 AS2( add WORD_REG(ax), WORD_REG(di))
902 AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
903 AS2( add WORD_REG(ax), WORD_REG(di))
904 AS2( cmp WORD_REG(ax), 2048)
// Bit 0 of the stored length is tested. The C++ caller subtracts the
// counter-mode flag from a multiple of 16 when filling lengthAndCounterFlag,
// which appears to be the field this offset addresses.
910 AS2( test DWORD PTR [L_LENGTH], 1)
// Load the input block, XOR it with the first round key and keep its last
// byte in MM(2).
916 AS2( mov WORD_REG(si), [L_INBLOCKS])
917 AS2( movdqu xmm2, [WORD_REG(si)])
918 AS2( pxor xmm2, xmm1)
919 AS2( psrldq xmm1, 14)
921 AS2( mov al, BYTE PTR [WORD_REG(si)+15])
922 AS2( MOVD MM(2), eax)
923 #
if CRYPTOPP_BOOL_X86
961 AS2( mov eax, [L_KEY12+0*4])
962 AS2( mov edi, [L_KEY12+2*4])
963 AS2( MOVD MM(0), [L_KEY12+3*4])
970 AS2( xor ebx, [L_KEY12+1*4])
// Save the four partially computed words into L_SAVED_X (same location as
// the start of the sub-key area).
982 AS2( MOVD edx, MM(1))
983 AS2( MOVD [L_SAVED_X+3*4], MM(0))
984 AS2( mov [L_SAVED_X+0*4], eax)
985 AS2( mov [L_SAVED_X+1*4], ebx)
986 AS2( mov [L_SAVED_X+2*4], edi)
992 AS2( MOVD MM(1), [L_KEY12+0*4])
993 AS2( mov ebx, [L_KEY12+1*4])
994 AS2( mov ecx, [L_KEY12+2*4])
995 AS2( mov edx, [L_KEY12+3*4])
// Load the input block and the input-XOR block, and XOR both with the first
// round key; the result is then unpacked one dword at a time.
997 AS2( mov WORD_REG(ax), [L_INBLOCKS])
998 AS2( movdqu xmm2, [WORD_REG(ax)])
999 AS2( mov WORD_REG(si), [L_INXORBLOCKS])
1000 AS2( movdqu xmm5, [WORD_REG(si)])
1001 AS2( pxor xmm2, xmm1)
1002 AS2( pxor xmm2, xmm5)
1005 AS2( movd eax, xmm2)
1006 AS2( psrldq xmm2, 4)
1007 AS2( movd edi, xmm2)
1008 AS2( psrldq xmm2, 4)
1015 AS2( movd edi, xmm2)
1016 AS2( psrldq xmm2, 4)
1023 AS2( movd edi, xmm2)
1035 AS2( MOVD eax, MM(1))
1037 AS2( add L_REG, [L_KEYS_BEGIN])
1038 AS2( add L_REG, 4*16)
1044 AS2( MOVD ecx, MM(2))
1045 AS2( MOVD edx, MM(1))
1046 AS2( mov eax, [L_SAVED_X+0*4])
1047 AS2( mov ebx, [L_SAVED_X+1*4])
1049 AS2( and WORD_REG(cx), 255)
1051 #
if CRYPTOPP_BOOL_X86
1052 AS2( paddb MM(2), mm3)
1057 AS2( xor edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3])
1061 AS2( xor ecx, [L_SAVED_X+2*4])
1064 AS2( xor edx, [L_SAVED_X+3*4])
1066 AS2( add L_REG, [L_KEYS_BEGIN])
1067 AS2( add L_REG, 3*16)
// Round-key mixing at offsets relative to L_SUBKEYS-4*16, for two consecutive
// 16-byte keys (words 0..3, then words 4..7).
1098 AS2( MOVD MM(0), [L_SUBKEYS-4*16+3*4])
1099 AS2( mov edi, [L_SUBKEYS-4*16+2*4])
1102 AS2( xor eax, [L_SUBKEYS-4*16+0*4])
1103 AS2( xor ebx, [L_SUBKEYS-4*16+1*4])
1104 AS2( MOVD edx, MM(0))
1107 AS2( MOVD MM(0), [L_SUBKEYS-4*16+7*4])
1108 AS2( mov edi, [L_SUBKEYS-4*16+6*4])
1111 AS2( xor eax, [L_SUBKEYS-4*16+4*4])
1112 AS2( xor ebx, [L_SUBKEYS-4*16+5*4])
1113 AS2( MOVD edx, MM(0))
1116 AS2( test L_REG, 255)
1120 AS2( sub L_REG, 16*16)
// LAST(a, b, c): zero-extends a, loads the byte at offset 1 of its table
// entry, zero-extends b, XORs in the dword at offset 0 of its table entry and
// stores the low 16 bits at L_LASTROUND + c.
1122 #define LAST(a, b, c) \
1123 AS2( movzx esi, a )\
1124 AS2( movzx edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1] )\
1125 AS2( movzx esi, b )\
1126 AS2( xor edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0] )\
1127 AS2( mov WORD PTR [L_LASTROUND+c], di )\
1143 AS2( mov WORD_REG(ax), [L_OUTXORBLOCKS])
1144 AS2( mov WORD_REG(bx), [L_OUTBLOCKS])
// Output stage: the remaining length is reduced by 16, the output-XOR block
// is combined with xmm4 and the L_LASTROUND bytes and stored to the output
// pointer, and the stored block pointers are advanced by the stored
// increments (paddd on x86, paddq otherwise).
1146 AS2( mov WORD_REG(cx), [L_LENGTH])
1147 AS2( sub WORD_REG(cx), 16)
1149 AS2( movdqu xmm2, [WORD_REG(ax)])
1150 AS2( pxor xmm2, xmm4)
1152 #
if CRYPTOPP_BOOL_X86
1153 AS2( movdqa xmm0, [L_INCREMENTS])
1154 AS2( paddd xmm0, [L_INBLOCKS])
1155 AS2( movdqa [L_INBLOCKS], xmm0)
1157 AS2( movdqa xmm0, [L_INCREMENTS+16])
1158 AS2( paddq xmm0, [L_INBLOCKS+16])
1159 AS2( movdqa [L_INBLOCKS+16], xmm0)
1162 AS2( pxor xmm2, [L_LASTROUND])
1163 AS2( movdqu [WORD_REG(bx)], xmm2)
1168 AS2( mov [L_LENGTH], WORD_REG(cx))
1169 AS2( test WORD_REG(cx), 1)
1173 #
if CRYPTOPP_BOOL_X64
1174 AS2( movdqa xmm0, [L_INCREMENTS])
1175 AS2( paddq xmm0, [L_INBLOCKS])
1176 AS2( movdqa [L_INBLOCKS], xmm0)
// Wipe: xmm0 is zeroed and stored over 14 consecutive 16-byte slots starting
// at L_SUBKEYS (ax = L_SUBKEYS+7*16, offsets -7*16 through +6*16).
1184 AS2( xorps xmm0, xmm0)
1185 AS2( lea WORD_REG(ax), [L_SUBKEYS+7*16])
1186 AS2( movaps [WORD_REG(ax)-7*16], xmm0)
1187 AS2( movaps [WORD_REG(ax)-6*16], xmm0)
1188 AS2( movaps [WORD_REG(ax)-5*16], xmm0)
1189 AS2( movaps [WORD_REG(ax)-4*16], xmm0)
1190 AS2( movaps [WORD_REG(ax)-3*16], xmm0)
1191 AS2( movaps [WORD_REG(ax)-2*16], xmm0)
1192 AS2( movaps [WORD_REG(ax)-1*16], xmm0)
1193 AS2( movaps [WORD_REG(ax)+0*16], xmm0)
1194 AS2( movaps [WORD_REG(ax)+1*16], xmm0)
1195 AS2( movaps [WORD_REG(ax)+2*16], xmm0)
1196 AS2( movaps [WORD_REG(ax)+3*16], xmm0)
1197 AS2( movaps [WORD_REG(ax)+4*16], xmm0)
1198 AS2( movaps [WORD_REG(ax)+5*16], xmm0)
1199 AS2( movaps [WORD_REG(ax)+6*16], xmm0)
// x86 only: restore esp from the L_SP slot.
1200 #
if CRYPTOPP_BOOL_X86
1201 AS2( mov esp, [L_SP])
1206 #
if defined(_MSC_VER) && CRYPTOPP_BOOL_X86
1211 #ifdef CRYPTOPP_GENERATE_X64_MASM
1217 Rijndael_Enc_AdvancedProcessBlocks ENDP
// Operand and clobber lists of the GCC-style asm statement: locals is bound
// to constraint "c", k to "d", Te to "S" and g_cacheLineSize to "D"; memory,
// condition codes and eax (plus rbx and r8-r12 on x64) are declared
// clobbered.
1222 :
"c" (locals),
"d" (k),
"S" (Te),
"D" (g_cacheLineSize)
1223 :
"memory",
"cc",
"%eax" 1224 #
if CRYPTOPP_BOOL_X64
1225 ,
"%rbx",
"%r8",
"%r9",
"%r10",
"%r11",
"%r12" 1233 #ifndef CRYPTOPP_GENERATE_X64_MASM 1235 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 1237 void Rijndael_Enc_AdvancedProcessBlocks_SSE2(
void *locals,
const word32 *k);
// Rijndael::Enc::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks,
// length, flags): bulk encryption entry point. When compiled in, the AES-NI,
// ARMv8 or POWER8 routine is called with m_key and m_rounds and its result
// returned; otherwise the SSE2 / MASM assembly routine is driven through a
// Locals work area set up below.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; the runtime feature checks, braces, some returns and the body of
// the while loop are not visible in this copy.
1241 #if CRYPTOPP_RIJNDAEL_ADVANCED_PROCESS_BLOCKS 1242 size_t Rijndael::Enc::AdvancedProcessBlocks(
const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
const 1244 #if CRYPTOPP_AESNI_AVAILABLE 1246 return Rijndael_Enc_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1248 #if CRYPTOPP_ARM_AES_AVAILABLE 1250 return Rijndael_Enc_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1252 #if CRYPTOPP_POWER8_AES_AVAILABLE 1254 return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// Assembly path. Inputs shorter than one block are special-cased (the
// statement under the test is not visible). "zeros" points at Te+256, the two
// table entries that FillEncTable clears to zero.
1257 #if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) 1260 if (length < BLOCKSIZE)
1263 static const byte *zeros = (
const byte*)(Te+256);
// Pick the work area inside m_aliasBlock: start at its data pointer rounded
// up to a multiple of s_aliasBlockSize, then re-test with AliasedWithTable
// (the loop body is not visible; presumably it moves space forward).
1264 m_aliasBlock.SetMark(m_aliasBlock.size());
1265 byte *space = NULLPTR, *originalSpace =
const_cast<byte*
>(m_aliasBlock.data());
1268 space = originalSpace + (s_aliasBlockSize - (uintptr_t)originalSpace % s_aliasBlockSize) % s_aliasBlockSize;
1269 while (AliasedWithTable(space, space +
sizeof(Locals)))
// Reverse direction: start from the last whole block of each buffer and
// negate the step.
1275 size_t increment = BLOCKSIZE;
1276 if (flags & BT_ReverseDirection)
1279 inBlocks += length - BLOCKSIZE;
1280 xorBlocks += length - BLOCKSIZE;
1281 outBlocks += length - BLOCKSIZE;
1282 increment = 0-increment;
// Fill the work area. xorBlocks is used as the input-XOR source when
// BT_XorInput is set and as the output-XOR source otherwise; whichever role
// is unused (or both, when xorBlocks is null) points at zeros with a zero
// increment. BT_DontIncrementInOutPointers zeroes the in/out increments.
1285 Locals &locals = *(Locals *)(
void *)space;
1287 locals.inBlocks = inBlocks;
1288 locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros;
1289 locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
1290 locals.outBlocks = outBlocks;
1292 locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
1293 locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0;
1294 locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment;
1295 locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
// length rounded down to a multiple of 16, minus 1 when BT_InBlockIsCounter
// is set, so the flag travels in the low bit of the stored length.
1297 locals.lengthAndCounterFlag = length - (length%16) -
bool(flags & BT_InBlockIsCounter);
// Number of round keys to copy (m_rounds minus 3 in counter mode, minus 2
// otherwise) and the byte offset in the 12-slot sub-key area to start at.
1298 int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
1299 locals.keysBegin = (12-keysToCopy)*16;
// Run the assembly routine and report the bytes that did not fill a block.
1301 Rijndael_Enc_AdvancedProcessBlocks_SSE2(&locals, m_key);
1303 return length % BLOCKSIZE;
// Rijndael::Dec::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks,
// length, flags): bulk decryption entry point. Each visible branch forwards
// m_key, m_rounds and all arguments to the AES-NI, ARMv8 or POWER8 routine
// and returns its size_t result.
// NOTE(review): the integers fused into these lines look like listing line
// numbers; the runtime feature checks guarding each return and the fallback
// after the last branch are not visible in this copy.
1310 size_t Rijndael::Dec::AdvancedProcessBlocks(
const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
const 1312 #if CRYPTOPP_AESNI_AVAILABLE 1314 return Rijndael_Dec_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1316 #if CRYPTOPP_ARM_AES_AVAILABLE 1318 return Rijndael_Dec_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1320 #if CRYPTOPP_POWER8_AES_AVAILABLE 1322 return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1327 #endif // CRYPTOPP_RIJNDAEL_ADVANCED_PROCESS_BLOCKS Utility functions for the Crypto++ library.
bool HasAES()
Determine if an ARM processor has AES available.
Library configuration file.
int GetCacheLineSize()
Provides the cache line size.
Access a block of memory.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Classes for Rijndael encryption algorithm.
Functions for CPU features and intrinsics.
bool HasAESNI()
Determines AES-NI availability.
bool HasSSE2()
Determines SSE2 availability.
void xorbuf(byte *buf, const byte *mask, size_t count)
Performs an XOR of a buffer with a mask.
bool HasSSE41()
Determines SSE4.1 availability.
Crypto++ library namespace.
bool HasARMv7()
Determine if an ARM processor is ARMv7 or above.
Interface for retrieving values given their names.