8 #ifndef CRYPTOPP_GENERATE_X64_MASM 15 #if CRYPTOPP_MSC_VERSION 16 # pragma warning(disable: 4702 4740) 21 #if defined(CRYPTOPP_DISABLE_SALSA_ASM) 22 # undef CRYPTOPP_X86_ASM_AVAILABLE 23 # undef CRYPTOPP_X32_ASM_AVAILABLE 24 # undef CRYPTOPP_X64_ASM_AVAILABLE 25 # undef CRYPTOPP_SSE2_ASM_AVAILABLE 26 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE 31 #if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING) 32 void Salsa20_TestInstantiations()
// Salsa20 core transform body: copy the 16 input words into an aligned
// working array, run `rounds` rounds (two per loop pass: a "column" round
// followed by a "row" round), then feed forward into the output.
// NOTE(review): this chunk is extraction-mangled -- the leading numbers on
// each line are the file's original line numbers fused into the text, and
// brace/assignment lines (e.g. the x[i] load and the output store) were
// dropped by the extractor.
44 CRYPTOPP_ALIGN_DATA(16) word32 x[16];
// Load the 16-word input block into the working array (loop body dropped).
46 for (
size_t i = 0; i < 16; ++i)
// Main round loop: one double round (column + row) per pass, so `rounds`
// is expected to be even (8, 12 or 20 per CipherSetKey's validation).
50 for (
size_t i = 0; i < rounds; i += 2)
// Column round. Each group of four lines is one Salsa20 quarter-round:
//   b ^= (a+d)<<<7;  c ^= (b+a)<<<9;  d ^= (c+b)<<<13;  a ^= (d+c)<<<18
// applied down a column of the 4x4 state matrix (indices 0,4,8,12 etc.).
52 x[ 4] ^= rotlConstant< 7>(x[ 0]+x[12]);
53 x[ 8] ^= rotlConstant< 9>(x[ 4]+x[ 0]);
54 x[12] ^= rotlConstant<13>(x[ 8]+x[ 4]);
55 x[ 0] ^= rotlConstant<18>(x[12]+x[ 8]);
57 x[ 9] ^= rotlConstant< 7>(x[ 5]+x[ 1]);
58 x[13] ^= rotlConstant< 9>(x[ 9]+x[ 5]);
59 x[ 1] ^= rotlConstant<13>(x[13]+x[ 9]);
60 x[ 5] ^= rotlConstant<18>(x[ 1]+x[13]);
62 x[14] ^= rotlConstant< 7>(x[10]+x[ 6]);
63 x[ 2] ^= rotlConstant< 9>(x[14]+x[10]);
64 x[ 6] ^= rotlConstant<13>(x[ 2]+x[14]);
65 x[10] ^= rotlConstant<18>(x[ 6]+x[ 2]);
67 x[ 3] ^= rotlConstant< 7>(x[15]+x[11]);
68 x[ 7] ^= rotlConstant< 9>(x[ 3]+x[15]);
69 x[11] ^= rotlConstant<13>(x[ 7]+x[ 3]);
70 x[15] ^= rotlConstant<18>(x[11]+x[ 7]);
// Row round: same quarter-round, applied along rows (indices 0,1,2,3 etc.).
72 x[ 1] ^= rotlConstant< 7>(x[ 0]+x[ 3]);
73 x[ 2] ^= rotlConstant< 9>(x[ 1]+x[ 0]);
74 x[ 3] ^= rotlConstant<13>(x[ 2]+x[ 1]);
75 x[ 0] ^= rotlConstant<18>(x[ 3]+x[ 2]);
77 x[ 6] ^= rotlConstant< 7>(x[ 5]+x[ 4]);
78 x[ 7] ^= rotlConstant< 9>(x[ 6]+x[ 5]);
79 x[ 4] ^= rotlConstant<13>(x[ 7]+x[ 6]);
80 x[ 5] ^= rotlConstant<18>(x[ 4]+x[ 7]);
82 x[11] ^= rotlConstant< 7>(x[10]+x[ 9]);
83 x[ 8] ^= rotlConstant< 9>(x[11]+x[10]);
84 x[ 9] ^= rotlConstant<13>(x[ 8]+x[11]);
85 x[10] ^= rotlConstant<18>(x[ 9]+x[ 8]);
87 x[12] ^= rotlConstant< 7>(x[15]+x[14]);
88 x[13] ^= rotlConstant< 9>(x[12]+x[15]);
89 x[14] ^= rotlConstant<13>(x[13]+x[12]);
90 x[15] ^= rotlConstant<18>(x[14]+x[13]);
// Feed-forward and output loops; the loop bodies (presumably adding the
// working words back into the input and storing the result -- TODO confirm
// against upstream salsa.cpp) were dropped by extraction.
96 for (
size_t i = 0; i < 16; ++i)
99 for (
size_t i = 0; i < 16; ++i)
// AlgorithmProvider (body dropped by extraction) and CipherSetKey.
// CipherSetKey validates the round count, loads the key into the state,
// and installs the Salsa20 constants.
// FIX(review): the parameter list read "¶ms" -- extraction mojibake for
// "&params" (the HTML entity for "&pa" was rendered as the pilcrow).
// NOTE(review): the leading numbers are original source line numbers fused
// into the text; the throw statement and the BlockGet setup lines between
// the fragments below were dropped by the extractor.
104 std::string Salsa20_Policy::AlgorithmProvider()
const 106 #if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SALSA_ASM) 113 void Salsa20_Policy::CipherSetKey(
const NameValuePairs &params,
const byte *key,
size_t length)
// Only 8, 12 and 20 rounds are defined for Salsa20/r.
117 if (rounds != 20 && rounds != 12 && rounds != 8)
// Little-endian key load: first 16 key bytes into words 13,10,7,4; the
// second half (or the same half again for 16-byte keys -- dropped lines)
// into words 15,12,9,6.
125 get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
127 get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
// Diagonal constants: little-endian ASCII of "expand 16-byte k" (tau) for
// 128-bit keys, "expand 32-byte k" (sigma) otherwise.
130 m_state[0] = 0x61707865;
131 m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
132 m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
133 m_state[3] = 0x6b206574;
// Resynchronize: install a new 64-bit IV and reset the block counter.
// NOTE(review): leading numbers are original line numbers fused in by
// extraction; intervening lines were dropped.
136 void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer,
const byte *
IV,
size_t length)
// keystreamBuffer and length are not used on this path.
138 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
// Little-endian load of the IV into state words 14 and 11.
142 get(m_state[14])(m_state[11]);
// Reset the 64-bit block counter: low word in m_state[8], high word in
// m_state[5] (same split as SeekToIteration below).
143 m_state[8] = m_state[5] = 0;
// Position the keystream at an absolute 64-bit block index.
146 void Salsa20_Policy::SeekToIteration(lword iterationCount)
// The 64-bit counter is split across two state words: low 32 bits in
// m_state[8], high 32 bits in m_state[5].
148 m_state[8] = (word32)iterationCount;
149 m_state[5] = (word32)SafeRightShift<32>(iterationCount);
// GetAlignment fragment: the SSE2 branch's return statement was dropped by
// extraction; the visible fallback returns word32 alignment.
152 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) 155 #if CRYPTOPP_SSE2_ASM_AVAILABLE 160 return GetAlignmentOf<word32>();
// OperateKeystream, SSE2/MASM section. Three build variants are fused
// together by preprocessor blocks: (1) a call into an externally-assembled
// MASM routine on x64 MSVC, (2) inline SSE2 assembly (GNU or MSVC syntax),
// and (3) the portable C++ path further below in the file.
// NOTE(review): leading numbers are original line numbers fused in by
// extraction; labels, braces and some #else/#endif lines were dropped.
165 #if CRYPTOPP_SSE2_ASM_AVAILABLE 174 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 176 void Salsa20_OperateKeystream(byte *output,
const byte *input,
size_t iterationCount,
int rounds,
void *state);
180 #if CRYPTOPP_MSC_VERSION 181 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 184 void Salsa20_Policy::OperateKeystream(
KeystreamOperation operation, byte *output,
const byte *input,
size_t iterationCount)
// x64 MASM build: delegate the whole operation to the assembled routine.
186 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM 188 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 189 Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.
data());
// MASM prologue: fetch the 5th argument (state) from the stack, allocate
// the workspace frame, and spill xmm6-xmm15 (callee-saved on Win64).
193 #if CRYPTOPP_SSE2_ASM_AVAILABLE 194 #ifdef CRYPTOPP_GENERATE_X64_MASM 196 Salsa20_OperateKeystream PROC FRAME
197 mov r10, [rsp + 5*8] ; state
198 alloc_stack(10*16 + 32*16 + 8)
199 save_xmm128 xmm6, 0200h
200 save_xmm128 xmm7, 0210h
201 save_xmm128 xmm8, 0220h
202 save_xmm128 xmm9, 0230h
203 save_xmm128 xmm10, 0240h
204 save_xmm128 xmm11, 0250h
205 save_xmm128 xmm12, 0260h
206 save_xmm128 xmm13, 0270h
207 save_xmm128 xmm14, 0280h
208 save_xmm128 xmm15, 0290h
// Register-name abstraction so the same asm body serves the MASM, x64 GNU
// and x86 builds. NOTE(review): "e9d" below looks like extraction damage
// for "r9d" (the 4th Win64 integer argument register) -- verify against
// upstream. The GNU-asm section is also missing its REG_input/REG_state
// defines (dropped lines).
211 #define REG_output rcx 212 #define REG_input rdx 213 #define REG_iterationCount r8 214 #define REG_state r10 215 #define REG_rounds e9d 216 #define REG_roundsLeft eax 217 #define REG_temp32 r11d 219 #define SSE2_WORKSPACE rsp 223 #if CRYPTOPP_BOOL_X64 224 #define REG_output %1 226 #define REG_iterationCount %2 228 #define REG_rounds %3 229 #define REG_roundsLeft eax 230 #define REG_temp32 edx 232 #define SSE2_WORKSPACE %5 234 CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
236 #define REG_output edi 237 #define REG_input eax 238 #define REG_iterationCount ecx 239 #define REG_state esi 240 #define REG_rounds edx 241 #define REG_roundsLeft ebx 242 #define REG_temp32 ebp 244 #define SSE2_WORKSPACE esp + WORD_SZ 253 void *s = m_state.
data();
// x86 inline-asm entry: load the C++ locals into the abstract registers.
256 AS2( mov REG_iterationCount, iterationCount)
257 AS2( mov REG_input, input)
258 AS2( mov REG_output, output)
259 AS2( mov REG_state, s)
260 AS2( mov REG_rounds, r)
// Dispatch: the 4-blocks-at-a-time path is taken while >= 4 iterations
// remain (the conditional jump after this compare was dropped).
262 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 265 AS2( cmp REG_iterationCount, 4)
// 4-way (x86) / up-to-16-way (x64) SIMD machinery. SSE2_EXPAND_S
// broadcasts each state word across an xmm lane into the workspace so four
// blocks are processed in parallel; the L01..L32 micro-ops are the
// interleaved steps of one quarter-round, instantiated with and without
// XOP's vprotd (the pslld/psrld pair emulates the rotate without it).
// NOTE(review): leading numbers are original line numbers fused in by
// extraction; several #if/#else/#endif separators were dropped.
268 #if CRYPTOPP_BOOL_X86 275 #define SSE2_EXPAND_S(i, j) \ 276 ASS( pshufd xmm4, xmm##i, j, j, j, j) \ 277 AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4) 279 AS2( movdqa xmm0, [REG_state + 0*16])
280 AS2( movdqa xmm1, [REG_state + 1*16])
281 AS2( movdqa xmm2, [REG_state + 2*16])
282 AS2( movdqa xmm3, [REG_state + 3*16])
// Expand the counter words (8 and 5) with per-lane increments so the four
// parallel blocks get consecutive counter values; then write the advanced
// counter back to the state.
298 #define SSE2_EXPAND_S85(i) \ 299 AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \ 300 AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \ 301 AS2( add REG_roundsLeft, 1) \ 302 AS2( adc REG_temp32, 0) 305 AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
306 AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
311 AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
312 AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
// Single-register quarter-round, XOP (vprotd) and plain-SSE2 variants.
315 #define SSE2_QUARTER_ROUND(a, b, d, i) \ 316 AS2( movdqa xmm4, xmm##d) \ 317 AS2( paddd xmm4, xmm##a) \ 318 AS3( vprotd xmm4, xmm4, i) \ 319 AS2( pxor xmm##b, xmm4) 321 #define SSE2_QUARTER_ROUND(a, b, d, i) \ 322 AS2( movdqa xmm4, xmm##d) \ 323 AS2( paddd xmm4, xmm##a) \ 324 AS2( movdqa xmm5, xmm4) \ 325 AS2( pslld xmm4, i) \ 326 AS2( psrld xmm5, 32-i) \ 327 AS2( pxor xmm##b, xmm4) \ 328 AS2( pxor xmm##b, xmm5) 331 #define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 332 #define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) 333 #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 336 #define L04(A,B,C,D,a,b,c,d,i) 337 #define L05(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 7) 338 #define L06(A,B,C,D,a,b,c,d,i) 339 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) 340 #define L08(A,B,C,D,a,b,c,d,i) 342 #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 343 #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) 344 #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) 345 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) 346 #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 349 #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) 350 #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 351 #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 354 #define L12(A,B,C,D,a,b,c,d,i) 355 #define L13(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 9) 356 #define L14(A,B,C,D,a,b,c,d,i) 357 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) 358 #define L16(A,B,C,D,a,b,c,d,i) 360 #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 361 #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) 362 #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) 363 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) 364 #define 
L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 367 #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) 368 #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 369 #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) 372 #define L20(A,B,C,D,a,b,c,d,i) 373 #define L21(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 13) 374 #define L22(A,B,C,D,a,b,c,d,i) 375 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 376 #define L24(A,B,C,D,a,b,c,d,i) 378 #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 379 #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) 380 #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) 381 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 382 #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 385 #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) 386 #define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D) 389 #define L27(A,B,C,D,a,b,c,d,i) 390 #define L28(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 18) 391 #define L29(A,B,C,D,a,b,c,d,i) 392 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) 393 #define L31(A,B,C,D,a,b,c,d,i) 395 #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 396 #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) 397 #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) 398 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) 399 #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 402 #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) 404 #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \ 405 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \ 406 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \ 407 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \ 408 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \ 409 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \ 410 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \ 
411 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \ 412 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \ 413 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \ 414 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \ 415 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \ 416 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \ 417 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \ 418 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \ 419 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \ 420 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \ 421 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \ 422 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \ 423 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \ 424 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \ 425 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \ 426 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \ 427 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \ 428 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \ 429 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \ 430 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \ 431 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \ 432 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \ 433 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \ 434 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \ 435 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \ 436 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) 438 #define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \ 439 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \ 440 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \ 441 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \ 442 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \ 443 
L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \ 444 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \ 445 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \ 446 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \ 447 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \ 448 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \ 449 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \ 450 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \ 451 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \ 452 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \ 453 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \ 454 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \ 455 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \ 456 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \ 457 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \ 458 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \ 459 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \ 460 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, 
E,F,G,H, i) \ 461 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \ 462 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \ 463 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \ 464 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \ 465 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \ 466 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \ 467 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \ 468 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \ 469 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \ 470 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i) 472 #if CRYPTOPP_BOOL_X64 473 SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
// Four-blocks-at-a-time loop body. On x86 the 16 quarter-rounds are split
// into two X8 invocations (only 8 xmm registers available); x64 uses X16.
475 SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
476 SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
478 AS2( mov REG_roundsLeft, REG_rounds)
// SSE2_Salsa_Output subroutine: 4x4 dword transpose (punpck* pairs) so the
// four interleaved blocks come back out in per-block order, then write.
481 ASL(SSE2_Salsa_Output)
482 AS2( movdqa xmm0, xmm4)
483 AS2( punpckldq xmm4, xmm5)
484 AS2( movdqa xmm1, xmm6)
485 AS2( punpckldq xmm6, xmm7)
486 AS2( movdqa xmm2, xmm4)
487 AS2( punpcklqdq xmm4, xmm6)
488 AS2( punpckhqdq xmm2, xmm6)
489 AS2( punpckhdq xmm0, xmm5)
490 AS2( punpckhdq xmm1, xmm7)
491 AS2( movdqa xmm6, xmm0)
492 AS2( punpcklqdq xmm0, xmm1)
493 AS2( punpckhqdq xmm6, xmm1)
494 AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
// Round loop: column half then row half (note the permuted word order in
// the second X16/X8 pair), two rounds per pass.
498 #if CRYPTOPP_BOOL_X64 499 SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
501 SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
503 SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
504 SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)
506 SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
507 SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
509 AS2( sub REG_roundsLeft, 2)
// Feed-forward: add the saved input words (workspace + 256) to the worked
// words, then transpose and emit via SSE2_Salsa_Output.
512 #define SSE2_OUTPUT_4(a, b, c, d) \ 513 AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\ 514 AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\ 515 AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\ 516 AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\ 517 AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\ 518 AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\ 519 AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\ 520 AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\ 521 ASC( call, SSE2_Salsa_Output) 523 SSE2_OUTPUT_4(0, 13, 10, 7)
524 SSE2_OUTPUT_4(4, 1, 14, 11)
525 SSE2_OUTPUT_4(8, 5, 2, 15)
526 SSE2_OUTPUT_4(12, 9, 6, 3)
// Advance input (when non-null) and output pointers past the 4 processed
// blocks (the final 4*16 of the 16*16 bytes is advanced inside the output
// macro -- TODO confirm; jump targets were dropped by extraction).
527 AS2( test REG_input, REG_input)
529 AS2( add REG_input, 12*16)
531 AS2( add REG_output, 12*16)
532 AS2( sub REG_iterationCount, 4)
533 AS2( cmp REG_iterationCount, 4)
540 AS2( movdqa xmm0, [REG_state + 0*16])
541 AS2( movdqa xmm1, [REG_state + 1*16])
542 AS2( movdqa xmm2, [REG_state + 2*16])
543 AS2( movdqa xmm3, [REG_state + 3*16])
544 AS2( mov REG_roundsLeft, REG_rounds)
547 SSE2_QUARTER_ROUND(0, 1, 3, 7)
548 SSE2_QUARTER_ROUND(1, 2, 0, 9)
549 SSE2_QUARTER_ROUND(2, 3, 1, 13)
550 SSE2_QUARTER_ROUND(3, 0, 2, 18)
551 ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
552 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
553 ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
554 SSE2_QUARTER_ROUND(0, 3, 1, 7)
555 SSE2_QUARTER_ROUND(3, 2, 0, 9)
556 SSE2_QUARTER_ROUND(2, 1, 3, 13)
557 SSE2_QUARTER_ROUND(1, 0, 2, 18)
558 ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
559 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
560 ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
561 AS2( sub REG_roundsLeft, 2)
564 AS2( paddd xmm0, [REG_state + 0*16])
565 AS2( paddd xmm1, [REG_state + 1*16])
566 AS2( paddd xmm2, [REG_state + 2*16])
567 AS2( paddd xmm3, [REG_state + 3*16])
569 AS2( add dword ptr [REG_state + 8*4], 1)
570 AS2( adc dword ptr [REG_state + 5*4], 0)
572 AS2( pcmpeqb xmm6, xmm6)
574 ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
575 AS2( movdqa xmm4, xmm0)
576 AS2( movdqa xmm5, xmm3)
577 AS2( pand xmm0, xmm7)
578 AS2( pand xmm4, xmm6)
579 AS2( pand xmm3, xmm6)
580 AS2( pand xmm5, xmm7)
582 AS2( movdqa xmm5, xmm1)
583 AS2( pand xmm1, xmm7)
584 AS2( pand xmm5, xmm6)
586 AS2( pand xmm6, xmm2)
587 AS2( pand xmm2, xmm7)
591 AS2( movdqa xmm5, xmm4)
592 AS2( movdqa xmm6, xmm0)
593 AS3( shufpd xmm4, xmm1, 2)
594 AS3( shufpd xmm0, xmm2, 2)
595 AS3( shufpd xmm1, xmm5, 2)
596 AS3( shufpd xmm2, xmm6, 2)
599 AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
// GNU extended-asm operand lists. x64 variant: input/output/iterationCount
// are read-write ("+r"); rounds, state pointer and workspace are inputs;
// eax/rdx, memory, flags and all 16 xmm registers are clobbered. The x86
// variant below binds the pointers to fixed registers (eax/edi/ecx/esi/edx)
// to match the REG_* defines above.
607 #if CRYPTOPP_BOOL_X64 608 :
"+r" (input),
"+r" (output),
"+r" (iterationCount)
609 :
"r" (m_rounds),
"r" (m_state.
begin()),
"r" (workspace)
610 :
"%eax",
"%rdx",
"memory",
"cc",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7",
"%xmm8",
"%xmm9",
"%xmm10",
"%xmm11",
"%xmm12",
"%xmm13",
"%xmm14",
"%xmm15" 612 :
"+a" (input),
"+D" (output),
"+c" (iterationCount)
613 :
"d" (m_rounds),
"S" (m_state.
begin())
// MASM epilogue: restore the callee-saved xmm6-xmm15 spilled in the
// prologue (same 0200h..0290h frame offsets), release the frame, and end
// the procedure. The `ret` between the add and ENDP was dropped by
// extraction.
618 #ifdef CRYPTOPP_GENERATE_X64_MASM 619 movdqa xmm6, [rsp + 0200h]
620 movdqa xmm7, [rsp + 0210h]
621 movdqa xmm8, [rsp + 0220h]
622 movdqa xmm9, [rsp + 0230h]
623 movdqa xmm10, [rsp + 0240h]
624 movdqa xmm11, [rsp + 0250h]
625 movdqa xmm12, [rsp + 0260h]
626 movdqa xmm13, [rsp + 0270h]
627 movdqa xmm14, [rsp + 0280h]
628 movdqa xmm15, [rsp + 0290h]
629 add rsp, 10*16 + 32*16 + 8
631 Salsa20_OperateKeystream ENDP
// Portable C++ keystream path (reference implementation): one 64-byte
// block per iteration, state in sixteen word32 locals.
637 #ifndef CRYPTOPP_GENERATE_X64_MASM 639 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
641 while (iterationCount--)
// Snapshot the state into locals for this block.
643 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
644 x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
645 x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
646 x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
// Two rounds (column then row) per pass.
648 for (
int i=m_rounds; i>0; i-=2)
// QUARTER_ROUND is the standard Salsa20 quarter-round with rotation
// counts 7/9/13/18.
650 #define QUARTER_ROUND(a, b, c, d) \ 651 b = b ^ rotlConstant<7>(a + d); \ 652 c = c ^ rotlConstant<9>(b + a); \ 653 d = d ^ rotlConstant<13>(c + b); \ 654 a = a ^ rotlConstant<18>(d + c); 656 QUARTER_ROUND(x0, x4, x8, x12)
657 QUARTER_ROUND(x1, x5, x9, x13)
658 QUARTER_ROUND(x2, x6, x10, x14)
659 QUARTER_ROUND(x3, x7, x11, x15)
661 QUARTER_ROUND(x0, x13, x10, x7)
662 QUARTER_ROUND(x1, x14, x11, x4)
663 QUARTER_ROUND(x2, x15, x8, x5)
664 QUARTER_ROUND(x3, x12, x9, x6)
// SALSA_OUTPUT emits the feed-forwarded words little-endian; the word
// order (0,13,10,7,4,1,...) matches the permuted state layout this
// implementation keys into m_state. Afterwards the 64-bit counter is
// incremented: ++m_state[8] with carry into m_state[5] (carry statement
// dropped by extraction -- TODO confirm against upstream).
667 #ifndef CRYPTOPP_DOXYGEN_PROCESSING 668 #define SALSA_OUTPUT(x) {\ 669 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ 670 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\ 671 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\ 672 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\ 673 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ 674 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\ 675 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\ 676 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\ 677 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ 678 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\ 679 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\ 680 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\ 681 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ 682 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\ 683 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\ 684 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);} 690 if (++m_state[8] == 0)
// XSalsa20 CipherSetKey fragment (function header and key-load lines were
// dropped by extraction): validate the round count, then install the
// "expand 32-byte k" (sigma) constants -- XSalsa20 always uses a 256-bit
// key, so there is no tau variant here.
699 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
700 throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
707 m_state[0] = 0x61707865;
708 m_state[1] = 0x3320646e;
709 m_state[2] = 0x79622d32;
710 m_state[3] = 0x6b206574;
// XSalsa20 resynchronize: derive the Salsa20 subkey from m_key and the
// first 16 IV bytes by running the Salsa20 rounds WITHOUT the feed-forward
// addition (the HSalsa20 construction), then install the remaining IV
// words and reset the counter. Function header dropped by extraction.
715 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
718 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
// Little-endian load of the first 16 IV bytes into the nonce positions;
// the trailing 8 IV bytes land in m_state[14]/m_state[11] for the stream.
721 get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);
// Key words and constants into the same permuted layout used above.
723 x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
724 x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];
725 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
727 for (
int i=m_rounds; i>0; i-=2)
729 QUARTER_ROUND(x0, x4, x8, x12)
730 QUARTER_ROUND(x1, x5, x9, x13)
731 QUARTER_ROUND(x2, x6, x10, x14)
732 QUARTER_ROUND(x3, x7, x11, x15)
734 QUARTER_ROUND(x0, x13, x10, x7)
735 QUARTER_ROUND(x1, x14, x11, x4)
736 QUARTER_ROUND(x2, x15, x8, x5)
737 QUARTER_ROUND(x3, x12, x9, x6)
// No feed-forward: selected output words become the subkey directly.
740 m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3;
741 m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5;
// Reset the 64-bit block counter (low in m_state[8], high in m_state[5]).
742 m_state[8] = m_state[5] = 0;
747 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Standard names for retrieving values by name when working with NameValuePairs.
Utility functions for the Crypto++ library.
Library configuration file.
virtual unsigned int GetOptimalBlockSize() const
Provides number of ideal bytes to process.
unsigned int GetAlignment() const
Provides data alignment requirements.
#define CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(x, y)
Helper macro to implement OperateKeystream.
byte order is little-endian
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
Key the cipher.
static const int BYTES_PER_ITERATION
Number of bytes for an iteration.
Exception thrown when an invalid number of rounds is encountered.
void Salsa20_Core(word32 *data, unsigned int rounds)
Salsa20 core transform.
A::pointer data()
Provides a pointer to the first element in the memory block.
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
Resynchronize the cipher.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Functions for CPU features and intrinsics.
Classes for Salsa and Salsa20 stream ciphers.
iterator begin()
Provides an iterator pointing to the first element in the memory block.
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
bool HasSSE2()
Determines SSE2 availability.
Access a block of memory.
KeystreamOperation
Keystream operation flags.
Crypto++ library namespace.
SymmetricCipher implementation.
size_type size() const
Provides the count of elements in the SecBlock.
Interface for retrieving values given their names.