#if (CRYPTOPP_SSSE3_AVAILABLE)
# include "adv_simd.h"
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
#endif

#if defined(__AVX512F__)
# define CRYPTOPP_AVX512_ROTATE 1
# include <immintrin.h>
#endif

// Squash MS LNK4221 and libtool warnings
extern const char SIMECK_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word16;
using CryptoPP::word32;
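// SIMECK64 uses a 64-bit block split into two 32-bit words, so each 128-bit
// SSE register holds two blocks. The helpers below rotate 32-bit lanes and
// rearrange block words so the round function can be applied to several
// blocks in parallel.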
#if (CRYPTOPP_SSSE3_AVAILABLE)

template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{
#if defined(CRYPTOPP_AVX512_ROTATE)
    return _mm_rol_epi32(val, R);
#elif defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
#endif
}
template <unsigned int R>
inline __m128i RotateRight32(const __m128i& val)
{
#if defined(CRYPTOPP_AVX512_ROTATE)
    return _mm_ror_epi32(val, R);
#elif defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
#endif
}
// Rotate-by-8 is a byte shuffle, which is faster than two shifts and an or
template <>
inline __m128i RotateLeft32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}
// Rotate-by-8 is a byte shuffle, which is faster than two shifts and an or
template <>
inline __m128i RotateRight32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-8);
#else
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
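// UnpackXMM<IDX>(a, b, c, d) gathers the IDX-th 32-bit word from each of the
// four input registers and byte-swaps each word (an endian swap for SSE).
// Taken together, the four specializations perform a 4x4 word transpose of
// the inputs.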
template <unsigned int IDX>
inline __m128i UnpackXMM(const __m128i& a, const __m128i& b,
    const __m128i& c, const __m128i& d)
{
    // Should not be instantiated
    CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
    CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
    CRYPTOPP_ASSERT(0);
    return _mm_setzero_si128();
}
template <>
inline __m128i UnpackXMM<0>(const __m128i& a, const __m128i& b,
    const __m128i& c, const __m128i& d)
{
    // Gather word 0 of each register, then swap the bytes within each word
    const __m128i r1 = _mm_unpacklo_epi32(a, b);
    const __m128i r2 = _mm_unpacklo_epi32(c, d);
    return _mm_shuffle_epi8(_mm_unpacklo_epi64(r1, r2),
        _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
}
template <>
inline __m128i UnpackXMM<1>(const __m128i& a, const __m128i& b,
    const __m128i& c, const __m128i& d)
{
    const __m128i r1 = _mm_unpacklo_epi32(a, b);
    const __m128i r2 = _mm_unpacklo_epi32(c, d);
    return _mm_shuffle_epi8(_mm_unpackhi_epi64(r1, r2),
        _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
}
template <>
inline __m128i UnpackXMM<2>(const __m128i& a, const __m128i& b,
    const __m128i& c, const __m128i& d)
{
    const __m128i r1 = _mm_unpackhi_epi32(a, b);
    const __m128i r2 = _mm_unpackhi_epi32(c, d);
    return _mm_shuffle_epi8(_mm_unpacklo_epi64(r1, r2),
        _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
}
template <>
inline __m128i UnpackXMM<3>(const __m128i& a, const __m128i& b,
    const __m128i& c, const __m128i& d)
{
    const __m128i r1 = _mm_unpackhi_epi32(a, b);
    const __m128i r2 = _mm_unpackhi_epi32(c, d);
    return _mm_shuffle_epi8(_mm_unpackhi_epi64(r1, r2),
        _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
}
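// The single-register overloads broadcast one byte-swapped 32-bit word of v
// to all four lanes. They serve the single-register code path, where one XMM
// word holds two 64-bit blocks; only one lane of each result is consumed when
// the data is repacked, so the replication is harmless.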
template <unsigned int IDX>
inline __m128i UnpackXMM(const __m128i& v)
{
    // Should not be instantiated
    CRYPTOPP_UNUSED(v);
    CRYPTOPP_ASSERT(0);
    return _mm_setzero_si128();
}
template <>
inline __m128i UnpackXMM<0>(const __m128i& v)
{
    return _mm_shuffle_epi8(v, _mm_set_epi8(0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3));
}

template <>
inline __m128i UnpackXMM<1>(const __m128i& v)
{
    return _mm_shuffle_epi8(v, _mm_set_epi8(4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7));
}

template <>
inline __m128i UnpackXMM<2>(const __m128i& v)
{
    return _mm_shuffle_epi8(v, _mm_set_epi8(8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11));
}

template <>
inline __m128i UnpackXMM<3>(const __m128i& v)
{
    return _mm_shuffle_epi8(v, _mm_set_epi8(12,13,14,15, 12,13,14,15, 12,13,14,15, 12,13,14,15));
}
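// RepackXMM is the inverse of UnpackXMM. The word transpose and the per-word
// byte swap are both involutions, so repacking simply applies the same
// shuffles a second time.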
template <unsigned int IDX>
inline __m128i RepackXMM(const __m128i& a, const __m128i& b,
    const __m128i& c, const __m128i& d)
{
    return UnpackXMM<IDX>(a, b, c, d);
}
template <unsigned int IDX>
inline __m128i RepackXMM(const __m128i& v)
{
    return UnpackXMM<IDX>(v);
}
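// One SIMECK round. (a, b) and (c, d) each hold (left, right) block halves,
// and the update is
//   left'  = (left & (left <<< 5)) ^ (left <<< 1) ^ right ^ key
//   right' = left
// so a single call advances two groups of blocks by one round.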
inline void SIMECK64_Encrypt(__m128i &a, __m128i &b, __m128i &c, __m128i &d,
    const __m128i key)
{
    // Save the left halves; they become the new right halves
    __m128i s, t; s = a; t = c;

    a = _mm_xor_si128(_mm_and_si128(a, RotateLeft32<5>(a)), RotateLeft32<1>(a));
    c = _mm_xor_si128(_mm_and_si128(c, RotateLeft32<5>(c)), RotateLeft32<1>(c));
    a = _mm_xor_si128(a, _mm_xor_si128(b, key));
    c = _mm_xor_si128(c, _mm_xor_si128(d, key));

    b = s; d = t;
}
inline void SIMECK64_Enc_Block(__m128i &block0,
    const word32 *subkeys, unsigned int /*rounds*/)
{
    // Rearrange the data for vectorization. UnpackXMM includes
    // a little-endian swap for SSE.
    __m128i a = UnpackXMM<0>(block0);
    __m128i b = UnpackXMM<1>(block0);
    __m128i c = UnpackXMM<2>(block0);
    __m128i d = UnpackXMM<3>(block0);

    const unsigned int rounds = 44;
    for (int i = 0; i < static_cast<int>(rounds); i += 4)
    {
        // Load four 32-bit subkeys and broadcast each one in turn
        const __m128i key = _mm_loadu_si128((const __m128i*)(subkeys + i));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(0, 0, 0, 0)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(1, 1, 1, 1)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(2, 2, 2, 2)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(3, 3, 3, 3)));
    }

    // Repack the data and undo the little-endian swap
    block0 = RepackXMM<0>(a, b, c, d);
}
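// Decryption reuses the encryption round: for this Feistel-like structure,
// decrypting amounts to swapping the two words of each block, running the
// rounds with the subkeys in reverse order, and swapping back at the end.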
inline void SIMECK64_Dec_Block(__m128i &block0,
    const word32 *subkeys, unsigned int /*rounds*/)
{
    // Swap the two words of each 64-bit block for decryption
    __m128i w = _mm_shuffle_epi32(block0, _MM_SHUFFLE(2, 3, 0, 1));

    // Rearrange the data for vectorization. UnpackXMM includes
    // a little-endian swap for SSE.
    __m128i a = UnpackXMM<0>(w);
    __m128i b = UnpackXMM<1>(w);
    __m128i c = UnpackXMM<2>(w);
    __m128i d = UnpackXMM<3>(w);

    const unsigned int rounds = 44;
    for (int i = static_cast<int>(rounds)-1; i >= 0; i -= 4)
    {
        // Load four 32-bit subkeys and apply them in reverse order
        const __m128i key = _mm_loadu_si128((const __m128i*)(subkeys + i - 3));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(3, 3, 3, 3)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(2, 2, 2, 2)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(1, 1, 1, 1)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(0, 0, 0, 0)));
    }

    // Repack the data and undo the little-endian swap
    w = RepackXMM<0>(a, b, c, d);

    // Undo the word swap
    block0 = _mm_shuffle_epi32(w, _MM_SHUFFLE(2, 3, 0, 1));
}
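// The four-register variants transpose block0..block3 into word-sliced form:
// after UnpackXMM, each of a, b, c, d holds one 32-bit word from every input
// register, so each SIMECK64_Encrypt call below advances all of the loaded
// blocks by one round.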
inline void SIMECK64_Enc_4_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3,
    const word32 *subkeys, unsigned int /*rounds*/)
{
    // Rearrange the data for vectorization. UnpackXMM includes
    // a little-endian swap for SSE.
    __m128i a = UnpackXMM<0>(block0, block1, block2, block3);
    __m128i b = UnpackXMM<1>(block0, block1, block2, block3);
    __m128i c = UnpackXMM<2>(block0, block1, block2, block3);
    __m128i d = UnpackXMM<3>(block0, block1, block2, block3);

    const unsigned int rounds = 44;
    for (int i = 0; i < static_cast<int>(rounds); i += 4)
    {
        // Load four 32-bit subkeys and broadcast each one in turn
        const __m128i key = _mm_loadu_si128((const __m128i*)(subkeys + i));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(0, 0, 0, 0)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(1, 1, 1, 1)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(2, 2, 2, 2)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(3, 3, 3, 3)));
    }

    // Repack the data and undo the little-endian swap
    block0 = RepackXMM<0>(a, b, c, d);
    block1 = RepackXMM<1>(a, b, c, d);
    block2 = RepackXMM<2>(a, b, c, d);
    block3 = RepackXMM<3>(a, b, c, d);
}
inline void SIMECK64_Dec_4_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3,
    const word32 *subkeys, unsigned int /*rounds*/)
{
    // Swap the two words of each 64-bit block for decryption
    __m128i w = _mm_shuffle_epi32(block0, _MM_SHUFFLE(2, 3, 0, 1));
    __m128i x = _mm_shuffle_epi32(block1, _MM_SHUFFLE(2, 3, 0, 1));
    __m128i y = _mm_shuffle_epi32(block2, _MM_SHUFFLE(2, 3, 0, 1));
    __m128i z = _mm_shuffle_epi32(block3, _MM_SHUFFLE(2, 3, 0, 1));

    // Rearrange the data for vectorization. UnpackXMM includes
    // a little-endian swap for SSE.
    __m128i a = UnpackXMM<0>(w, x, y, z);
    __m128i b = UnpackXMM<1>(w, x, y, z);
    __m128i c = UnpackXMM<2>(w, x, y, z);
    __m128i d = UnpackXMM<3>(w, x, y, z);

    const unsigned int rounds = 44;
    for (int i = static_cast<int>(rounds)-1; i >= 0; i -= 4)
    {
        // Load four 32-bit subkeys and apply them in reverse order
        const __m128i key = _mm_loadu_si128((const __m128i*)(subkeys + i - 3));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(3, 3, 3, 3)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(2, 2, 2, 2)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(1, 1, 1, 1)));
        SIMECK64_Encrypt(a, b, c, d, _mm_shuffle_epi32(key, _MM_SHUFFLE(0, 0, 0, 0)));
    }

    // Repack the data and undo the little-endian swap
    w = RepackXMM<0>(a, b, c, d);
    x = RepackXMM<1>(a, b, c, d);
    y = RepackXMM<2>(a, b, c, d);
    z = RepackXMM<3>(a, b, c, d);

    // Undo the word swap
    block0 = _mm_shuffle_epi32(w, _MM_SHUFFLE(2, 3, 0, 1));
    block1 = _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
    block2 = _mm_shuffle_epi32(y, _MM_SHUFFLE(2, 3, 0, 1));
    block3 = _mm_shuffle_epi32(z, _MM_SHUFFLE(2, 3, 0, 1));
}
#endif  // CRYPTOPP_SSSE3_AVAILABLE

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
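// Exported entry points. They plug the block functions above into the
// AdvancedProcessBlocks64_4x1_SSE template from adv_simd.h, which drives the
// four-register and single-register code paths and takes care of loading,
// storing, and XOR-ing the caller's buffers.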
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
size_t SIMECK64_Enc_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_4x1_SSE(SIMECK64_Enc_Block, SIMECK64_Enc_4_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SIMECK64_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_4x1_SSE(SIMECK64_Dec_Block, SIMECK64_Dec_4_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_SSSE3_AVAILABLE

NAMESPACE_END