#ifndef CRYPTOPP_ARM_SIMD_H
#define CRYPTOPP_ARM_SIMD_H

#include "config.h"

// MSVC's arm64_neon.h is pulled in elsewhere; only include <arm_neon.h>
// for toolchains that provide it directly.
#if (CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(_M_ARM64)
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <stdint.h>
# include <arm_acle.h>
#endif

#if (CRYPTOPP_ARM_PMULL_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

/// \brief Polynomial multiplication
/// \param a the first value
/// \param b the second value
/// \return vector product
/// \details PMULL_00() performs carryless multiplication and presents the
///  result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x00)</tt>:
///  the low 64-bits of <tt>a</tt> and the low 64-bits of <tt>b</tt>
///  are multiplied.
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
    // MSVC's vmull_p64 takes __n64 operands, so load the low lanes.
    const __n64 x = { vgetq_lane_u64(a, 0) };
    const __n64 y = { vgetq_lane_u64(b, 0) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    // Emit PMULL directly; avoids requiring the vmull_p64 intrinsic,
    // which is missing from some older GCC releases.
    uint64x2_t r;
    __asm __volatile("pmull    %0.1q, %1.1d, %2.1d \n\t"
        :"=w" (r) : "w" (a), "w" (b) );
    return r;
#else
    // The operands are already uint64x2_t, so extract the lanes directly
    // (a vreinterpretq_u64_u8 cast here would not type-check).
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(a, 0),
        vgetq_lane_u64(b, 0)));
#endif
}
/// \brief Polynomial multiplication
/// \param a the first value
/// \param b the second value
/// \return vector product
/// \details PMULL_01() performs carryless multiplication and presents the
///  result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x01)</tt>:
///  the low 64-bits of <tt>a</tt> and the high 64-bits of <tt>b</tt>
///  are multiplied.
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
    // MSVC's vmull_p64 takes __n64 operands, so load the needed lanes.
    const __n64 x = { vgetq_lane_u64(a, 0) };
    const __n64 y = { vgetq_lane_u64(b, 1) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    // PMULL multiplies d-register operands; pass the high half of b
    // so the instruction sees b's upper 64-bit lane.
    uint64x2_t r;
    __asm __volatile("pmull    %0.1q, %1.1d, %2.1d \n\t"
        :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
    return r;
#else
    // The operands are already uint64x2_t, so extract the lanes directly
    // (a vreinterpretq_u64_u8 cast here would not type-check).
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(a, 0),
        vgetq_lane_u64(b, 1)));
#endif
}
/// \brief Polynomial multiplication
/// \param a the first value
/// \param b the second value
/// \return vector product
/// \details PMULL_10() performs carryless multiplication and presents the
///  result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x10)</tt>:
///  the high 64-bits of <tt>a</tt> and the low 64-bits of <tt>b</tt>
///  are multiplied.
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
    // MSVC's vmull_p64 takes __n64 operands, so load the needed lanes.
    const __n64 x = { vgetq_lane_u64(a, 1) };
    const __n64 y = { vgetq_lane_u64(b, 0) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    // PMULL multiplies d-register operands; pass the high half of a
    // so the instruction sees a's upper 64-bit lane.
    uint64x2_t r;
    __asm __volatile("pmull    %0.1q, %1.1d, %2.1d \n\t"
        :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
    return r;
#else
    // The operands are already uint64x2_t, so extract the lanes directly
    // (a vreinterpretq_u64_u8 cast here would not type-check).
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(a, 1),
        vgetq_lane_u64(b, 0)));
#endif
}
/// \brief Polynomial multiplication
/// \param a the first value
/// \param b the second value
/// \return vector product
/// \details PMULL_11() performs carryless multiplication and presents the
///  result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x11)</tt>:
///  the high 64-bits of <tt>a</tt> and the high 64-bits of <tt>b</tt>
///  are multiplied.
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
    // MSVC's vmull_p64 takes __n64 operands, so load the high lanes.
    const __n64 x = { vgetq_lane_u64(a, 1) };
    const __n64 y = { vgetq_lane_u64(b, 1) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    // PMULL2 multiplies the upper 64-bit lanes of its q-register
    // operands, so a and b can be passed whole.
    uint64x2_t r;
    __asm __volatile("pmull2   %0.1q, %1.2d, %2.2d \n\t"
        :"=w" (r) : "w" (a), "w" (b) );
    return r;
#else
    // The operands are already uint64x2_t, so extract the lanes directly
    // (a vreinterpretq_u64_u8 cast here would not type-check).
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(a, 1),
        vgetq_lane_u64(b, 1)));
#endif
}
/// \brief Vector extraction
/// \param a the first value
/// \param b the second value
/// \param c the byte count
/// \return vector
/// \details VEXT_U8() returns the concatenation of the trailing bytes of
///  <tt>a</tt> (starting at byte <tt>c</tt>) followed by the leading bytes
///  of <tt>b</tt>, like Intel's <tt>_mm_alignr_epi8</tt>.
/// \note <tt>c</tt> must resolve to a compile-time constant after inlining:
///  the "I" asm constraint (and vextq_u8 on MSVC) require an immediate.
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
#if defined(_MSC_VER)
    return (uint64x2_t)vextq_u8(
        vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
#else
    uint64x2_t r;
    __asm __volatile("ext   %0.16b, %1.16b, %2.16b, %3 \n\t"
        :"=w" (r) : "w" (a), "w" (b), "I" (c) );
    return r;
#endif
}
/// \brief Vector extraction
/// \tparam C the byte count
/// \param a the first value
/// \param b the second value
/// \return vector
/// \details VEXT_U8<C>() returns the concatenation of the trailing bytes
///  of <tt>a</tt> (starting at byte <tt>C</tt>) followed by the leading
///  bytes of <tt>b</tt>, like Intel's <tt>_mm_alignr_epi8</tt>. The
///  template form guarantees the byte count is a compile-time constant,
///  as the "I" asm constraint requires an immediate operand.
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
#if defined(_MSC_VER)
    return (uint64x2_t)vextq_u8(
        vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
#else
    uint64x2_t r;
    __asm __volatile("ext   %0.16b, %1.16b, %2.16b, %3 \n\t"
        :"=w" (r) : "w" (a), "w" (b), "I" (C) );
    return r;
#endif
}

#endif // CRYPTOPP_ARM_PMULL_AVAILABLE

#endif // CRYPTOPP_ARM_SIMD_H
Polynomial multiplication.
Library configuration file.
uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
Polynomial multiplication.
uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
Polynomial multiplication.
uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
Vector extraction.
uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
Polynomial multiplication.