Crypto++  8.2
Free C++ class library of cryptographic schemes
donna_64.cpp
1 // donna_64.cpp - written and placed in public domain by Jeffrey Walton
2 // Crypto++ specific implementation wrapped around Andrew
3 // Moon's public domain curve25519-donna and ed25519-donna,
4 // https://github.com/floodyberry/curve25519-donna and
5 // https://github.com/floodyberry/ed25519-donna.
6 
7 // The curve25519 and ed25519 source files multiplex different repos and
8 // architectures using namespaces. The repos are Andrew Moon's
9 // curve25519-donna and ed25519-donna. The architectures are 32-bit, 64-bit
10 // and SSE. For example, 32-bit x25519 uses symbols from Donna::X25519 and
11 // Donna::Arch32.
12 
13 // A fair amount of duplication happens below, but we could not directly
14 // use curve25519 for both x25519 and ed25519. A close examination reveals
15 // slight differences in the implementation. For example, look at the
16 // two curve25519_sub functions.
17 
18 // If needed, see Moon's commit "Go back to ignoring 256th bit [sic]",
19 // https://github.com/floodyberry/curve25519-donna/commit/57a683d18721a658
20 
21 #include "pch.h"
22 
23 #include "config.h"
24 #include "donna.h"
25 #include "secblock.h"
26 #include "sha.h"
27 #include "misc.h"
28 #include "cpu.h"
29 
30 #include <istream>
31 #include <sstream>
32 
33 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
34 # pragma GCC diagnostic ignored "-Wunused-function"
35 #endif
36 
37 // Squash MS LNK4221 and libtool warnings
38 extern const char DONNA64_FNAME[] = __FILE__;
39 
40 #if defined(CRYPTOPP_CURVE25519_64BIT)
41 
42 #include "donna_64.h"
43 
44 ANONYMOUS_NAMESPACE_BEGIN
45 
46 using CryptoPP::byte;
47 using CryptoPP::word64;
48 using CryptoPP::GetWord;
49 using CryptoPP::PutWord;
51 
52 inline word64 U8TO64_LE(const byte* p)
53 {
54  return GetWord<word64>(false, LITTLE_ENDIAN_ORDER, p);
55 }
56 
57 inline void U64TO8_LE(byte* p, word64 w)
58 {
59  PutWord(false, LITTLE_ENDIAN_ORDER, p, w);
60 }
61 
62 ANONYMOUS_NAMESPACE_END
63 
64 NAMESPACE_BEGIN(CryptoPP)
65 NAMESPACE_BEGIN(Donna)
66 NAMESPACE_BEGIN(X25519)
67 ANONYMOUS_NAMESPACE_BEGIN
68 
69 using CryptoPP::byte;
70 using CryptoPP::word32;
71 using CryptoPP::sword32;
72 using CryptoPP::word64;
73 using CryptoPP::sword64;
74 
75 using CryptoPP::GetBlock;
77 
78 // Bring in all the symbols from the 64-bit header
79 using namespace CryptoPP::Donna::Arch64;
80 
81 /* out = in */
82 inline void
83 curve25519_copy(bignum25519 out, const bignum25519 in) {
84  out[0] = in[0]; out[1] = in[1];
85  out[2] = in[2]; out[3] = in[3];
86  out[4] = in[4];
87 }
88 
89 /* out = a + b */
90 inline void
91 curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
92  out[0] = a[0] + b[0];
93  out[1] = a[1] + b[1];
94  out[2] = a[2] + b[2];
95  out[3] = a[3] + b[3];
96  out[4] = a[4] + b[4];
97 }
98 
99 /* out = a - b */
100 inline void
101 curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
102  out[0] = a[0] + two54m152 - b[0];
103  out[1] = a[1] + two54m8 - b[1];
104  out[2] = a[2] + two54m8 - b[2];
105  out[3] = a[3] + two54m8 - b[3];
106  out[4] = a[4] + two54m8 - b[4];
107 }
108 
109 /* out = (in * scalar) */
110 inline void
111 curve25519_scalar_product(bignum25519 out, const bignum25519 in, const word64 scalar) {
112  word128 a;
113  word64 c;
114 
115 #if defined(CRYPTOPP_WORD128_AVAILABLE)
116  a = ((word128) in[0]) * scalar; out[0] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);
117  a = ((word128) in[1]) * scalar + c; out[1] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);
118  a = ((word128) in[2]) * scalar + c; out[2] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);
119  a = ((word128) in[3]) * scalar + c; out[3] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);
120  a = ((word128) in[4]) * scalar + c; out[4] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);
121  out[0] += c * 19;
122 #else
123  mul64x64_128(a, in[0], scalar) out[0] = lo128(a) & reduce_mask_51; shr128(c, a, 51);
124  mul64x64_128(a, in[1], scalar) add128_64(a, c) out[1] = lo128(a) & reduce_mask_51; shr128(c, a, 51);
125  mul64x64_128(a, in[2], scalar) add128_64(a, c) out[2] = lo128(a) & reduce_mask_51; shr128(c, a, 51);
126  mul64x64_128(a, in[3], scalar) add128_64(a, c) out[3] = lo128(a) & reduce_mask_51; shr128(c, a, 51);
127  mul64x64_128(a, in[4], scalar) add128_64(a, c) out[4] = lo128(a) & reduce_mask_51; shr128(c, a, 51);
128  out[0] += c * 19;
129 #endif
130 }
131 
132 /* out = a * b */
133 inline void
134 curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) {
135 #if !defined(CRYPTOPP_WORD128_AVAILABLE)
136  word128 mul;
137 #endif
138  word128 t[5];
139  word64 r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
140 
141  r0 = b[0]; r1 = b[1]; r2 = b[2]; r3 = b[3]; r4 = b[4];
142  s0 = a[0]; s1 = a[1]; s2 = a[2]; s3 = a[3]; s4 = a[4];
143 
144 #if defined(CRYPTOPP_WORD128_AVAILABLE)
145  t[0] = ((word128) r0) * s0;
146  t[1] = ((word128) r0) * s1 + ((word128) r1) * s0;
147  t[2] = ((word128) r0) * s2 + ((word128) r2) * s0 + ((word128) r1) * s1;
148  t[3] = ((word128) r0) * s3 + ((word128) r3) * s0 + ((word128) r1) * s2 + ((word128) r2) * s1;
149  t[4] = ((word128) r0) * s4 + ((word128) r4) * s0 + ((word128) r3) * s1 + ((word128) r1) * s3 + ((word128) r2) * s2;
150 #else
151  mul64x64_128(t[0], r0, s0)
152  mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)
153  mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)
154  mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)
155  mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)
156 #endif
157 
158  r1 *= 19; r2 *= 19; r3 *= 19; r4 *= 19;
159 
160 #if defined(CRYPTOPP_WORD128_AVAILABLE)
161  t[0] += ((word128) r4) * s1 + ((word128) r1) * s4 + ((word128) r2) * s3 + ((word128) r3) * s2;
162  t[1] += ((word128) r4) * s2 + ((word128) r2) * s4 + ((word128) r3) * s3;
163  t[2] += ((word128) r4) * s3 + ((word128) r3) * s4;
164  t[3] += ((word128) r4) * s4;
165 #else
166  mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul)
167  mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)
168  mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)
169  mul64x64_128(mul, r4, s4) add128(t[3], mul)
170 #endif
171 
172  r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
173  add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
174  add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
175  add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
176  add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
177  r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
178  r1 += c;
179 
180  out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
181 }
182 
183 /* out = in^(2 * count) */
184 inline void
185 curve25519_square_times(bignum25519 out, const bignum25519 in, word64 count) {
186 #if !defined(CRYPTOPP_WORD128_AVAILABLE)
187  word128 mul;
188 #endif
189  word128 t[5];
190  word64 r0,r1,r2,r3,r4,c;
191  word64 d0,d1,d2,d4,d419;
192 
193  r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];
194 
195  do {
196  d0 = r0 * 2; d1 = r1 * 2;
197  d2 = r2 * 2 * 19;
198  d419 = r4 * 19; d4 = d419 * 2;
199 
200 #if defined(CRYPTOPP_WORD128_AVAILABLE)
201  t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));
202  t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));
203  t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));
204  t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));
205  t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));
206 #else
207  mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
208  mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
209  mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
210  mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
211  mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
212 #endif
213 
214  r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
215  add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
216  add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
217  add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
218  add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
219  r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
220  r1 += c;
221  } while(--count);
222 
223  out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
224 }
225 
226 inline void
227 curve25519_square(bignum25519 out, const bignum25519 in) {
228 #if !defined(CRYPTOPP_WORD128_AVAILABLE)
229  word128 mul;
230 #endif
231  word128 t[5];
232  word64 r0,r1,r2,r3,r4,c;
233  word64 d0,d1,d2,d4,d419;
234 
235  r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];
236 
237  d0 = r0 * 2; d1 = r1 * 2;
238  d2 = r2 * 2 * 19;
239  d419 = r4 * 19; d4 = d419 * 2;
240 
241 #if defined(CRYPTOPP_WORD128_AVAILABLE)
242  t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));
243  t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));
244  t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));
245  t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));
246  t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));
247 #else
248  mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
249  mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
250  mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
251  mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
252  mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
253 #endif
254 
255  r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
256  add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
257  add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
258  add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
259  add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
260  r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
261  r1 += c;
262 
263  out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
264 }
265 
266 /* Take a little-endian, 32-byte number and expand it into polynomial form */
267 inline void
268 curve25519_expand(bignum25519 out, const byte *in) {
269  word64 x0,x1,x2,x3;
271  block(x0)(x1)(x2)(x3);
272 
273  out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);
274  out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);
275  out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);
276  out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);
277  out[4] = x3 & reduce_mask_51; /* ignore the top bit */
278 }
279 
280 /* Take a fully reduced polynomial form number and contract it into a
281  * little-endian, 32-byte array
282  */
283 inline void
284 curve25519_contract(byte *out, const bignum25519 input) {
285  word64 t[5];
286  word64 f, i;
287 
288  t[0] = input[0];
289  t[1] = input[1];
290  t[2] = input[2];
291  t[3] = input[3];
292  t[4] = input[4];
293 
294  #define curve25519_contract_carry() \
295  t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \
296  t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \
297  t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \
298  t[4] += t[3] >> 51; t[3] &= reduce_mask_51;
299 
300  #define curve25519_contract_carry_full() curve25519_contract_carry() \
301  t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;
302 
303  #define curve25519_contract_carry_final() curve25519_contract_carry() \
304  t[4] &= reduce_mask_51;
305 
306  curve25519_contract_carry_full()
307  curve25519_contract_carry_full()
308 
309  /* now t is between 0 and 2^255-1, properly carried. */
310  /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
311  t[0] += 19;
312  curve25519_contract_carry_full()
313 
314  /* now between 19 and 2^255-1 in both cases, and offset by 19. */
315  t[0] += 0x8000000000000 - 19;
316  t[1] += 0x8000000000000 - 1;
317  t[2] += 0x8000000000000 - 1;
318  t[3] += 0x8000000000000 - 1;
319  t[4] += 0x8000000000000 - 1;
320 
321  /* now between 2^255 and 2^256-20, and offset by 2^255. */
322  curve25519_contract_carry_final()
323 
324  #define write51full(n,shift) \
325  f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \
326  for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f;
327  #define write51(n) write51full(n,13*n)
328 
329  write51(0)
330  write51(1)
331  write51(2)
332  write51(3)
333 
334  #undef curve25519_contract_carry
335  #undef curve25519_contract_carry_full
336  #undef curve25519_contract_carry_final
337  #undef write51full
338  #undef write51
339 }
340 
341 /*
342  * Swap the contents of [qx] and [qpx] iff @swap is non-zero
343  */
344 inline void
345 curve25519_swap_conditional(bignum25519 x, bignum25519 qpx, word64 iswap) {
346  const word64 swap = (word64)(-(sword64)iswap);
347  word64 x0,x1,x2,x3,x4;
348 
349  x0 = swap & (x[0] ^ qpx[0]); x[0] ^= x0; qpx[0] ^= x0;
350  x1 = swap & (x[1] ^ qpx[1]); x[1] ^= x1; qpx[1] ^= x1;
351  x2 = swap & (x[2] ^ qpx[2]); x[2] ^= x2; qpx[2] ^= x2;
352  x3 = swap & (x[3] ^ qpx[3]); x[3] ^= x3; qpx[3] ^= x3;
353  x4 = swap & (x[4] ^ qpx[4]); x[4] ^= x4; qpx[4] ^= x4;
354 }
355 
356 /*
357  * In: b = 2^5 - 2^0
358  * Out: b = 2^250 - 2^0
359  */
360 void
361 curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {
362  ALIGN(16) bignum25519 t0,c;
363 
364  /* 2^5 - 2^0 */ /* b */
365  /* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);
366  /* 2^10 - 2^0 */ curve25519_mul(b, t0, b);
367  /* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);
368  /* 2^20 - 2^0 */ curve25519_mul(c, t0, b);
369  /* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);
370  /* 2^40 - 2^0 */ curve25519_mul(t0, t0, c);
371  /* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);
372  /* 2^50 - 2^0 */ curve25519_mul(b, t0, b);
373  /* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);
374  /* 2^100 - 2^0 */ curve25519_mul(c, t0, b);
375  /* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);
376  /* 2^200 - 2^0 */ curve25519_mul(t0, t0, c);
377  /* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);
378  /* 2^250 - 2^0 */ curve25519_mul(b, t0, b);
379 }
380 
381 /*
382  * z^(p - 2) = z(2^255 - 21)
383  */
384 void
385 curve25519_recip(bignum25519 out, const bignum25519 z) {
386  ALIGN(16) bignum25519 a, t0, b;
387 
388  /* 2 */ curve25519_square(a, z); /* a = 2 */
389  /* 8 */ curve25519_square_times(t0, a, 2);
390  /* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
391  /* 11 */ curve25519_mul(a, b, a); /* a = 11 */
392  /* 22 */ curve25519_square(t0, a);
393  /* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
394  /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
395  /* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);
396  /* 2^255 - 21 */ curve25519_mul(out, b, a);
397 }
398 
399 ANONYMOUS_NAMESPACE_END
400 NAMESPACE_END // X25519
401 NAMESPACE_END // Donna
402 NAMESPACE_END // CryptoPP
403 
404 //******************************* ed25519 *******************************//
405 
406 NAMESPACE_BEGIN(CryptoPP)
407 NAMESPACE_BEGIN(Donna)
408 NAMESPACE_BEGIN(Ed25519)
409 ANONYMOUS_NAMESPACE_BEGIN
410 
411 using CryptoPP::byte;
412 using CryptoPP::word32;
413 using CryptoPP::sword32;
414 using CryptoPP::word64;
415 using CryptoPP::sword64;
416 
417 using CryptoPP::GetBlock;
418 using CryptoPP::LittleEndian;
419 
420 using CryptoPP::SHA512;
421 
422 // Bring in all the symbols from the 64-bit header
423 using namespace CryptoPP::Donna::Arch64;
424 
425 /* out = in */
426 inline void
427 curve25519_copy(bignum25519 out, const bignum25519 in) {
428  out[0] = in[0]; out[1] = in[1];
429  out[2] = in[2]; out[3] = in[3];
430  out[4] = in[4];
431 }
432 
433 /* out = a + b */
434 inline void
435 curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
436  out[0] = a[0] + b[0]; out[1] = a[1] + b[1];
437  out[2] = a[2] + b[2]; out[3] = a[3] + b[3];
438  out[4] = a[4] + b[4];
439 }
440 
441 /* out = a + b, where a and/or b are the result of a basic op (add,sub) */
442 inline void
443 curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
444  out[0] = a[0] + b[0]; out[1] = a[1] + b[1];
445  out[2] = a[2] + b[2]; out[3] = a[3] + b[3];
446  out[4] = a[4] + b[4];
447 }
448 
449 inline void
450 curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
451  word64 c;
452  out[0] = a[0] + b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
453  out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
454  out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
455  out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
456  out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
457  out[0] += c * 19;
458 }
459 
460 /* out = a - b */
461 inline void
462 curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
463  out[0] = a[0] + twoP0 - b[0];
464  out[1] = a[1] + twoP1234 - b[1];
465  out[2] = a[2] + twoP1234 - b[2];
466  out[3] = a[3] + twoP1234 - b[3];
467  out[4] = a[4] + twoP1234 - b[4];
468 }
469 
470 /* out = a - b, where a and/or b are the result of a basic op (add,sub) */
471 inline void
472 curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
473  out[0] = a[0] + fourP0 - b[0];
474  out[1] = a[1] + fourP1234 - b[1];
475  out[2] = a[2] + fourP1234 - b[2];
476  out[3] = a[3] + fourP1234 - b[3];
477  out[4] = a[4] + fourP1234 - b[4];
478 }
479 
480 inline void
481 curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
482  word64 c;
483  out[0] = a[0] + fourP0 - b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
484  out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
485  out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
486  out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
487  out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
488  out[0] += c * 19;
489 }
490 
491 /* out = -a */
492 inline void
493 curve25519_neg(bignum25519 out, const bignum25519 a) {
494  word64 c;
495  out[0] = twoP0 - a[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
496  out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
497  out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
498  out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
499  out[4] = twoP1234 - a[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
500  out[0] += c * 19;
501 }
502 
503 /* out = a * b */
504 inline void
505 curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
506 #if !defined(CRYPTOPP_WORD128_AVAILABLE)
507  word128 mul;
508 #endif
509  word128 t[5];
510  word64 r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
511 
512  r0 = in[0]; r1 = in[1];
513  r2 = in[2]; r3 = in[3];
514  r4 = in[4];
515 
516  s0 = in2[0]; s1 = in2[1];
517  s2 = in2[2]; s3 = in2[3];
518  s4 = in2[4];
519 
520 #if defined(CRYPTOPP_WORD128_AVAILABLE)
521  t[0] = ((word128) r0) * s0;
522  t[1] = ((word128) r0) * s1 + ((word128) r1) * s0;
523  t[2] = ((word128) r0) * s2 + ((word128) r2) * s0 + ((word128) r1) * s1;
524  t[3] = ((word128) r0) * s3 + ((word128) r3) * s0 + ((word128) r1) * s2 + ((word128) r2) * s1;
525  t[4] = ((word128) r0) * s4 + ((word128) r4) * s0 + ((word128) r3) * s1 + ((word128) r1) * s3 + ((word128) r2) * s2;
526 #else
527  mul64x64_128(t[0], r0, s0)
528  mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)
529  mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)
530  mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)
531  mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)
532 #endif
533 
534  r1 *= 19; r2 *= 19;
535  r3 *= 19; r4 *= 19;
536 
537 #if defined(CRYPTOPP_WORD128_AVAILABLE)
538  t[0] += ((word128) r4) * s1 + ((word128) r1) * s4 + ((word128) r2) * s3 + ((word128) r3) * s2;
539  t[1] += ((word128) r4) * s2 + ((word128) r2) * s4 + ((word128) r3) * s3;
540  t[2] += ((word128) r4) * s3 + ((word128) r3) * s4;
541  t[3] += ((word128) r4) * s4;
542 #else
543  mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul)
544  mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)
545  mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)
546  mul64x64_128(mul, r4, s4) add128(t[3], mul)
547 #endif
548 
549  r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
550  add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
551  add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
552  add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
553  add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
554  r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
555  r1 += c;
556 
557  out[0] = r0; out[1] = r1;
558  out[2] = r2; out[3] = r3;
559  out[4] = r4;
560 }
561 
562 void
563 curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
564  curve25519_mul(out, in2, in);
565 }
566 
567 /* out = in^(2 * count) */
568 void
569 curve25519_square_times(bignum25519 out, const bignum25519 in, word64 count) {
570 #if !defined(CRYPTOPP_WORD128_AVAILABLE)
571  word128 mul;
572 #endif
573  word128 t[5];
574  word64 r0,r1,r2,r3,r4,c;
575  word64 d0,d1,d2,d4,d419;
576 
577  r0 = in[0]; r1 = in[1];
578  r2 = in[2]; r3 = in[3];
579  r4 = in[4];
580 
581  do {
582  d0 = r0 * 2;
583  d1 = r1 * 2;
584  d2 = r2 * 2 * 19;
585  d419 = r4 * 19;
586  d4 = d419 * 2;
587 
588 #if defined(CRYPTOPP_WORD128_AVAILABLE)
589  t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));
590  t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));
591  t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));
592  t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));
593  t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));
594 #else
595  mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
596  mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
597  mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
598  mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
599  mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
600 #endif
601 
602  r0 = lo128(t[0]) & reduce_mask_51;
603  r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c;
604  r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c;
605  r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c;
606  r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c;
607  shl128(c, t[4], 13); r0 += c * 19;
608  c = r0 >> 51; r0 &= reduce_mask_51;
609  r1 += c ; c = r1 >> 51; r1 &= reduce_mask_51;
610  r2 += c ; c = r2 >> 51; r2 &= reduce_mask_51;
611  r3 += c ; c = r3 >> 51; r3 &= reduce_mask_51;
612  r4 += c ; c = r4 >> 51; r4 &= reduce_mask_51;
613  r0 += c * 19;
614  } while(--count);
615 
616  out[0] = r0; out[1] = r1;
617  out[2] = r2; out[3] = r3;
618  out[4] = r4;
619 }
620 
621 inline void
622 curve25519_square(bignum25519 out, const bignum25519 in) {
623 #if !defined(CRYPTOPP_WORD128_AVAILABLE)
624  word128 mul;
625 #endif
626  word128 t[5];
627  word64 r0,r1,r2,r3,r4,c;
628  word64 d0,d1,d2,d4,d419;
629 
630  r0 = in[0]; r1 = in[1];
631  r2 = in[2]; r3 = in[3];
632  r4 = in[4];
633 
634  d0 = r0 * 2; d1 = r1 * 2;
635  d2 = r2 * 2 * 19;
636  d419 = r4 * 19;
637  d4 = d419 * 2;
638 
639 #if defined(CRYPTOPP_WORD128_AVAILABLE)
640  t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));
641  t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));
642  t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));
643  t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));
644  t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));
645 #else
646  mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
647  mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
648  mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
649  mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
650  mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
651 #endif
652 
653  r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
654  add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
655  add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
656  add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
657  add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
658  r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
659  r1 += c;
660 
661  out[0] = r0; out[1] = r1;
662  out[2] = r2; out[3] = r3;
663  out[4] = r4;
664 }
665 
666 /* Take a little-endian, 32-byte number and expand it into polynomial form */
667 inline void
668 curve25519_expand(bignum25519 out, const byte *in) {
669  word64 x0,x1,x2,x3;
671  block(x0)(x1)(x2)(x3);
672 
673  out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);
674  out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);
675  out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);
676  out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);
677  out[4] = x3 & reduce_mask_51;
678 }
679 
680 /* Take a fully reduced polynomial form number and contract it into a
681  * little-endian, 32-byte array
682  */
683 inline void
684 curve25519_contract(byte *out, const bignum25519 input) {
685  word64 t[5];
686  word64 f, i;
687 
688  t[0] = input[0];
689  t[1] = input[1];
690  t[2] = input[2];
691  t[3] = input[3];
692  t[4] = input[4];
693 
694  #define curve25519_contract_carry() \
695  t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \
696  t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \
697  t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \
698  t[4] += t[3] >> 51; t[3] &= reduce_mask_51;
699 
700  #define curve25519_contract_carry_full() curve25519_contract_carry() \
701  t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;
702 
703  #define curve25519_contract_carry_final() curve25519_contract_carry() \
704  t[4] &= reduce_mask_51;
705 
706  curve25519_contract_carry_full()
707  curve25519_contract_carry_full()
708 
709  /* now t is between 0 and 2^255-1, properly carried. */
710  /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
711  t[0] += 19;
712  curve25519_contract_carry_full()
713 
714  /* now between 19 and 2^255-1 in both cases, and offset by 19. */
715  t[0] += (reduce_mask_51 + 1) - 19;
716  t[1] += (reduce_mask_51 + 1) - 1;
717  t[2] += (reduce_mask_51 + 1) - 1;
718  t[3] += (reduce_mask_51 + 1) - 1;
719  t[4] += (reduce_mask_51 + 1) - 1;
720 
721  /* now between 2^255 and 2^256-20, and offset by 2^255. */
722  curve25519_contract_carry_final()
723 
724  #define write51full(n,shift) \
725  f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \
726  for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f;
727  #define write51(n) write51full(n,13*n)
728  write51(0)
729  write51(1)
730  write51(2)
731  write51(3)
732 }
733 
734 #if !defined(ED25519_GCC_64BIT_CHOOSE)
735 
736 /* out = (flag) ? in : out */
737 inline void
738 curve25519_move_conditional_bytes(byte out[96], const byte in[96], word64 flag) {
739  const word64 nb = flag - 1, b = ~nb;
740  const word64 *inq = (const word64 *)in;
741  word64 *outq = (word64 *)out;
742  outq[0] = (outq[0] & nb) | (inq[0] & b);
743  outq[1] = (outq[1] & nb) | (inq[1] & b);
744  outq[2] = (outq[2] & nb) | (inq[2] & b);
745  outq[3] = (outq[3] & nb) | (inq[3] & b);
746  outq[4] = (outq[4] & nb) | (inq[4] & b);
747  outq[5] = (outq[5] & nb) | (inq[5] & b);
748  outq[6] = (outq[6] & nb) | (inq[6] & b);
749  outq[7] = (outq[7] & nb) | (inq[7] & b);
750  outq[8] = (outq[8] & nb) | (inq[8] & b);
751  outq[9] = (outq[9] & nb) | (inq[9] & b);
752  outq[10] = (outq[10] & nb) | (inq[10] & b);
753  outq[11] = (outq[11] & nb) | (inq[11] & b);
754 }
755 
756 /* if (iswap) swap(a, b) */
757 inline void
758 curve25519_swap_conditional(bignum25519 a, bignum25519 b, word64 iswap) {
759  const word64 swap = (word64)(-(sword64)iswap);
760  word64 x0,x1,x2,x3,x4;
761 
762  x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0;
763  x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1;
764  x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2;
765  x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3;
766  x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4;
767 }
768 
769 #endif /* ED25519_GCC_64BIT_CHOOSE */
770 
771 // ************************************************************************************
772 
773 inline void
774 ed25519_hash(byte *hash, const byte *in, size_t inlen) {
775  SHA512().CalculateDigest(hash, in, inlen);
776 }
777 
778 inline void
779 ed25519_extsk(hash_512bits extsk, const byte sk[32]) {
780  ed25519_hash(extsk, sk, 32);
781  extsk[0] &= 248;
782  extsk[31] &= 127;
783  extsk[31] |= 64;
784 }
785 
786 void
787 UpdateFromStream(HashTransformation& hash, std::istream& stream)
788 {
789  SecByteBlock block(4096);
790  while (stream.read((char*)block.begin(), block.size()))
791  hash.Update(block, block.size());
792 
793  std::streamsize rem = stream.gcount();
794  if (rem)
795  hash.Update(block, rem);
796 
797  block.SetMark(0);
798 }
799 
800 void
801 ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], const byte *m, size_t mlen) {
802  SHA512 hash;
803  hash.Update(RS, 32);
804  hash.Update(pk, 32);
805  hash.Update(m, mlen);
806  hash.Final(hram);
807 }
808 
809 void
810 ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], std::istream& stream) {
811  SHA512 hash;
812  hash.Update(RS, 32);
813  hash.Update(pk, 32);
814  UpdateFromStream(hash, stream);
815  hash.Final(hram);
816 }
817 
818 bignum256modm_element_t
819 lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
820  return (a - b) >> 63;
821 }
822 
823 void
824 reduce256_modm(bignum256modm r) {
825  bignum256modm t;
826  bignum256modm_element_t b = 0, pb, mask;
827 
828  /* t = r - m */
829  pb = 0;
830  pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b;
831  pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b;
832  pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b;
833  pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 56)); pb = b;
834  pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32));
835 
836  /* keep r if r was smaller than m */
837  mask = b - 1;
838 
839  r[0] ^= mask & (r[0] ^ t[0]);
840  r[1] ^= mask & (r[1] ^ t[1]);
841  r[2] ^= mask & (r[2] ^ t[2]);
842  r[3] ^= mask & (r[3] ^ t[3]);
843  r[4] ^= mask & (r[4] ^ t[4]);
844 }
845 
846 void
847 barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
848  bignum256modm q3, r2;
849  word128 c, mul;
850  bignum256modm_element_t f, b, pb;
851 
852  /* q1 = x >> 248 = 264 bits = 5 56 bit elements
853  q2 = mu * q1
854  q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
855  mul64x64_128(c, modm_mu[0], q1[3]) mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56);
856  mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul)
857  f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56);
858  mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul)
859  f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56);
860  mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul)
861  f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56);
862  mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul)
863  f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56);
864  mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f)
865  f = lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56);
866  q3[4] |= (f << 16);
867 
868  mul64x64_128(c, modm_m[0], q3[0])
869  r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
870  mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul)
871  r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
872  mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul)
873  r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
874  mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul)
875  r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
876  mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul)
877  r2[4] = lo128(c) & 0x0000ffffffffff;
878 
879  pb = 0;
880  pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b;
881  pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b;
882  pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b;
883  pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b;
884  pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40));
885 
886  reduce256_modm(r);
887  reduce256_modm(r);
888 }
889 
890 void
891 add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
892  bignum256modm_element_t c;
893 
894  c = x[0] + y[0]; r[0] = c & 0xffffffffffffff; c >>= 56;
895  c += x[1] + y[1]; r[1] = c & 0xffffffffffffff; c >>= 56;
896  c += x[2] + y[2]; r[2] = c & 0xffffffffffffff; c >>= 56;
897  c += x[3] + y[3]; r[3] = c & 0xffffffffffffff; c >>= 56;
898  c += x[4] + y[4]; r[4] = c;
899 
900  reduce256_modm(r);
901 }
902 
903 void
904 mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
905  bignum256modm q1, r1;
906  word128 c, mul;
907  bignum256modm_element_t f;
908 
909  mul64x64_128(c, x[0], y[0])
910  f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56);
911  mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul)
912  f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56);
913  mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul)
914  f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56);
915  mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul)
916  f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56);
917  mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul)
918  f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
919  mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul)
920  f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
921  mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul)
922  f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
923  mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul)
924  f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
925  mul64x64_128(c, x[4], y[4]) add128_64(c, f)
926  f = lo128(c); q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
927  q1[4] |= (f << 32);
928 
929  barrett_reduce256_modm(r, q1, r1);
930 }
931 
932 void
933 expand256_modm(bignum256modm out, const byte *in, size_t len) {
934  byte work[64] = {0};
935  bignum256modm_element_t x[16];
936  bignum256modm q1;
937 
938  memcpy(work, in, len);
939  x[0] = U8TO64_LE(work + 0);
940  x[1] = U8TO64_LE(work + 8);
941  x[2] = U8TO64_LE(work + 16);
942  x[3] = U8TO64_LE(work + 24);
943  x[4] = U8TO64_LE(work + 32);
944  x[5] = U8TO64_LE(work + 40);
945  x[6] = U8TO64_LE(work + 48);
946  x[7] = U8TO64_LE(work + 56);
947 
948  /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
949  out[0] = ( x[0]) & 0xffffffffffffff;
950  out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;
951  out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
952  out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
953  out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff;
954 
955  /* under 252 bits, no need to reduce */
956  if (len < 32)
957  return;
958 
959  /* q1 = x >> 248 = 264 bits */
960  q1[0] = ((x[ 3] >> 56) | (x[ 4] << 8)) & 0xffffffffffffff;
961  q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff;
962  q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff;
963  q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff;
964  q1[4] = ((x[ 7] >> 24) );
965 
966  barrett_reduce256_modm(out, q1, out);
967 }
968 
969 void
970 expand_raw256_modm(bignum256modm out, const byte in[32]) {
971  bignum256modm_element_t x[4];
972 
973  x[0] = U8TO64_LE(in + 0);
974  x[1] = U8TO64_LE(in + 8);
975  x[2] = U8TO64_LE(in + 16);
976  x[3] = U8TO64_LE(in + 24);
977 
978  out[0] = ( x[0]) & 0xffffffffffffff;
979  out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;
980  out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
981  out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
982  out[4] = ((x[ 3] >> 32) ) & 0x000000ffffffff;
983 }
984 
985 void
986 contract256_modm(byte out[32], const bignum256modm in) {
987  U64TO8_LE(out + 0, (in[0] ) | (in[1] << 56));
988  U64TO8_LE(out + 8, (in[1] >> 8) | (in[2] << 48));
989  U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40));
990  U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32));
991 }
992 
993 void
994 contract256_window4_modm(signed char r[64], const bignum256modm in) {
995  char carry;
996  signed char *quads = r;
997  bignum256modm_element_t i, j, v, m;
998 
999  for (i = 0; i < 5; i++) {
1000  v = in[i];
1001  m = (i == 4) ? 8 : 14;
1002  for (j = 0; j < m; j++) {
1003  *quads++ = (v & 15);
1004  v >>= 4;
1005  }
1006  }
1007 
1008  /* making it signed */
1009  carry = 0;
1010  for(i = 0; i < 63; i++) {
1011  r[i] += carry;
1012  r[i+1] += (r[i] >> 4);
1013  r[i] &= 15;
1014  carry = (r[i] >> 3);
1015  r[i] -= (carry << 4);
1016  }
1017  r[63] += carry;
1018 }
1019 
1020 void
1021 contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
1022  int i,j,k,b;
1023  int m = (1 << (windowsize - 1)) - 1, soplen = 256;
1024  signed char *bits = r;
1025  bignum256modm_element_t v;
1026 
1027  /* first put the binary expansion into r */
1028  for (i = 0; i < 4; i++) {
1029  v = s[i];
1030  for (j = 0; j < 56; j++, v >>= 1)
1031  *bits++ = (v & 1);
1032  }
1033  v = s[4];
1034  for (j = 0; j < 32; j++, v >>= 1)
1035  *bits++ = (v & 1);
1036 
1037  /* Making it sliding window */
1038  for (j = 0; j < soplen; j++) {
1039  if (!r[j])
1040  continue;
1041 
1042  for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
1043  if ((r[j] + (r[j + b] << b)) <= m) {
1044  r[j] += r[j + b] << b;
1045  r[j + b] = 0;
1046  } else if ((r[j] - (r[j + b] << b)) >= -m) {
1047  r[j] -= r[j + b] << b;
1048  for (k = j + b; k < soplen; k++) {
1049  if (!r[k]) {
1050  r[k] = 1;
1051  break;
1052  }
1053  r[k] = 0;
1054  }
1055  } else if (r[j + b]) {
1056  break;
1057  }
1058  }
1059  }
1060 }
1061 
1062 /*
1063  * In: b = 2^5 - 2^0
1064  * Out: b = 2^250 - 2^0
1065  */
1066 void
1067 curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {
1068  ALIGN(16) bignum25519 t0,c;
1069 
1070  /* 2^5 - 2^0 */ /* b */
1071  /* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);
1072  /* 2^10 - 2^0 */ curve25519_mul_noinline(b, t0, b);
1073  /* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);
1074  /* 2^20 - 2^0 */ curve25519_mul_noinline(c, t0, b);
1075  /* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);
1076  /* 2^40 - 2^0 */ curve25519_mul_noinline(t0, t0, c);
1077  /* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);
1078  /* 2^50 - 2^0 */ curve25519_mul_noinline(b, t0, b);
1079  /* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);
1080  /* 2^100 - 2^0 */ curve25519_mul_noinline(c, t0, b);
1081  /* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);
1082  /* 2^200 - 2^0 */ curve25519_mul_noinline(t0, t0, c);
1083  /* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);
1084  /* 2^250 - 2^0 */ curve25519_mul_noinline(b, t0, b);
1085 }
1086 
1087 /*
1088  * z^(p - 2) = z(2^255 - 21)
1089  */
1090 void
1091 curve25519_recip(bignum25519 out, const bignum25519 z) {
1092  ALIGN(16) bignum25519 a,t0,b;
1093 
1094  /* 2 */ curve25519_square_times(a, z, 1); /* a = 2 */
1095  /* 8 */ curve25519_square_times(t0, a, 2);
1096  /* 9 */ curve25519_mul_noinline(b, t0, z); /* b = 9 */
1097  /* 11 */ curve25519_mul_noinline(a, b, a); /* a = 11 */
1098  /* 22 */ curve25519_square_times(t0, a, 1);
1099  /* 2^5 - 2^0 = 31 */ curve25519_mul_noinline(b, t0, b);
1100  /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
1101  /* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);
1102  /* 2^255 - 21 */ curve25519_mul_noinline(out, b, a);
1103 }
1104 
1105 /*
1106  * z^((p-5)/8) = z^(2^252 - 3)
1107  */
1108 void
1109 curve25519_pow_two252m3(bignum25519 two252m3, const bignum25519 z) {
1110  ALIGN(16) bignum25519 b,c,t0;
1111 
1112  /* 2 */ curve25519_square_times(c, z, 1); /* c = 2 */
1113  /* 8 */ curve25519_square_times(t0, c, 2); /* t0 = 8 */
1114  /* 9 */ curve25519_mul_noinline(b, t0, z); /* b = 9 */
1115  /* 11 */ curve25519_mul_noinline(c, b, c); /* c = 11 */
1116  /* 22 */ curve25519_square_times(t0, c, 1);
1117  /* 2^5 - 2^0 = 31 */ curve25519_mul_noinline(b, t0, b);
1118  /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
1119  /* 2^252 - 2^2 */ curve25519_square_times(b, b, 2);
1120  /* 2^252 - 3 */ curve25519_mul_noinline(two252m3, b, z);
1121 }
1122 
1123 inline void
1124 ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
1125  curve25519_mul(r->x, p->x, p->t);
1126  curve25519_mul(r->y, p->y, p->z);
1127  curve25519_mul(r->z, p->z, p->t);
1128 }
1129 
1130 inline void
1131 ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
1132  curve25519_mul(r->x, p->x, p->t);
1133  curve25519_mul(r->y, p->y, p->z);
1134  curve25519_mul(r->z, p->z, p->t);
1135  curve25519_mul(r->t, p->x, p->y);
1136 }
1137 
1138 void
1139 ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {
1140  curve25519_sub(p->ysubx, r->y, r->x);
1141  curve25519_add(p->xaddy, r->y, r->x);
1142  curve25519_copy(p->z, r->z);
1143  curve25519_mul(p->t2d, r->t, ge25519_ec2d);
1144 }
1145 
1146 void
1147 ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {
1148  bignum25519 a,b,c,d,t,u;
1149 
1150  curve25519_sub(a, p->y, p->x);
1151  curve25519_add(b, p->y, p->x);
1152  curve25519_sub(t, q->y, q->x);
1153  curve25519_add(u, q->y, q->x);
1154  curve25519_mul(a, a, t);
1155  curve25519_mul(b, b, u);
1156  curve25519_mul(c, p->t, q->t);
1157  curve25519_mul(c, c, ge25519_ec2d);
1158  curve25519_mul(d, p->z, q->z);
1159  curve25519_add(d, d, d);
1160  curve25519_sub(r->x, b, a);
1161  curve25519_add(r->y, b, a);
1162  curve25519_add_after_basic(r->z, d, c);
1163  curve25519_sub_after_basic(r->t, d, c);
1164 }
1165 
1166 void
1167 ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {
1168  bignum25519 a,b,c;
1169 
1170  curve25519_square(a, p->x);
1171  curve25519_square(b, p->y);
1172  curve25519_square(c, p->z);
1173  curve25519_add_reduce(c, c, c);
1174  curve25519_add(r->x, p->x, p->y);
1175  curve25519_square(r->x, r->x);
1176  curve25519_add(r->y, b, a);
1177  curve25519_sub(r->z, b, a);
1178  curve25519_sub_after_basic(r->x, r->x, r->y);
1179  curve25519_sub_after_basic(r->t, c, r->z);
1180 }
1181 
1182 void
1183 ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, byte signbit) {
1184  const bignum25519 *qb = (const bignum25519 *)q;
1185  bignum25519 *rb = (bignum25519 *)r;
1186  bignum25519 a,b,c;
1187 
1188  curve25519_sub(a, p->y, p->x);
1189  curve25519_add(b, p->y, p->x);
1190  curve25519_mul(a, a, qb[signbit]); /* x for +, y for - */
1191  curve25519_mul(r->x, b, qb[signbit^1]); /* y for +, x for - */
1192  curve25519_add(r->y, r->x, a);
1193  curve25519_sub(r->x, r->x, a);
1194  curve25519_mul(c, p->t, q->t2d);
1195  curve25519_add_reduce(r->t, p->z, p->z);
1196  curve25519_copy(r->z, r->t);
1197  curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */
1198  curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */
1199 }
1200 
1201 void
1202 ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, byte signbit) {
1203  const bignum25519 *qb = (const bignum25519 *)q;
1204  bignum25519 *rb = (bignum25519 *)r;
1205  bignum25519 a,b,c;
1206 
1207  curve25519_sub(a, p->y, p->x);
1208  curve25519_add(b, p->y, p->x);
1209  curve25519_mul(a, a, qb[signbit]); /* ysubx for +, xaddy for - */
1210  curve25519_mul(r->x, b, qb[signbit^1]); /* xaddy for +, ysubx for - */
1211  curve25519_add(r->y, r->x, a);
1212  curve25519_sub(r->x, r->x, a);
1213  curve25519_mul(c, p->t, q->t2d);
1214  curve25519_mul(r->t, p->z, q->z);
1215  curve25519_add_reduce(r->t, r->t, r->t);
1216  curve25519_copy(r->z, r->t);
1217  curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */
1218  curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */
1219 }
1220 
1221 void
1222 ge25519_double_partial(ge25519 *r, const ge25519 *p) {
1223  ge25519_p1p1 t;
1224  ge25519_double_p1p1(&t, p);
1225  ge25519_p1p1_to_partial(r, &t);
1226 }
1227 
1228 void
1229 ge25519_double(ge25519 *r, const ge25519 *p) {
1230  ge25519_p1p1 t;
1231  ge25519_double_p1p1(&t, p);
1232  ge25519_p1p1_to_full(r, &t);
1233 }
1234 
1235 void
1236 ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) {
1237  ge25519_p1p1 t;
1238  ge25519_add_p1p1(&t, p, q);
1239  ge25519_p1p1_to_full(r, &t);
1240 }
1241 
1242 void
1243 ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {
1244  bignum25519 a,b,c,e,f,g,h;
1245 
1246  curve25519_sub(a, r->y, r->x);
1247  curve25519_add(b, r->y, r->x);
1248  curve25519_mul(a, a, q->ysubx);
1249  curve25519_mul(e, b, q->xaddy);
1250  curve25519_add(h, e, a);
1251  curve25519_sub(e, e, a);
1252  curve25519_mul(c, r->t, q->t2d);
1253  curve25519_add(f, r->z, r->z);
1254  curve25519_add_after_basic(g, f, c);
1255  curve25519_sub_after_basic(f, f, c);
1256  curve25519_mul(r->x, e, f);
1257  curve25519_mul(r->y, h, g);
1258  curve25519_mul(r->z, g, f);
1259  curve25519_mul(r->t, e, h);
1260 }
1261 
1262 void
1263 ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {
1264  bignum25519 a,b,c,x,y,z,t;
1265 
1266  curve25519_sub(a, p->y, p->x);
1267  curve25519_add(b, p->y, p->x);
1268  curve25519_mul(a, a, q->ysubx);
1269  curve25519_mul(x, b, q->xaddy);
1270  curve25519_add(y, x, a);
1271  curve25519_sub(x, x, a);
1272  curve25519_mul(c, p->t, q->t2d);
1273  curve25519_mul(t, p->z, q->z);
1274  curve25519_add(t, t, t);
1275  curve25519_add_after_basic(z, t, c);
1276  curve25519_sub_after_basic(t, t, c);
1277  curve25519_mul(r->xaddy, x, t);
1278  curve25519_mul(r->ysubx, y, z);
1279  curve25519_mul(r->z, z, t);
1280  curve25519_mul(r->t2d, x, y);
1281  curve25519_copy(y, r->ysubx);
1282  curve25519_sub(r->ysubx, r->ysubx, r->xaddy);
1283  curve25519_add(r->xaddy, r->xaddy, y);
1284  curve25519_mul(r->t2d, r->t2d, ge25519_ec2d);
1285 }
1286 
1287 void
1288 ge25519_pack(byte r[32], const ge25519 *p) {
1289  bignum25519 tx, ty, zi;
1290  byte parity[32];
1291  curve25519_recip(zi, p->z);
1292  curve25519_mul(tx, p->x, zi);
1293  curve25519_mul(ty, p->y, zi);
1294  curve25519_contract(r, ty);
1295  curve25519_contract(parity, tx);
1296  r[31] ^= ((parity[0] & 1) << 7);
1297 }
1298 
1299 int
1300 ed25519_verify(const byte *x, const byte *y, size_t len) {
1301  size_t differentbits = 0;
1302  while (len--)
1303  differentbits |= (*x++ ^ *y++);
1304  return (int) (1 & ((differentbits - 1) >> 8));
1305 }
1306 
1307 int
1308 ge25519_unpack_negative_vartime(ge25519 *r, const byte p[32]) {
1309  const byte zero[32] = {0};
1310  const bignum25519 one = {1};
1311  byte parity = p[31] >> 7;
1312  byte check[32];
1313  bignum25519 t, root, num, den, d3;
1314 
1315  curve25519_expand(r->y, p);
1316  curve25519_copy(r->z, one);
1317  curve25519_square(num, r->y); /* x = y^2 */
1318  curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */
1319  curve25519_sub_reduce(num, num, r->z); /* x = y^1 - 1 */
1320  curve25519_add(den, den, r->z); /* den = dy^2 + 1 */
1321 
1322  /* Computation of sqrt(num/den) */
1323  /* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */
1324  curve25519_square(t, den);
1325  curve25519_mul(d3, t, den);
1326  curve25519_square(r->x, d3);
1327  curve25519_mul(r->x, r->x, den);
1328  curve25519_mul(r->x, r->x, num);
1329  curve25519_pow_two252m3(r->x, r->x);
1330 
1331  /* 2. computation of r->x = num * den^3 * (num*den^7)^((p-5)/8) */
1332  curve25519_mul(r->x, r->x, d3);
1333  curve25519_mul(r->x, r->x, num);
1334 
1335  /* 3. Check if either of the roots works: */
1336  curve25519_square(t, r->x);
1337  curve25519_mul(t, t, den);
1338  curve25519_sub_reduce(root, t, num);
1339  curve25519_contract(check, root);
1340  if (!ed25519_verify(check, zero, 32)) {
1341  curve25519_add_reduce(t, t, num);
1342  curve25519_contract(check, t);
1343  if (!ed25519_verify(check, zero, 32))
1344  return 0;
1345  curve25519_mul(r->x, r->x, ge25519_sqrtneg1);
1346  }
1347 
1348  curve25519_contract(check, r->x);
1349  if ((check[0] & 1) == parity) {
1350  curve25519_copy(t, r->x);
1351  curve25519_neg(r->x, t);
1352  }
1353  curve25519_mul(r->t, r->x, r->y);
1354  return 1;
1355 }
1356 
1357 /* computes [s1]p1 + [s2]basepoint */
1358 void
1359 ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
1360  signed char slide1[256], slide2[256];
1361  ge25519_pniels pre1[S1_TABLE_SIZE];
1362  ge25519 d1;
1363  ge25519_p1p1 t;
1364  sword32 i;
1365 
1366  contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);
1367  contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);
1368 
1369  ge25519_double(&d1, p1);
1370  ge25519_full_to_pniels(pre1, p1);
1371  for (i = 0; i < S1_TABLE_SIZE - 1; i++)
1372  ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);
1373 
1374  /* set neutral */
1375  memset(r, 0, sizeof(ge25519));
1376  r->y[0] = 1;
1377  r->z[0] = 1;
1378 
1379  i = 255;
1380  while ((i >= 0) && !(slide1[i] | slide2[i]))
1381  i--;
1382 
1383  for (; i >= 0; i--) {
1384  ge25519_double_p1p1(&t, r);
1385 
1386  if (slide1[i]) {
1387  ge25519_p1p1_to_full(r, &t);
1388  ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (byte)slide1[i] >> 7);
1389  }
1390 
1391  if (slide2[i]) {
1392  ge25519_p1p1_to_full(r, &t);
1393  ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (byte)slide2[i] >> 7);
1394  }
1395 
1396  ge25519_p1p1_to_partial(r, &t);
1397  }
1398 }
1399 
1400 #if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)
1401 
1402 word32
1403 ge25519_windowb_equal(word32 b, word32 c) {
1404  return ((b ^ c) - 1) >> 31;
1405 }
1406 
1407 void
1408 ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const byte table[256][96], word32 pos, signed char b) {
1409  bignum25519 neg;
1410  word32 sign = (word32)((byte)b >> 7);
1411  word32 mask = ~(sign - 1);
1412  word32 u = (b + mask) ^ mask;
1413  word32 i;
1414 
1415  /* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */
1416  byte packed[96] = {0};
1417  packed[0] = 1;
1418  packed[32] = 1;
1419 
1420  for (i = 0; i < 8; i++)
1421  curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1));
1422 
1423  /* expand in to t */
1424  curve25519_expand(t->ysubx, packed + 0);
1425  curve25519_expand(t->xaddy, packed + 32);
1426  curve25519_expand(t->t2d , packed + 64);
1427 
1428  /* adjust for sign */
1429  curve25519_swap_conditional(t->ysubx, t->xaddy, sign);
1430  curve25519_neg(neg, t->t2d);
1431  curve25519_swap_conditional(t->t2d, neg, sign);
1432 }
1433 
1434 #endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */
1435 
1436 /* computes [s]basepoint */
1437 void
1438 ge25519_scalarmult_base_niels(ge25519 *r, const byte basepoint_table[256][96], const bignum256modm s) {
1439  signed char b[64];
1440  word32 i;
1441  ge25519_niels t;
1442 
1443  contract256_window4_modm(b, s);
1444 
1445  ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[1]);
1446  curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
1447  curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
1448  memset(r->z, 0, sizeof(bignum25519));
1449  curve25519_copy(r->t, t.t2d);
1450  r->z[0] = 2;
1451  for (i = 3; i < 64; i += 2) {
1452  ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
1453  ge25519_nielsadd2(r, &t);
1454  }
1455  ge25519_double_partial(r, r);
1456  ge25519_double_partial(r, r);
1457  ge25519_double_partial(r, r);
1458  ge25519_double(r, r);
1459  ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[0]);
1460  curve25519_mul(t.t2d, t.t2d, ge25519_ecd);
1461  ge25519_nielsadd2(r, &t);
1462  for(i = 2; i < 64; i += 2) {
1463  ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
1464  ge25519_nielsadd2(r, &t);
1465  }
1466 }
1467 
1468 ANONYMOUS_NAMESPACE_END
1469 NAMESPACE_END // Ed25519
1470 NAMESPACE_END // Donna
1471 NAMESPACE_END // CryptoPP
1472 
1473 //***************************** curve25519 *****************************//
1474 
1475 NAMESPACE_BEGIN(CryptoPP)
1476 NAMESPACE_BEGIN(Donna)
1477 
1478 int curve25519_mult_CXX(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])
1479 {
1480  using namespace CryptoPP::Donna::X25519;
1481 
1483  for (size_t i = 0;i < 32;++i)
1484  e[i] = secretKey[i];
1485  e[0] &= 0xf8; e[31] &= 0x7f; e[31] |= 0x40;
1486 
1487  bignum25519 nqpqx = {1}, nqpqz = {0}, nqz = {1}, nqx;
1488  bignum25519 q, qx, qpqx, qqx, zzz, zmone;
1489  size_t bit, lastbit;
1490 
1491  curve25519_expand(q, othersKey);
1492  curve25519_copy(nqx, q);
1493 
1494  /* bit 255 is always 0, and bit 254 is always 1, so skip bit 255 and
1495  start pre-swapped on bit 254 */
1496  lastbit = 1;
1497 
1498  /* we are doing bits 254..3 in the loop, but are swapping in bits 253..2 */
1499  for (int i = 253; i >= 2; i--) {
1500  curve25519_add(qx, nqx, nqz);
1501  curve25519_sub(nqz, nqx, nqz);
1502  curve25519_add(qpqx, nqpqx, nqpqz);
1503  curve25519_sub(nqpqz, nqpqx, nqpqz);
1504  curve25519_mul(nqpqx, qpqx, nqz);
1505  curve25519_mul(nqpqz, qx, nqpqz);
1506  curve25519_add(qqx, nqpqx, nqpqz);
1507  curve25519_sub(nqpqz, nqpqx, nqpqz);
1508  curve25519_square(nqpqz, nqpqz);
1509  curve25519_square(nqpqx, qqx);
1510  curve25519_mul(nqpqz, nqpqz, q);
1511  curve25519_square(qx, qx);
1512  curve25519_square(nqz, nqz);
1513  curve25519_mul(nqx, qx, nqz);
1514  curve25519_sub(nqz, qx, nqz);
1515  curve25519_scalar_product(zzz, nqz, 121665);
1516  curve25519_add(zzz, zzz, qx);
1517  curve25519_mul(nqz, nqz, zzz);
1518 
1519  bit = (e[i/8] >> (i & 7)) & 1;
1520  curve25519_swap_conditional(nqx, nqpqx, bit ^ lastbit);
1521  curve25519_swap_conditional(nqz, nqpqz, bit ^ lastbit);
1522  lastbit = bit;
1523  }
1524 
1525  /* the final 3 bits are always zero, so we only need to double */
1526  for (int i = 0; i < 3; i++) {
1527  curve25519_add(qx, nqx, nqz);
1528  curve25519_sub(nqz, nqx, nqz);
1529  curve25519_square(qx, qx);
1530  curve25519_square(nqz, nqz);
1531  curve25519_mul(nqx, qx, nqz);
1532  curve25519_sub(nqz, qx, nqz);
1533  curve25519_scalar_product(zzz, nqz, 121665);
1534  curve25519_add(zzz, zzz, qx);
1535  curve25519_mul(nqz, nqz, zzz);
1536  }
1537 
1538  curve25519_recip(zmone, nqz);
1539  curve25519_mul(nqz, nqx, zmone);
1540  curve25519_contract(sharedKey, nqz);
1541 
1542  return 0;
1543 }
1544 
1545 int curve25519_mult(byte publicKey[32], const byte secretKey[32])
1546 {
1547  using namespace CryptoPP::Donna::X25519;
1548 
1549 #if (CRYPTOPP_CURVE25519_SSE2)
1550  if (HasSSE2())
1551  return curve25519_mult_SSE2(publicKey, secretKey, basePoint);
1552  else
1553 #endif
1554 
1555  return curve25519_mult_CXX(publicKey, secretKey, basePoint);
1556 }
1557 
1558 int curve25519_mult(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])
1559 {
1560 #if (CRYPTOPP_CURVE25519_SSE2)
1561  if (HasSSE2())
1562  return curve25519_mult_SSE2(sharedKey, secretKey, othersKey);
1563  else
1564 #endif
1565 
1566  return curve25519_mult_CXX(sharedKey, secretKey, othersKey);
1567 }
1568 
1569 NAMESPACE_END // Donna
1570 NAMESPACE_END // CryptoPP
1571 
1572 //******************************* ed25519 *******************************//
1573 
1574 NAMESPACE_BEGIN(CryptoPP)
1575 NAMESPACE_BEGIN(Donna)
1576 
1577 int
1578 ed25519_publickey_CXX(byte publicKey[32], const byte secretKey[32])
1579 {
1580  using namespace CryptoPP::Donna::Ed25519;
1581 
1582  bignum256modm a;
1583  ALIGN(16) ge25519 A;
1584  hash_512bits extsk;
1585 
1586  /* A = aB */
1587  ed25519_extsk(extsk, secretKey);
1588  expand256_modm(a, extsk, 32);
1589  ge25519_scalarmult_base_niels(&A, ge25519_niels_base_multiples, a);
1590  ge25519_pack(publicKey, &A);
1591 
1592  return 0;
1593 }
1594 
1595 int
1596 ed25519_publickey(byte publicKey[32], const byte secretKey[32])
1597 {
1598  return ed25519_publickey_CXX(publicKey, secretKey);
1599 }
1600 
1601 int
1602 ed25519_sign_CXX(std::istream& stream, const byte sk[32], const byte pk[32], byte RS[64])
1603 {
1604  using namespace CryptoPP::Donna::Ed25519;
1605 
1606  bignum256modm r, S, a;
1607  ALIGN(16) ge25519 R;
1608  hash_512bits extsk, hashr, hram;
1609 
1610  // Unfortunately we need to read the stream twice. The fisrt time calculates
1611  // 'r = H(aExt[32..64], m)'. The second time calculates 'S = H(R,A,m)'. There
1612  // is a data dependency due to hashing 'RS' with 'R = [r]B' that does not
1613  // allow us to read the stream once.
1614  std::streampos where = stream.tellg();
1615 
1616  ed25519_extsk(extsk, sk);
1617 
1618  /* r = H(aExt[32..64], m) */
1619  SHA512 hash;
1620  hash.Update(extsk + 32, 32);
1621  UpdateFromStream(hash, stream);
1622  hash.Final(hashr);
1623  expand256_modm(r, hashr, 64);
1624 
1625  /* R = rB */
1626  ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);
1627  ge25519_pack(RS, &R);
1628 
1629  // Reset stream for the second digest
1630  stream.clear();
1631  stream.seekg(where);
1632 
1633  /* S = H(R,A,m).. */
1634  ed25519_hram(hram, RS, pk, stream);
1635  expand256_modm(S, hram, 64);
1636 
1637  /* S = H(R,A,m)a */
1638  expand256_modm(a, extsk, 32);
1639  mul256_modm(S, S, a);
1640 
1641  /* S = (r + H(R,A,m)a) */
1642  add256_modm(S, S, r);
1643 
1644  /* S = (r + H(R,A,m)a) mod L */
1645  contract256_modm(RS + 32, S);
1646  return 0;
1647 }
1648 
1649 int
1650 ed25519_sign_CXX(const byte *m, size_t mlen, const byte sk[32], const byte pk[32], byte RS[64])
1651 {
1652  using namespace CryptoPP::Donna::Ed25519;
1653 
1654  bignum256modm r, S, a;
1655  ALIGN(16) ge25519 R;
1656  hash_512bits extsk, hashr, hram;
1657 
1658  ed25519_extsk(extsk, sk);
1659 
1660  /* r = H(aExt[32..64], m) */
1661  SHA512 hash;
1662  hash.Update(extsk + 32, 32);
1663  hash.Update(m, mlen);
1664  hash.Final(hashr);
1665  expand256_modm(r, hashr, 64);
1666 
1667  /* R = rB */
1668  ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);
1669  ge25519_pack(RS, &R);
1670 
1671  /* S = H(R,A,m).. */
1672  ed25519_hram(hram, RS, pk, m, mlen);
1673  expand256_modm(S, hram, 64);
1674 
1675  /* S = H(R,A,m)a */
1676  expand256_modm(a, extsk, 32);
1677  mul256_modm(S, S, a);
1678 
1679  /* S = (r + H(R,A,m)a) */
1680  add256_modm(S, S, r);
1681 
1682  /* S = (r + H(R,A,m)a) mod L */
1683  contract256_modm(RS + 32, S);
1684  return 0;
1685 }
1686 
1687 int
1688 ed25519_sign(std::istream& stream, const byte secretKey[32], const byte publicKey[32],
1689  byte signature[64])
1690 {
1691  return ed25519_sign_CXX(stream, secretKey, publicKey, signature);
1692 }
1693 
1694 int
1695 ed25519_sign(const byte* message, size_t messageLength, const byte secretKey[32],
1696  const byte publicKey[32], byte signature[64])
1697 {
1698  return ed25519_sign_CXX(message, messageLength, secretKey, publicKey, signature);
1699 }
1700 
1701 int
1702 ed25519_sign_open_CXX(const byte *m, size_t mlen, const byte pk[32], const byte RS[64]) {
1703 
1704  using namespace CryptoPP::Donna::Ed25519;
1705 
1706  ALIGN(16) ge25519 R, A;
1707  hash_512bits hash;
1708  bignum256modm hram, S;
1709  byte checkR[32];
1710 
1711  if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))
1712  return -1;
1713 
1714  /* hram = H(R,A,m) */
1715  ed25519_hram(hash, RS, pk, m, mlen);
1716  expand256_modm(hram, hash, 64);
1717 
1718  /* S */
1719  expand256_modm(S, RS + 32, 32);
1720 
1721  /* SB - H(R,A,m)A */
1722  ge25519_double_scalarmult_vartime(&R, &A, hram, S);
1723  ge25519_pack(checkR, &R);
1724 
1725  /* check that R = SB - H(R,A,m)A */
1726  return ed25519_verify(RS, checkR, 32) ? 0 : -1;
1727 }
1728 
1729 int
1730 ed25519_sign_open_CXX(std::istream& stream, const byte pk[32], const byte RS[64]) {
1731 
1732  using namespace CryptoPP::Donna::Ed25519;
1733 
1734  ALIGN(16) ge25519 R, A;
1735  hash_512bits hash;
1736  bignum256modm hram, S;
1737  byte checkR[32];
1738 
1739  if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))
1740  return -1;
1741 
1742  /* hram = H(R,A,m) */
1743  ed25519_hram(hash, RS, pk, stream);
1744  expand256_modm(hram, hash, 64);
1745 
1746  /* S */
1747  expand256_modm(S, RS + 32, 32);
1748 
1749  /* SB - H(R,A,m)A */
1750  ge25519_double_scalarmult_vartime(&R, &A, hram, S);
1751  ge25519_pack(checkR, &R);
1752 
1753  /* check that R = SB - H(R,A,m)A */
1754  return ed25519_verify(RS, checkR, 32) ? 0 : -1;
1755 }
1756 
1757 int
1758 ed25519_sign_open(std::istream& stream, const byte publicKey[32], const byte signature[64])
1759 {
1760  return ed25519_sign_open_CXX(stream, publicKey, signature);
1761 }
1762 
1763 int
1764 ed25519_sign_open(const byte *message, size_t messageLength, const byte publicKey[32], const byte signature[64])
1765 {
1766  return ed25519_sign_open_CXX(message, messageLength, publicKey, signature);
1767 }
1768 
1769 NAMESPACE_END // Donna
1770 NAMESPACE_END // CryptoPP
1771 
1772 #endif // CRYPTOPP_CURVE25519_64BIT
Utility functions for the Crypto++ library.
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2428
Converts an enumeration to a type suitable for use as a template parameter.
Definition: cryptlib.h:135
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:150
Library configuration file.
void Update(const byte *input, size_t length)
Updates a hash with additional input.
Definition: iterhash.cpp:13
STL namespace.
SecBlock<byte> typedef.
Definition: secblock.h:1058
byte order is little-endian
Definition: cryptlib.h:145
Classes and functions for secure memory allocations.
T GetWord(bool assumeAligned, ByteOrder order, const byte *block)
Access a block of memory.
Definition: misc.h:2386
int ed25519_sign(const byte *message, size_t messageLength, const byte secretKey[32], const byte publicKey[32], byte signature[64])
Creates a signature on a message.
int curve25519_mult(byte publicKey[32], const byte secretKey[32])
Generate a public key.
SHA-512 message digest.
Definition: sha.h:141
Precompiled header file.
int ed25519_sign_open(const byte *message, size_t messageLength, const byte publicKey[32], const byte signature[64])
Verifies a signature on a message.
Fixed size stack-based SecBlock.
Definition: secblock.h:1077
Functions for CPU features and intrinsics.
Classes for SHA-1 and SHA-2 family of message digests.
virtual void CalculateDigest(byte *digest, const byte *input, size_t length)
Updates the hash with additional input and computes the hash of the current message.
Definition: cryptlib.h:1160
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:116
Interface for hash functions and data processing part of MACs.
Definition: cryptlib.h:1084
Access a block of memory.
Definition: misc.h:2454
Crypto++ library namespace.
int ed25519_publickey(byte publicKey[32], const byte secretKey[32])
Creates a public key from a secret key.
virtual void Final(byte *digest)
Computes the hash of the current message.
Definition: cryptlib.h:1114
virtual void Update(const byte *input, size_t length)=0
Updates a hash with additional input.