 * code for the field GF(2^255-19).
 *
 * This code is in the public domain.
 *
 * Philipp Lay <philipp.lay@illunis.net>
 */
/*
 * 64-bit implementation
 */

/*
 * exported field constants
 */
/* con_d = -121665/121666 (mod q) */
	929955233495203, 466365720129213, 1662059464998953, 2033849074728123,

/* con_2d = 2 * con_d (mod q) */
const fld_t con_2d = {
	1859910466990425, 932731440258426, 1072319116312658, 1815898335770999,

/* con_m2d = -2 * con_d (mod q) */
const fld_t con_m2d = {
	391889346694804, 1319068373426821, 1179480697372589, 435901477914248,

/* con_j^2 = -1 (mod q) */
	1718705420411056, 234908883556509, 2233514472574048, 2117202627021982,
/*
 * fld_reduce - returns the smallest non-negative representative of x modulo q,
 * with 0 <= res[i] <= 2^51 - 1 for 0 <= i <= 4.
 *
 * This function assumes
 * abs(x[i]) <= 2^63 - 2^12 = 2^12 * (2^51 - 1),
 * for 0 <= i <= 4 to work properly.
 */
fld_reduce(fld_t res, const fld_t x)
	/* first carry round with offset 19 */
	res[0] = x[0] + 19;
	res[1] = x[1] + (res[0] >> FLD_LIMB_BITS);
	res[2] = x[2] + (res[1] >> FLD_LIMB_BITS);
	res[3] = x[3] + (res[2] >> FLD_LIMB_BITS);
	res[4] = x[4] + (res[3] >> FLD_LIMB_BITS);
	/* -2^12 <= (res[4] >> FLD_LIMB_BITS) <= 2^12-1 */
	res[0] = (res[0] & FLD_LIMB_MASK) + 19*(res[4] >> FLD_LIMB_BITS);
	res[1] &= FLD_LIMB_MASK;
	res[2] &= FLD_LIMB_MASK;
	res[3] &= FLD_LIMB_MASK;
	res[4] &= FLD_LIMB_MASK;

	/*
	 * -19*2^12 <= res[0] <= 2^51-1 + 19*(2^12-1),
	 * 0 <= res[i] <= 2^51-1 for 1 <= i <= 4.
	 */
	res[1] += (res[0] >> FLD_LIMB_BITS);
	res[2] += (res[1] >> FLD_LIMB_BITS);
	res[3] += (res[2] >> FLD_LIMB_BITS);
	res[4] += (res[3] >> FLD_LIMB_BITS);

	/* -1 <= (res[4] >> FLD_LIMB_BITS) <= 1 */
	res[0] = (res[0] & FLD_LIMB_MASK) + 19*(res[4] >> FLD_LIMB_BITS);

	res[1] &= FLD_LIMB_MASK;
	res[2] &= FLD_LIMB_MASK;
	res[3] &= FLD_LIMB_MASK;
	res[4] &= FLD_LIMB_MASK;
	/* the second round yields
	 * -19 <= res[0] <= 2^51-1 + 19
	 * and
	 * 0 <= res[i] <= 2^51-1 for 1 <= i <= 4.
	 */

	/* remove the offset introduced in the first round */
	res[0] -= 19;

	/* with the offset removed we have
	 * -38 <= res[0] <= 2^51-1
	 * and need a third round to ensure that res[0] is non-negative.
	 * please note that no positive carry is possible at this point.
	 */
	res[1] += (res[0] >> FLD_LIMB_BITS);
	res[2] += (res[1] >> FLD_LIMB_BITS);
	res[3] += (res[2] >> FLD_LIMB_BITS);
	res[4] += (res[3] >> FLD_LIMB_BITS);

	/* -1 <= (res[4] >> FLD_LIMB_BITS) <= 0, because of the non-positive carry */
	res[0] = (res[0] & FLD_LIMB_MASK) + 19*(res[4] >> FLD_LIMB_BITS);

	res[1] &= FLD_LIMB_MASK;
	res[2] &= FLD_LIMB_MASK;
	res[3] &= FLD_LIMB_MASK;
	res[4] &= FLD_LIMB_MASK;
	/* if res[0] was negative before this round, we had a negative carry and
	 * 2^51 - 38 - 19 <= res[0] <= 2^51 - 1.
	 *
	 * so in any case it is
	 * 0 <= res[0] <= 2^51 - 1
	 * and
	 * 0 <= res[i] <= 2^51 - 1 for 1 <= i <= 4.
	 *
	 * moreover res is the smallest non-negative representative of x modulo q.
	 */
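
/*
 * worked example (added for illustration only): reducing x = q itself.
 *
 * q has the limbs {2^51-19, 2^51-1, 2^51-1, 2^51-1, 2^51-1}.  the first
 * round adds the offset: res[0] = 2^51-19+19 = 2^51, so a carry of 1
 * ripples through res[1]..res[4], leaving every limb at 2^51.  masking
 * zeroes all limbs and folds the top carry back in as 19*1, so res becomes
 * {19, 0, 0, 0, 0}.  the second round changes nothing, removing the
 * offset gives res[0] = 0, and the third round leaves {0, 0, 0, 0, 0},
 * which is indeed q mod q.
 */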
/*
 * fld_import - import a 256-bit, unsigned, little-endian integer into
 * our signed 51-bit limb format and reduce modulo 2^255-19.
 */
fld_import(fld_t dst, const uint8_t src[32])
	for (i = 0; i < FLD_LIMB_NUM; i++) {
		for (; fill < FLD_LIMB_BITS; fill += 8)
			tmp |= (uint64_t)*src++ << fill;

		dst[i] = tmp & FLD_LIMB_MASK;
		tmp >>= FLD_LIMB_BITS;
		fill -= FLD_LIMB_BITS;
	}

	/* dst is now reduced and partially carried (first limb may
	 * use 52 bits instead of 51).
	 */
/*
 * fld_export - export our internal format to a 256-bit, unsigned,
 * little-endian packed format.
 */
fld_export(uint8_t dst[32], const fld_t src)
	fld_reduce(tmp, src);

	for (i = 0; i < FLD_LIMB_NUM; i++) {
		foo |= (uint64_t)tmp[i] << fill;
		for (fill += FLD_LIMB_BITS; fill >= 8; fill -= 8, foo >>= 8)
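
/*
 * a minimal usage sketch (not part of the library): fld_import and
 * fld_export are meant to round-trip through the packed 32-byte form,
 * with fld_export always emitting the canonical (fully reduced)
 * representation.  the helper below is purely illustrative and only
 * assumes the functions defined in this file.
 */
#if 0
static void
example_mul_packed(uint8_t out[32], const uint8_t a[32], const uint8_t b[32])
{
	fld_t x, y, z;

	fld_import(x, a);	/* unpack into 51-bit limbs */
	fld_import(y, b);
	fld_mul(z, x, y);	/* z = a*b (mod q), limbs only partially reduced */
	fld_export(out, z);	/* fld_export reduces and packs canonically */
}
#endif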
/*
 * fld_scale - multiply e by the scalar s and reduce modulo q.
 */
fld_scale(fld_t res, const fld_t e, limb_t s)
	carry = (llimb_t)s*e[0];
	res[0] = carry & FLD_LIMB_MASK;

	carry = (carry >> FLD_LIMB_BITS) + (llimb_t)s*e[1];
	res[1] = carry & FLD_LIMB_MASK;

	carry = (carry >> FLD_LIMB_BITS) + (llimb_t)s*e[2];
	res[2] = carry & FLD_LIMB_MASK;

	carry = (carry >> FLD_LIMB_BITS) + (llimb_t)s*e[3];
	res[3] = carry & FLD_LIMB_MASK;

	carry = (carry >> FLD_LIMB_BITS) + (llimb_t)s*e[4];
	res[4] = carry & FLD_LIMB_MASK;

	res[0] += 19*(carry >> FLD_LIMB_BITS);
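
/*
 * for illustration only (an assumed example, not a reference to code in
 * this file): a typical use of fld_scale in curve25519-style code is
 * multiplication by small curve constants, e.g.
 *
 *	fld_scale(t, e, 121666);
 *
 * in an x25519-style montgomery ladder step.  the scalar must be small
 * enough that s*e[i] plus the incoming carry still fits the double-width
 * llimb_t intermediate, which easily holds for constants of this size.
 */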
/*
 * fld_mul - multiply a with b and reduce modulo q.
 */
fld_mul(fld_t res, const fld_t a, const fld_t b)
	limb_t a19_1, a19_2, a19_3, a19_4, tmp;

	c[0] = (llimb_t)a[0]*b[0] + (llimb_t)a19_1*b[4] + (llimb_t)a19_2*b[3]
		+ (llimb_t)a19_3*b[2] + (llimb_t)a19_4*b[1];
	c[1] = (llimb_t)a[0]*b[1] + (llimb_t)a[1]*b[0] + (llimb_t)a19_2*b[4]
		+ (llimb_t)a19_3*b[3] + (llimb_t)a19_4*b[2];
	c[2] = (llimb_t)a[0]*b[2] + (llimb_t)a[1]*b[1] + (llimb_t)a[2]*b[0]
		+ (llimb_t)a19_3*b[4] + (llimb_t)a19_4*b[3];
	c[3] = (llimb_t)a[0]*b[3] + (llimb_t)a[1]*b[2] + (llimb_t)a[2]*b[1]
		+ (llimb_t)a[3]*b[0] + (llimb_t)a19_4*b[4];
	c[4] = (llimb_t)a[0]*b[4] + (llimb_t)a[1]*b[3] + (llimb_t)a[2]*b[2]
		+ (llimb_t)a[3]*b[1] + (llimb_t)a[4]*b[0];

	c[1] += c[0] >> FLD_LIMB_BITS;
	c[2] += c[1] >> FLD_LIMB_BITS;
	c[3] += c[2] >> FLD_LIMB_BITS;
	c[4] += c[3] >> FLD_LIMB_BITS;

	tmp = (c[0] & FLD_LIMB_MASK) + 19*(c[4] >> FLD_LIMB_BITS);
	res[1] = (c[1] & FLD_LIMB_MASK) + (tmp >> FLD_LIMB_BITS);

	res[0] = tmp & FLD_LIMB_MASK;
	res[2] = c[2] & FLD_LIMB_MASK;
	res[3] = c[3] & FLD_LIMB_MASK;
	res[4] = c[4] & FLD_LIMB_MASK;
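
/*
 * why the a19_* terms work (sketch of the reduction identity, added for
 * clarity):
 *
 * limb i carries weight 2^(51*i), so a partial product a[i]*b[j] has
 * weight 2^(51*(i+j)).  whenever i+j >= 5 we use
 *
 *	2^255 = 19 (mod q)
 *
 * to fold it back down: a[i]*b[j]*2^(51*(i+j)) = 19*a[i]*b[j]*2^(51*(i+j-5)).
 * this is why the high limbs of a appear as a19_i above; presumably the
 * elided setup code computes a19_i = 19*a[i], which keeps every c[k] a sum
 * of five double-wide products.
 */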
/*
 * fld_sq - square x and reduce modulo q.
 */
fld_sq(fld_t res, const fld_t x)
	limb_t x2_1, x2_2, x2_3, x2_4, x19_3, x19_4, tmp;

	c[0] = (llimb_t)x[0]*x[0] + (llimb_t)x2_1*x19_4 + (llimb_t)x2_2*x19_3;
	c[1] = (llimb_t)x[0]*x2_1 + (llimb_t)x2_2*x19_4 + (llimb_t)x19_3*x[3];
	c[2] = (llimb_t)x[0]*x2_2 + (llimb_t)x[1]*x[1] + (llimb_t)x2_3*x19_4;
	c[3] = (llimb_t)x[0]*x2_3 + (llimb_t)x2_1*x[2] + (llimb_t)x19_4*x[4];
	c[4] = (llimb_t)x[0]*x2_4 + (llimb_t)x2_1*x[3] + (llimb_t)x[2]*x[2];

	c[1] += c[0] >> FLD_LIMB_BITS;
	c[2] += c[1] >> FLD_LIMB_BITS;
	c[3] += c[2] >> FLD_LIMB_BITS;
	c[4] += c[3] >> FLD_LIMB_BITS;

	tmp = (c[0] & FLD_LIMB_MASK) + 19*(c[4] >> FLD_LIMB_BITS);
	res[1] = (c[1] & FLD_LIMB_MASK) + (tmp >> FLD_LIMB_BITS);

	res[0] = tmp & FLD_LIMB_MASK;
	res[2] = c[2] & FLD_LIMB_MASK;
	res[3] = c[3] & FLD_LIMB_MASK;
	res[4] = c[4] & FLD_LIMB_MASK;
#else /* USE_64BIT */

/*
 * 32-bit implementation
 */

/*
 * exported field constants
 */
/* con_d = -121665/121666 (mod q) */
const fld_t con_d = { 56195235, 13857412, 51736253, 6949390, 114729, 24766616,
	60832955, 30306712, 48412415, 21499315 };

/* con_2d = 2 * con_d (mod q) */
const fld_t con_2d = { 45281625, 27714825, 36363642, 13898781, 229458,
	15978800, 54557047, 27058993, 29715967, 9444199 };

/* con_m2d = -2 * con_d (mod q) */
const fld_t con_m2d = { 21827220, 5839606, 30745221, 19655650, 66879405,
	17575631, 12551816, 6495438, 37392896, 24110232 };

/* con_j^2 = -1 (mod q) */
const fld_t con_j = { 34513072, 25610706, 9377949, 3500415, 12389472, 33281959,
	41962654, 31548777, 326685, 11406482 };
/*
 * macro for doing one carry-reduce round
 *
 * tmp is used as the carry register and could be of type
 *
 * off is normally zero, but can be used to wrap around values x
 * with q <= x < 2^255 (see fld_reduce for an example).
 */
#define CARRY(dst, src, tmp, off) \
	(tmp) <<= FLD_LIMB_BITS(0); \
	for (_ii = 0; _ii < FLD_LIMB_NUM; _ii += 2) { \
		(tmp) = ((tmp) >> FLD_LIMB_BITS(0)) + (src)[_ii]; \
		(dst)[_ii] = (tmp) & FLD_LIMB_MASK(1); \
		(tmp) = ((tmp) >> FLD_LIMB_BITS(1)) + (src)[_ii+1]; \
		(dst)[_ii+1] = (tmp) & FLD_LIMB_MASK(0); \
	} \
	(dst)[0] += 19*((tmp) >> FLD_LIMB_BITS(0)); \
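
/*
 * note (added for clarity): the 32-bit code stores an element in ten limbs
 * that alternate between 26 and 25 bits, so one CARRY pass handles the
 * limbs in pairs.  the FLD_LIMB_BITS()/FLD_LIMB_MASK() macros (defined in
 * the header, not shown here) are assumed to select between the two limb
 * widths.  with off = 0 the net effect of one pass is, schematically,
 *
 *	tmp = src[0];          dst[0] = low bits; carry = rest;
 *	tmp = carry + src[1];  dst[1] = low bits; carry = rest;
 *	...
 *	dst[0] += 19 * final carry;
 *
 * mirroring one carry round of the 64-bit fld_reduce above.
 */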
/*
 * fld_reduce - returns the smallest non-negative representative of x modulo q,
 * with 0 <= res[i] <= 2^26 - 1 for i % 2 == 0
 * and 0 <= res[i] <= 2^25 - 1 for i % 2 == 1.
 *
 * This function assumes
 * abs(x[i]) <= 2^31 - 2^5 = 32 * (2^26 - 1)
 * for 0 <= i <= 9 to work properly.
 */
fld_reduce(fld_t res, const fld_t x)

	CARRY(res, x, tmp, 19);

	/*
	 * -2^26 - 19*2^5 <= res[0] <= 2^26 - 1 + 19*(2^5 - 1)
	 * 0 <= res[i] <= 2^26-1, i % 2 == 0,
	 * 0 <= res[i] <= 2^25-1, i % 2 == 1.
	 */

	CARRY(res, res, tmp, 0);

	/*
	 * -2^26 - 19 <= res[0] <= 2^26 - 1 + 19
	 * holds and, as before,
	 * 0 <= res[i] <= 2^26-1, i % 2 == 0,
	 * 0 <= res[i] <= 2^25-1, i % 2 == 1.
	 */

	/* in the next round we first remove our offset, resulting in
	 * -2^26 - 38 <= res[0] <= 2^26 - 1,
	 * therefore only a negative carry can appear.
	 */
	CARRY(res, res, tmp, -19);

	/*
	 * 0 <= res[i] <= 2^26-1, i % 2 == 0,
	 * 0 <= res[i] <= 2^25-1, i % 2 == 1
	 * for all limbs, as desired.
	 *
	 * if a carry happened, we even know
	 * 2^26 - 38 - 19 <= res[0] <= 2^26 - 1.
	 */
/*
 * fld_import - import a 256-bit, unsigned, little-endian integer into
 * our internal fld_t format.
 */
fld_import(fld_t dst, const uint8_t src[32])
	for (i = 0; i < FLD_LIMB_NUM; i++) {
		for (; fill < FLD_LIMB_BITS(d); fill += 8)
			foo |= (uint32_t)*src++ << fill;

		dst[i] = foo & FLD_LIMB_MASK(d);
		foo >>= FLD_LIMB_BITS(d);
		fill -= FLD_LIMB_BITS(d);
/*
 * fld_export - export a field element into 256-bit little-endian encoded form.
 */
fld_export(uint8_t dst[32], const fld_t src)
	fld_reduce(tmp, src);

	for (i = 0, fill = 0, foo = 0; i < FLD_LIMB_NUM; i += 2) {
		foo |= (tmp[i] & FLD_LIMB_MASK(1)) << fill;
		for (fill += FLD_LIMB_BITS(1); fill >= 8; fill -= 8, foo >>= 8)

		foo |= (tmp[i+1] & FLD_LIMB_MASK(0)) << fill;
		for (fill += FLD_LIMB_BITS(0); fill >= 8; fill -= 8, foo >>= 8)
/*
 * fld_scale - multiply e by the scalar x and reduce modulo q.
 */
fld_scale(fld_t dst, const fld_t e, limb_t x)
	for (tmp = 0, i = 0; i < FLD_LIMB_NUM; i += 2) {
		tmp = (tmp >> FLD_LIMB_BITS(0)) + (llimb_t)x*e[i];
		dst[i] = tmp & FLD_LIMB_MASK(1);
		tmp = (tmp >> FLD_LIMB_BITS(1)) + ((llimb_t)x*e[i+1]);
		dst[i+1] = tmp & FLD_LIMB_MASK(0);
	}
	dst[0] += 19*(tmp >> FLD_LIMB_BITS(0));
/*
 * fld_mul - multiply a with b and reduce modulo q.
 */
fld_mul(fld_t dst, const fld_t a, const fld_t b)
	c[0] = (llimb_t)a[0]*b[0];
	c[1] = (llimb_t)a[0]*b[1] + (llimb_t)a[1]*b[0];
	c[2] = (llimb_t)a[0]*b[2] + (llimb_t)2*a[1]*b[1] + (llimb_t)a[2]*b[0];
	c[3] = (llimb_t)a[0]*b[3] + (llimb_t)a[1]*b[2] + (llimb_t)a[2]*b[1]
		+ (llimb_t)a[3]*b[0];
	c[4] = (llimb_t)a[0]*b[4] + (llimb_t)2*a[1]*b[3] + (llimb_t)a[2]*b[2]
		+ (llimb_t)2*a[3]*b[1] + (llimb_t)a[4]*b[0];
	c[5] = (llimb_t)a[0]*b[5] + (llimb_t)a[1]*b[4] + (llimb_t)a[2]*b[3]
		+ (llimb_t)a[3]*b[2] + (llimb_t)a[4]*b[1] + (llimb_t)a[5]*b[0];
	c[6] = (llimb_t)a[0]*b[6] + (llimb_t)2*a[1]*b[5] + (llimb_t)a[2]*b[4]
		+ (llimb_t)2*a[3]*b[3] + (llimb_t)a[4]*b[2] + (llimb_t)2*a[5]*b[1]
		+ (llimb_t)a[6]*b[0];
	c[7] = (llimb_t)a[0]*b[7] + (llimb_t)a[1]*b[6] + (llimb_t)a[2]*b[5]
		+ (llimb_t)a[3]*b[4] + (llimb_t)a[4]*b[3] + (llimb_t)a[5]*b[2]
		+ (llimb_t)a[6]*b[1] + (llimb_t)a[7]*b[0];
	c[8] = (llimb_t)a[0]*b[8] + (llimb_t)2*a[1]*b[7] + (llimb_t)a[2]*b[6]
		+ (llimb_t)2*a[3]*b[5] + (llimb_t)a[4]*b[4] + (llimb_t)2*a[5]*b[3]
		+ (llimb_t)a[6]*b[2] + (llimb_t)2*a[7]*b[1] + (llimb_t)a[8]*b[0];
	c[9] = (llimb_t)a[0]*b[9] + (llimb_t)a[1]*b[8] + (llimb_t)a[2]*b[7]
		+ (llimb_t)a[3]*b[6] + (llimb_t)a[4]*b[5] + (llimb_t)a[5]*b[4]
		+ (llimb_t)a[6]*b[3] + (llimb_t)a[7]*b[2] + (llimb_t)a[8]*b[1]
		+ (llimb_t)a[9]*b[0];

	c[0] += 19 * ((llimb_t)2*a[1]*b[9] + (llimb_t)a[2]*b[8] + (llimb_t)2*a[3]*b[7]
		+ (llimb_t)a[4]*b[6] + (llimb_t)2*a[5]*b[5] + (llimb_t)a[6]*b[4]
		+ (llimb_t)2*a[7]*b[3] + (llimb_t)a[8]*b[2] + (llimb_t)2*a[9]*b[1]);
	c[1] += 19 * ((llimb_t)a[2]*b[9] + (llimb_t)a[3]*b[8] + (llimb_t)a[4]*b[7]
		+ (llimb_t)a[5]*b[6] + (llimb_t)a[6]*b[5] + (llimb_t)a[7]*b[4]
		+ (llimb_t)a[8]*b[3] + (llimb_t)a[9]*b[2]);
	c[2] += 19 * ((llimb_t)2*a[3]*b[9] + (llimb_t)a[4]*b[8] + (llimb_t)2*a[5]*b[7]
		+ (llimb_t)a[6]*b[6] + (llimb_t)2*a[7]*b[5] + (llimb_t)a[8]*b[4]
		+ (llimb_t)2*a[9]*b[3]);
	c[3] += 19 * ((llimb_t)a[4]*b[9] + (llimb_t)a[5]*b[8] + (llimb_t)a[6]*b[7]
		+ (llimb_t)a[7]*b[6] + (llimb_t)a[8]*b[5] + (llimb_t)a[9]*b[4]);
	c[4] += 19 * ((llimb_t)2*a[5]*b[9] + (llimb_t)a[6]*b[8] + (llimb_t)2*a[7]*b[7]
		+ (llimb_t)a[8]*b[6] + (llimb_t)2*a[9]*b[5]);
	c[5] += 19 * ((llimb_t)a[6]*b[9] + (llimb_t)a[7]*b[8] + (llimb_t)a[8]*b[7]
		+ (llimb_t)a[9]*b[6]);
	c[6] += 19 * ((llimb_t)2*a[7]*b[9] + (llimb_t)a[8]*b[8] + (llimb_t)2*a[9]*b[7]);
	c[7] += 19 * ((llimb_t)a[8]*b[9] + (llimb_t)a[9]*b[8]);
	c[8] += 19 * ((llimb_t)2*a[9]*b[9]);

	CARRY(dst, c, tmp, 0);
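
/*
 * where the stray factors of 2 come from (sketch, added for clarity):
 * with alternating 26/25-bit limbs, limb i sits at bit position
 * ceil(25.5*i).  for two odd indices i and j the positions add up to
 * 25.5*(i+j) + 1, one bit above the position of limb i+j, so those
 * partial products have to be doubled.  products with at least one even
 * index land exactly on a limb boundary and need no correction.  the
 * same bookkeeping explains the 2s in fld_sq below, and the factor 19
 * folds weights at or above 2^255 back down, just as in the 64-bit code.
 */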
/*
 * fld_sq - square a and reduce modulo q.
 */
fld_sq(fld_t dst, const fld_t a)
	c[0] = (llimb_t)a[0]*a[0];
	c[1] = (llimb_t)2*a[0]*a[1];
	c[2] = 2*((llimb_t)a[0]*a[2] + (llimb_t)a[1]*a[1]);
	c[3] = 2*((llimb_t)a[0]*a[3] + (llimb_t)a[1]*a[2]);
	c[4] = 2*((llimb_t)a[0]*a[4] + (llimb_t)2*a[1]*a[3]) + (llimb_t)a[2]*a[2];
	c[5] = 2*((llimb_t)a[0]*a[5] + (llimb_t)a[1]*a[4] + (llimb_t)a[2]*a[3]);
	c[6] = 2*((llimb_t)a[0]*a[6] + (llimb_t)2*a[1]*a[5] + (llimb_t)a[2]*a[4]
		+ (llimb_t)a[3]*a[3]);
	c[7] = 2*((llimb_t)a[0]*a[7] + (llimb_t)a[1]*a[6] + (llimb_t)a[2]*a[5]
		+ (llimb_t)a[3]*a[4]);
	c[8] = 2*((llimb_t)a[0]*a[8] + (llimb_t)2*a[1]*a[7] + (llimb_t)a[2]*a[6]
		+ (llimb_t)2*a[3]*a[5]) + (llimb_t)a[4]*a[4];
	c[9] = 2*((llimb_t)a[0]*a[9] + (llimb_t)a[1]*a[8] + (llimb_t)a[2]*a[7]
		+ (llimb_t)a[3]*a[6] + (llimb_t)a[4]*a[5]);

	c[0] += 19*2*((llimb_t)2*a[1]*a[9] + (llimb_t)a[2]*a[8] + (llimb_t)2*a[3]*a[7]
		+ (llimb_t)a[4]*a[6] + (llimb_t)a[5]*a[5]);
	c[1] += 19*2*((llimb_t)a[2]*a[9] + (llimb_t)a[3]*a[8] + (llimb_t)a[4]*a[7]
		+ (llimb_t)a[5]*a[6]);
	c[2] += 19*(2*((llimb_t)2*a[3]*a[9] + (llimb_t)a[4]*a[8] + (llimb_t)2*a[5]*a[7])
		+ (llimb_t)a[6]*a[6]);
	c[3] += 19*2*((llimb_t)a[4]*a[9] + (llimb_t)a[5]*a[8] + (llimb_t)a[6]*a[7]);
	c[4] += 19*(2*((llimb_t)2*a[5]*a[9] + (llimb_t)a[6]*a[8] + (llimb_t)a[7]*a[7]));
	c[5] += 19*2*((llimb_t)a[6]*a[9] + (llimb_t)a[7]*a[8]);
	c[6] += 19*((llimb_t)2*2*a[7]*a[9] + (llimb_t)a[8]*a[8]);
	c[7] += 19*2*(llimb_t)a[8]*a[9];
	c[8] += 19*2*(llimb_t)a[9]*a[9];

	CARRY(dst, c, tmp, 0);
#endif /* USE_64BIT */
/*
 * fld_eq - compares two field elements in constant time.
 *
 * returns 1 if a == b and 0 otherwise.
 */
fld_eq(const fld_t a, const fld_t b)

	fld_reduce(tmp, tmp);

	/* and check tmp for zero */
	for (i = 1; i < FLD_LIMB_NUM; i++)

	for (i = 4*sizeof(limb_t); i > 0; i >>= 1)

	/* now (res & 1) is zero iff tmp is zero */
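
/*
 * added note: this is the usual constant-time zero test.  tmp is
 * presumably set to the difference of a and b in the elided lines; after
 * fld_reduce it is all-zero exactly when a == b.  the first loop then
 * (presumably) ORs all limbs into one accumulator and the second folds
 * that accumulator onto itself with halving shifts (res |= res >> i), so
 * bit 0 ends up set exactly when the difference was non-zero.  no branch
 * depends on secret data, which keeps the comparison constant-time.
 */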
/*
 * fld_inv - inverts z modulo q.
 *
 * this code is taken from nacl. it works by taking z to the q-2
 * power. by lagrange's theorem (aka 'fermat's little theorem' in this
 * special case) this gives us z^-1 modulo q.
 */
fld_inv(fld_t res, const fld_t z)
	/* 2 */ fld_sq(z2, z);
	/* 4 */ fld_sq(t1, z2);
	/* 8 */ fld_sq(t0, t1);
	/* 9 */ fld_mul(z9, t0, z);
	/* 11 */ fld_mul(z11, z9, z2);
	/* 22 */ fld_sq(t0, z11);
	/* 2^5 - 2^0 = 31 */ fld_mul(z2_5_0, t0, z9);

	/* 2^6 - 2^1 */ fld_sq(t0, z2_5_0);
	/* 2^7 - 2^2 */ fld_sq(t1, t0);
	/* 2^8 - 2^3 */ fld_sq(t0, t1);
	/* 2^9 - 2^4 */ fld_sq(t1, t0);
	/* 2^10 - 2^5 */ fld_sq(t0, t1);
	/* 2^10 - 2^0 */ fld_mul(z2_10_0, t0, z2_5_0);

	/* 2^11 - 2^1 */ fld_sq(t0, z2_10_0);
	/* 2^12 - 2^2 */ fld_sq(t1, t0);
	/* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fld_sq(t0, t1); fld_sq(t1, t0); }
	/* 2^20 - 2^0 */ fld_mul(z2_20_0, t1, z2_10_0);

	/* 2^21 - 2^1 */ fld_sq(t0, z2_20_0);
	/* 2^22 - 2^2 */ fld_sq(t1, t0);
	/* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fld_sq(t0, t1); fld_sq(t1, t0); }
	/* 2^40 - 2^0 */ fld_mul(t0, t1, z2_20_0);

	/* 2^41 - 2^1 */ fld_sq(t1, t0);
	/* 2^42 - 2^2 */ fld_sq(t0, t1);
	/* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fld_sq(t1, t0); fld_sq(t0, t1); }
	/* 2^50 - 2^0 */ fld_mul(z2_50_0, t0, z2_10_0);

	/* 2^51 - 2^1 */ fld_sq(t0, z2_50_0);
	/* 2^52 - 2^2 */ fld_sq(t1, t0);
	/* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fld_sq(t0, t1); fld_sq(t1, t0); }
	/* 2^100 - 2^0 */ fld_mul(z2_100_0, t1, z2_50_0);

	/* 2^101 - 2^1 */ fld_sq(t1, z2_100_0);
	/* 2^102 - 2^2 */ fld_sq(t0, t1);
	/* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fld_sq(t1, t0); fld_sq(t0, t1); }
	/* 2^200 - 2^0 */ fld_mul(t1, t0, z2_100_0);

	/* 2^201 - 2^1 */ fld_sq(t0, t1);
	/* 2^202 - 2^2 */ fld_sq(t1, t0);
	/* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fld_sq(t0, t1); fld_sq(t1, t0); }
	/* 2^250 - 2^0 */ fld_mul(t0, t1, z2_50_0);

	/* 2^251 - 2^1 */ fld_sq(t1, t0);
	/* 2^252 - 2^2 */ fld_sq(t0, t1);

	/* 2^253 - 2^3 */ fld_sq(t1, t0);
	/* 2^254 - 2^4 */ fld_sq(t0, t1);
	/* 2^255 - 2^5 */ fld_sq(t1, t0);
	/* 2^255 - 21 */ fld_mul(res, t1, z11);
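
/*
 * illustrative use (assumed, not code from this file): projective-to-affine
 * conversion is the typical caller of fld_inv, e.g.
 */
#if 0
static void
example_to_affine(fld_t x_affine, const fld_t x, const fld_t z)
{
	fld_t zinv;

	fld_inv(zinv, z);		/* zinv = z^(q-2) = z^-1 (mod q) */
	fld_mul(x_affine, x, zinv);	/* x_affine = x / z (mod q) */
}
#endif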
/*
 * fld_pow2523 - compute z^((q-5)/8) modulo q, i.e. (z*res)^2 is either z
 *
 * this function is used to combine a square root modulo q with an inversion in
 * ed_import. see the ed25519 paper for an explanation.
 *
 * this code is, like fld_inv, taken from nacl.
 */
fld_pow2523(fld_t res, const fld_t z)
	/* 2 */ fld_sq(z2, z);
	/* 4 */ fld_sq(t, z2);
	/* 8 */ fld_sq(t, t);
	/* 9 */ fld_mul(z9, t, z);
	/* 11 */ fld_mul(t, z9, z2);
	/* 22 */ fld_sq(t, t);
	/* 2^5 - 2^0 = 31 */ fld_mul(z2_5_0, t, z9);

	/* 2^6 - 2^1 */ fld_sq(t, z2_5_0);
	/* 2^10 - 2^5 */ for (i = 1; i < 5; i++) { fld_sq(t, t); }
	/* 2^10 - 2^0 */ fld_mul(z2_10_0, t, z2_5_0);

	/* 2^11 - 2^1 */ fld_sq(t, z2_10_0);
	/* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { fld_sq(t, t); }
	/* 2^20 - 2^0 */ fld_mul(z2_20_0, t, z2_10_0);

	/* 2^21 - 2^1 */ fld_sq(t, z2_20_0);
	/* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { fld_sq(t, t); }
	/* 2^40 - 2^0 */ fld_mul(t, t, z2_20_0);

	/* 2^41 - 2^1 */ fld_sq(t, t);
	/* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { fld_sq(t, t); }
	/* 2^50 - 2^0 */ fld_mul(z2_50_0, t, z2_10_0);

	/* 2^51 - 2^1 */ fld_sq(t, z2_50_0);
	/* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { fld_sq(t, t); }
	/* 2^100 - 2^0 */ fld_mul(z2_100_0, t, z2_50_0);

	/* 2^101 - 2^1 */ fld_sq(t, z2_100_0);
	/* 2^200 - 2^100 */ for (i = 1; i < 100; i++) { fld_sq(t, t); }
	/* 2^200 - 2^0 */ fld_mul(t, t, z2_100_0);

	/* 2^201 - 2^1 */ fld_sq(t, t);
	/* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { fld_sq(t, t); }
	/* 2^250 - 2^0 */ fld_mul(t, t, z2_50_0);

	/* 2^251 - 2^1 */ fld_sq(t, t);
	/* 2^252 - 2^2 */ fld_sq(t, t);
	/* 2^252 - 3 */ fld_mul(res, t, z);
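
/*
 * sketch of the intended use (assumed from the ed25519 paper, not taken
 * from this file): ed_import has to recover x from y with x^2 = u/v for
 * known u and v.  the paper computes a candidate root and the inversion
 * in one exponentiation,
 *
 *	x = u * v^3 * (u*v^7)^((q-5)/8),
 *
 * which is exactly what fld_pow2523 provides.  a hypothetical helper
 * could look like this:
 */
#if 0
static void
example_sqrt_ratio(fld_t x, const fld_t u, const fld_t v)
{
	fld_t v2, v3, v7, t, t2;

	fld_sq(v2, v);			/* v^2 */
	fld_mul(v3, v2, v);		/* v^3 */
	fld_sq(t, v3);			/* v^6 */
	fld_mul(v7, t, v);		/* v^7 */
	fld_mul(t, u, v7);		/* u * v^7 */
	fld_pow2523(t2, t);		/* (u*v^7)^((q-5)/8) */
	fld_mul(t, u, v3);		/* u * v^3 */
	fld_mul(x, t, t2);		/* candidate root of u/v */

	/* the caller still has to check v*x^2 == +/-u and, if needed,
	 * multiply x by con_j = sqrt(-1); see the ed25519 paper. */
}
#endif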