2 * Implementing twisted edwards curve
3 * E : -x^2 + y^2 = 1 - (121665/121666) x^2 y^2
4 * over GF(2^255-19) according to [1].
6 * This code is public domain.
8 * Philipp Lay <philipp.lay@illunis.net>.
11 * [1] High-speed high-security signatures, 2011/09/26,
12 * Bernstein, Duif, Lange, Schwabe, Yang
13 * [2] Twisted Edwards Curves Revisited, 2008,
14 * Hisil, Wong, Carter, Dawson
27 * special pre-computed form of a point on the curve, used
28 * for the lookup table and some optimizations.
31 fld_t diff; /* y - x */
32 fld_t sum; /* y + x */
33 fld_t prod; /* 2*d*t */
40 /* lookup-table for ed_scale_base - 64bit version */
41 static const struct pced ed_lookup[][8] = {
42 #include "ed_lookup64.h"
45 /* base point B of our group in pre-computed form */
46 const struct pced pced_B = {
47 {62697248952638, 204681361388450, 631292143396476,
48 338455783676468, 1213667448819585},
49 {1288382639258501, 245678601348599, 269427782077623,
50 1462984067271730, 137412439391563},
51 {301289933810280, 1259582250014073, 1422107436869536,
52 796239922652654, 1953934009299142} };
56 /* lookup-table for ed_scale_base - 32bit version */
57 static const struct pced ed_lookup[][8] = {
58 #include "ed_lookup32.h"
61 /* base point B of our group in pre-computed form */
62 const struct pced pced_B = {
63 {54563134, 934261, 64385954, 3049989, 66381436,
64 9406985, 12720692, 5043384, 19500929, 18085054},
65 {25967493, 19198397, 29566455, 3660896, 54414519,
66 4014786, 27544626, 21800161, 61029707, 2047604},
67 {58370664, 4489569, 9688441, 18769238, 10184608,
68 21191052, 29287918, 11864899, 42594502, 29115885} };
72 const struct ed ed_zero = { { 0 }, { 1 }, { 0 }, { 1 } };
73 const struct pced pced_zero = { { 1 }, { 1 }, { 0 } };
77 * memselect - helper function to select one of two buffers in a
81 memselect(void *out, const void *pa, const void *pb, size_t max, int flag)
83 uint8_t *a = (uint8_t*)pa;
84 uint8_t *b = (uint8_t*)pb;
85 uint8_t *p = (uint8_t*)out;
86 uint8_t *endp = p+max;
88 uint8_t mB = (flag & 1) - 1;
92 *p++ = (mA & *a++) ^ (mB & *b++);
97 * ed_import - import a point P on the curve from packet 256bit encoding.
101 ed_import(struct ed *P, const uint8_t in[32])
111 fld_import(P->y, tmp);
114 /* U <- y^2 - 1, V <- d*y^2 + 1 */
116 fld_mul(V, con_d, U);
127 /* B <- (uv^7)^((q-5)/8) */
130 /* B <- uv^3 * (uv^7)^((q-5)/8) */
136 /* flag <- v*B^2 == u */
140 fld_mul(A, con_j, B);
142 memselect(P->x, B, A, sizeof(fld_t), flag);
143 fld_reduce(P->x, P->x);
144 fld_tinyscale(P->x, P->x, 1 - 2*((in[31] >> 7) ^ (P->x[0] & 1)));
146 /* compute t and z */
147 fld_mul(P->t, P->x, P->y);
153 * ed_export - export point P to packed 256bit format.
156 ed_export(uint8_t out[32], const struct ed *P)
160 /* divide x and y by z to get affine coordinates */
162 fld_mul(x, P->x, zinv);
163 fld_mul(y, P->y, zinv);
166 /* lsb decides the sign of x */
168 out[31] |= (x[0] & 1) << 7;
173 * ed_add - add points P and Q
176 ed_add(struct ed *out, const struct ed *P, const struct ed *Q)
178 fld_t a, b, c, d, e, f, g, h, t;
180 fld_sub(a, P->y, P->x);
181 fld_sub(t, Q->y, Q->x);
184 fld_add(b, P->y, P->x);
185 fld_add(t, Q->y, Q->x);
188 fld_mul(c, P->t, Q->t);
189 fld_mul(c, c, con_2d);
191 fld_mul(d, P->z, Q->z);
199 fld_mul(out->x, e, f);
200 fld_mul(out->y, g, h);
201 fld_mul(out->t, e, h);
202 fld_mul(out->z, f, g);
207 * ed_double - special case of ed_add for P=Q.
209 * little bit faster, since 4 multiplications are turned into squares.
212 ed_double(struct ed *out, const struct ed *P)
214 fld_t a, b, c, d, e, f, g, h;
216 fld_sub(a, P->y, P->x);
219 fld_add(b, P->y, P->x);
223 fld_mul(c, c, con_2d);
233 fld_mul(out->x, e, f);
234 fld_mul(out->y, g, h);
235 fld_mul(out->t, e, h);
236 fld_mul(out->z, f, g);
240 * ed_sub - subtract two points P and Q.
242 * alternatively we could negate a point (which is cheap) and use
243 * ed_add, but this is a little bit faster.
246 ed_sub(struct ed *out, const struct ed *P, const struct ed *Q)
248 fld_t a, b, c, d, e, f, g, h, t;
250 fld_sub(a, P->y, P->x);
251 fld_add(t, Q->y, Q->x);
254 fld_add(b, P->y, P->x);
255 fld_sub(t, Q->y, Q->x);
258 fld_mul(c, P->t, Q->t);
259 fld_mul(c, c, con_m2d);
261 fld_mul(d, P->z, Q->z);
269 fld_mul(out->x, e, f);
270 fld_mul(out->y, g, h);
271 fld_mul(out->t, e, h);
272 fld_mul(out->z, f, g);
277 * ed_add_pc - add points P and Q where Q is in precomputed form
279 * the precomputed form is used for the lookup table and as an
280 * optimization in ed_double_scale.
283 ed_add_pc(struct ed *out, const struct ed *P, const struct pced *Q)
285 fld_t a, b, c, d, e, f, g, h;
287 fld_sub(a, P->y, P->x);
288 fld_mul(a, a, Q->diff);
290 fld_add(b, P->y, P->x);
291 fld_mul(b, b, Q->sum);
293 fld_mul(c, P->t, Q->prod);
301 fld_mul(out->x, e, f);
302 fld_mul(out->y, g, h);
303 fld_mul(out->t, e, h);
304 fld_mul(out->z, f, g);
308 * ed_sub_pc - subtract P and Q where Q is in precomputed form.
311 ed_sub_pc(struct ed *out, const struct ed *P, const struct pced *Q)
313 fld_t a, b, c, d, e, f, g, h;
315 fld_sub(a, P->y, P->x);
316 fld_mul(a, a, Q->sum);
318 fld_add(b, P->y, P->x);
319 fld_mul(b, b, Q->diff);
321 fld_mul(c, P->t, Q->prod);
331 fld_mul(out->x, e, f);
332 fld_mul(out->y, g, h);
333 fld_mul(out->t, e, h);
334 fld_mul(out->z, f, g);
339 * scale16 - helper function for ed_scale_base, returns x * 16^pow * base
347 scale16(struct pced *out, int pow, int x)
349 struct pced R = { { 0 }, { 0 }, { 0 } };
359 /* handle abs(x) == 0 */
360 mask = absx | (absx >> 2);
362 mask = (mask & 1) - 1;
363 for (i = 0; i < FLD_LIMB_NUM; i++) {
364 R.diff[i] ^= pced_zero.diff[i] & mask;
365 R.sum[i] ^= pced_zero.sum[i] & mask;
366 R.prod[i] ^= pced_zero.prod[i] & mask;
370 /* go through our table and look for abs(x) */
371 for (k = 0; k < 8; k++) {
373 mask = absx | (absx >> 2);
375 mask = (mask & 1) - 1;
376 for (i = 0; i < FLD_LIMB_NUM; i++) {
377 R.diff[i] ^= ed_lookup[pow][k].diff[i] & mask;
378 R.sum[i] ^= ed_lookup[pow][k].sum[i] & mask;
379 R.prod[i] ^= ed_lookup[pow][k].prod[i] & mask;
383 /* conditionally negate R and write to out */
386 for (i = 0; i < FLD_LIMB_NUM; i++) {
387 out->diff[i] = (mA & R.diff[i]) ^ (mB & R.sum[i]);
388 out->sum[i] = (mB & R.diff[i]) ^ (mA & R.sum[i]);
389 out->prod[i] = sgnx * R.prod[i];
395 * ed_scale_base - calculates x * base
398 ed_scale_base(struct ed *out, const sc_t x)
407 /* s <- x + 8 * (16^64 - 1) / 15 */
408 sc_add(tmp, x, con_off);
409 sc_export(pack, tmp);
410 for (i = 0; i < 32; i++) {
411 r[2*i] = (pack[i] & 0x0f) - 8;
412 r[2*i+1] = (pack[i] >> 4) - 8;
416 * R0 <- r0*B + r2*16^2*B + ... + r62*16^62*B
417 * R1 <- r1*B + r3*16^2*B + ... + r63*16^62*B
419 memcpy(&R0, &ed_zero, sizeof(struct ed));
420 memcpy(&R1, &ed_zero, sizeof(struct ed));
421 for (i = 0; i < 63; i += 2) {
422 scale16(&P, i, r[i]);
423 ed_add_pc(&R0, &R0, &P);
425 scale16(&P, i, r[i+1]);
426 ed_add_pc(&R1, &R1, &P);
430 for (i = 0; i < 4; i++)
431 ed_add(&R1, &R1, &R1);
434 ed_add(out, &R0, &R1);
439 * helper function to speed up ed_double_scale
442 ed_precompute(struct pced *R, const struct ed *P)
444 fld_sub(R->diff, P->y, P->x);
445 fld_add(R->sum, P->y, P->x);
446 fld_mul(R->prod, P->t, con_2d);
451 * ed_dual_scale - calculates R = x*base + y*Q. (vartime)
453 * Note: This algorithms does NOT run in constant time! Please use this
454 * only for public information like in ed25519_verify().
457 * Q is affine, ie has z = 1
458 * x and y must be reduced
461 ed_dual_scale(struct ed *R,
463 const sc_t y, const struct ed *Q)
468 int ux[SC_BITS+1], uy[SC_BITS+1];
471 memcpy(R, &ed_zero, sizeof(struct ed));
473 /* calculate joint sparse form of x and y */
474 n = sc_jsf(ux, uy, x, y);
478 /* precompute Q, Q+B and Q-B */
479 ed_add_pc(&QpB, Q, &pced_B);
480 ed_sub_pc(&QmB, Q, &pced_B);
481 ed_precompute(&pcQ, Q);
483 /* now we calculate R = x * base_point + y * Q using fast shamir method */
488 else if (uy[i] == -1)
491 ed_add_pc(R, R, &pced_B);
493 } else if (ux[i] == -1) {
496 else if (uy[i] == -1)
499 ed_sub_pc(R, R, &pced_B);
503 ed_add_pc(R, R, &pcQ);
504 else if (uy[i] == -1)
505 ed_sub_pc(R, R, &pcQ);