pd.if.org Git - zpackage/blob - crypto/libeddsa/lib/ed.c

   1 /*
   2  * Implementing twisted edwards curve
   3  *      E : -x^2 + y^2 = 1 - (121665/121666) x^2 y^2
   4  * over GF(2^255-19) according to [1].
   5  *
   6  * This code is public domain.
   7  *
   8  * Philipp Lay <philipp.lay@illunis.net>.
   9  *
  10  * References:
  11  * [1] High-speed high-security signatures, 2011/09/26,
  12  *     Bernstein, Duif, Lange, Schwabe, Yang
  13  * [2] Twisted Edwards Curves Revisited, 2008,
  14  *     Hisil, Wong, Carter, Dawson
  15  */
  16
  17 #include <stdint.h>
  18 #include <string.h>
  19
  20 #include "bitness.h"
  21 #include "fld.h"
  22 #include "sc.h"
  23 #include "ed.h"
  24
  25
  26 /*
  27  * special pre-computed form of a point on the curve, used
  28  * for the lookup table and some optimizations.
  29  */
  30 struct pced {
  31         fld_t           diff;           /* y - x */
  32         fld_t           sum;            /* y + x */
  33         fld_t           prod;           /* 2*d*t */
  34 };
  35
  36
  37
  38 #ifdef USE_64BIT
  39
  40 /* lookup-table for ed_scale_base - 64bit version */
  41 static const struct pced ed_lookup[][8] = {
  42   #include "ed_lookup64.h"
  43 };
  44
  45 /* base point B of our group in pre-computed form */
  46 const struct pced pced_B = {
  47         {62697248952638, 204681361388450, 631292143396476,
  48          338455783676468, 1213667448819585},
  49         {1288382639258501, 245678601348599, 269427782077623,
  50          1462984067271730, 137412439391563},
  51         {301289933810280, 1259582250014073, 1422107436869536,
  52          796239922652654, 1953934009299142} };
  53
  54 #else
  55
  56 /* lookup-table for ed_scale_base - 32bit version */
  57 static const struct pced ed_lookup[][8] = {
  58   #include "ed_lookup32.h"
  59 };
  60
  61 /* base point B of our group in pre-computed form */
  62 const struct pced pced_B = {
  63         {54563134, 934261, 64385954, 3049989, 66381436,
  64          9406985, 12720692, 5043384, 19500929, 18085054},
  65         {25967493, 19198397, 29566455, 3660896, 54414519,
  66          4014786, 27544626, 21800161, 61029707, 2047604},
  67         {58370664, 4489569, 9688441, 18769238, 10184608,
  68          21191052, 29287918, 11864899, 42594502, 29115885} };
  69
  70 #endif
  71
  72 const struct ed ed_zero = { { 0 }, { 1 }, { 0 }, { 1 } };
  73 const struct pced pced_zero = { { 1 }, { 1 }, { 0 } };
  74
  75
  76 /*
  77  * memselect - helper function to select one of two buffers in a
  78  * time-constant way.
  79  */
  80 static void
  81 memselect(void *out, const void *pa, const void *pb, size_t max, int flag)
  82 {
  83         uint8_t *a = (uint8_t*)pa;
  84         uint8_t *b = (uint8_t*)pb;
  85         uint8_t *p = (uint8_t*)out;
  86         uint8_t *endp = p+max;
  87
  88         uint8_t mB = (flag & 1) - 1;
  89         uint8_t mA = ~mB;
  90
  91         while (p < endp)
  92                 *p++ = (mA & *a++) ^ (mB & *b++);
  93 }
  94
  95
  96 /*
  97  * ed_import - import a point P on the curve from packet 256bit encoding.
  98  *
  99  */
 100 void
 101 ed_import(struct ed *P, const uint8_t in[32])
 102 {
 103         fld_t U, V, A, B;
 104         uint8_t tmp[32];
 105         int flag;
 106
 107         /* import y */
 108         memcpy(tmp, in, 32);
 109         tmp[31] &= 0x7f;
 110
 111         fld_import(P->y, tmp);
 112
 113
 114         /* U <- y^2 - 1,  V <- d*y^2 + 1 */
 115         fld_sq(U, P->y);
 116         fld_mul(V, con_d, U);
 117         U[0]--;
 118         V[0]++;
 119
 120         /* A <- v^2 */
 121         fld_sq(A, V);
 122         /* B <- v^4 */
 123         fld_sq(B, A);
 124         /* A <- uv^3 */
 125         fld_mul(A, A, U);
 126         fld_mul(A, A, V);
 127         /* B <- (uv^7)^((q-5)/8) */
 128         fld_mul(B, B, A);
 129         fld_pow2523(B, B);
 130         /* B <- uv^3 * (uv^7)^((q-5)/8) */
 131         fld_mul(B, B, A);
 132
 133         /* A <- v * B^2 */
 134         fld_sq(A, B);
 135         fld_mul(A, A, V);
 136         /* flag <- v*B^2 == u */
 137         flag = fld_eq(A, U);
 138
 139         /* A <- j * B */
 140         fld_mul(A, con_j, B);
 141
 142         memselect(P->x, B, A, sizeof(fld_t), flag);
 143         fld_reduce(P->x, P->x);
 144         fld_tinyscale(P->x, P->x, 1 - 2*((in[31] >> 7) ^ (P->x[0] & 1)));
 145
 146         /* compute t and z */
 147         fld_mul(P->t, P->x, P->y);
 148         fld_set0(P->z, 1);
 149 }
 150
 151
 152 /*
 153  * ed_export - export point P to packed 256bit format.
 154  */
 155 void
 156 ed_export(uint8_t out[32], const struct ed *P)
 157 {
 158         fld_t x, y, zinv;
 159
 160         /* divide x and y by z to get affine coordinates */
 161         fld_inv(zinv, P->z);
 162         fld_mul(x, P->x, zinv);
 163         fld_mul(y, P->y, zinv);
 164         fld_export(out, y);
 165
 166         /* lsb decides the sign of x */
 167         fld_reduce(x, x);
 168         out[31] |= (x[0] & 1) << 7;
 169 }
 170
 171
 172 /*
 173  * ed_add - add points P and Q
 174  */
 175 static void
 176 ed_add(struct ed *out, const struct ed *P, const struct ed *Q)
 177 {
 178         fld_t a, b, c, d, e, f, g, h, t;
 179
 180         fld_sub(a, P->y, P->x);
 181         fld_sub(t, Q->y, Q->x);
 182         fld_mul(a, a, t);
 183
 184         fld_add(b, P->y, P->x);
 185         fld_add(t, Q->y, Q->x);
 186         fld_mul(b, b, t);
 187
 188         fld_mul(c, P->t, Q->t);
 189         fld_mul(c, c, con_2d);
 190
 191         fld_mul(d, P->z, Q->z);
 192         fld_scale2(d, d);
 193
 194         fld_sub(e, b, a);
 195         fld_sub(f, d, c);
 196         fld_add(g, d, c);
 197         fld_add(h, b, a);
 198
 199         fld_mul(out->x, e, f);
 200         fld_mul(out->y, g, h);
 201         fld_mul(out->t, e, h);
 202         fld_mul(out->z, f, g);
 203 }
 204
 205
 206 /*
 207  * ed_double - special case of ed_add for P=Q.
 208  *
 209  * little bit faster, since 4 multiplications are turned into squares.
 210  */
 211 static void
 212 ed_double(struct ed *out, const struct ed *P)
 213 {
 214         fld_t a, b, c, d, e, f, g, h;
 215
 216         fld_sub(a, P->y, P->x);
 217         fld_sq(a, a);
 218
 219         fld_add(b, P->y, P->x);
 220         fld_sq(b, b);
 221
 222         fld_sq(c, P->t);
 223         fld_mul(c, c, con_2d);
 224
 225         fld_sq(d, P->z);
 226         fld_scale2(d, d);
 227
 228         fld_sub(e, b, a);
 229         fld_sub(f, d, c);
 230         fld_add(g, d, c);
 231         fld_add(h, b, a);
 232
 233         fld_mul(out->x, e, f);
 234         fld_mul(out->y, g, h);
 235         fld_mul(out->t, e, h);
 236         fld_mul(out->z, f, g);
 237 }
 238
 239 /*
 240  * ed_sub - subtract two points P and Q.
 241  *
 242  * alternatively we could negate a point (which is cheap) and use
 243  * ed_add, but this is a little bit faster.
 244  */
 245 static void
 246 ed_sub(struct ed *out, const struct ed *P, const struct ed *Q)
 247 {
 248         fld_t a, b, c, d, e, f, g, h, t;
 249
 250         fld_sub(a, P->y, P->x);
 251         fld_add(t, Q->y, Q->x);
 252         fld_mul(a, a, t);
 253
 254         fld_add(b, P->y, P->x);
 255         fld_sub(t, Q->y, Q->x);
 256         fld_mul(b, b, t);
 257
 258         fld_mul(c, P->t, Q->t);
 259         fld_mul(c, c, con_m2d);
 260
 261         fld_mul(d, P->z, Q->z);
 262         fld_scale2(d, d);
 263
 264         fld_sub(e, b, a);
 265         fld_sub(f, d, c);
 266         fld_add(g, d, c);
 267         fld_add(h, b, a);
 268
 269         fld_mul(out->x, e, f);
 270         fld_mul(out->y, g, h);
 271         fld_mul(out->t, e, h);
 272         fld_mul(out->z, f, g);
 273 }
 274
 275
 276 /*
 277  * ed_add_pc - add points P and Q where Q is in precomputed form
 278  *
 279  * the precomputed form is used for the lookup table and as an
 280  * optimization in ed_double_scale.
 281  */
 282 static void
 283 ed_add_pc(struct ed *out, const struct ed *P, const struct pced *Q)
 284 {
 285         fld_t a, b, c, d, e, f, g, h;
 286
 287         fld_sub(a, P->y, P->x);
 288         fld_mul(a, a, Q->diff);
 289
 290         fld_add(b, P->y, P->x);
 291         fld_mul(b, b, Q->sum);
 292
 293         fld_mul(c, P->t, Q->prod);
 294         fld_scale2(d, P->z);
 295
 296         fld_sub(e, b, a);
 297         fld_sub(f, d, c);
 298         fld_add(g, d, c);
 299         fld_add(h, b, a);
 300
 301         fld_mul(out->x, e, f);
 302         fld_mul(out->y, g, h);
 303         fld_mul(out->t, e, h);
 304         fld_mul(out->z, f, g);
 305 }
 306
 307 /*
 308  * ed_sub_pc - subtract P and Q where Q is in precomputed form.
 309  */
 310 static void
 311 ed_sub_pc(struct ed *out, const struct ed *P, const struct pced *Q)
 312 {
 313         fld_t a, b, c, d, e, f, g, h;
 314
 315         fld_sub(a, P->y, P->x);
 316         fld_mul(a, a, Q->sum);
 317
 318         fld_add(b, P->y, P->x);
 319         fld_mul(b, b, Q->diff);
 320
 321         fld_mul(c, P->t, Q->prod);
 322         fld_neg(c, c);
 323
 324         fld_scale2(d, P->z);
 325
 326         fld_sub(e, b, a);
 327         fld_sub(f, d, c);
 328         fld_add(g, d, c);
 329         fld_add(h, b, a);
 330
 331         fld_mul(out->x, e, f);
 332         fld_mul(out->y, g, h);
 333         fld_mul(out->t, e, h);
 334         fld_mul(out->z, f, g);
 335 }
 336
 337
 338 /*
 339  * scale16 - helper function for ed_scale_base, returns x * 16^pow * base
 340  *
 341  * assumes:
 342  *  -8 <= x <= 7
 343  *   0 <= pow <= 62
 344  *   2 | pow
 345  */
 346 static void
 347 scale16(struct pced *out, int pow, int x)
 348 {
 349         struct pced R = { { 0 }, { 0 }, { 0 } };
 350         limb_t mA, mB, mask;
 351         int neg, sgnx, absx;
 352         int i, k;
 353
 354         neg = (x >> 3) & 1;
 355         sgnx = 1 - 2*neg;
 356         absx = sgnx * x;
 357         pow >>= 1;
 358
 359         /* handle abs(x) == 0 */
 360         mask = absx | (absx >> 2);
 361         mask |= mask >> 1;
 362         mask = (mask & 1) - 1;
 363         for (i = 0; i < FLD_LIMB_NUM; i++) {
 364                 R.diff[i] ^= pced_zero.diff[i] & mask;
 365                 R.sum[i] ^= pced_zero.sum[i] & mask;
 366                 R.prod[i] ^= pced_zero.prod[i] & mask;
 367         }
 368
 369
 370         /* go through our table and look for abs(x) */
 371         for (k = 0; k < 8; k++) {
 372                 absx--;
 373                 mask = absx | (absx >> 2);
 374                 mask |= mask >> 1;
 375                 mask = (mask & 1) - 1;
 376                 for (i = 0; i < FLD_LIMB_NUM; i++) {
 377                         R.diff[i] ^= ed_lookup[pow][k].diff[i] & mask;
 378                         R.sum[i] ^= ed_lookup[pow][k].sum[i] & mask;
 379                         R.prod[i] ^= ed_lookup[pow][k].prod[i] & mask;
 380                 }
 381         }
 382
 383         /* conditionally negate R and write to out */
 384         mA = neg-1;
 385         mB = ~mA;
 386         for (i = 0; i < FLD_LIMB_NUM; i++) {
 387                 out->diff[i] = (mA & R.diff[i]) ^ (mB & R.sum[i]);
 388                 out->sum[i]  = (mB & R.diff[i]) ^ (mA & R.sum[i]);
 389                 out->prod[i] = sgnx * R.prod[i];
 390         }
 391 }
 392
 393
 394 /*
 395  * ed_scale_base - calculates x * base
 396  */
 397 void
 398 ed_scale_base(struct ed *out, const sc_t x)
 399 {
 400         struct ed R0, R1;
 401         struct pced P;
 402         sc_t tmp;
 403         uint8_t pack[32];
 404         int r[64];
 405         int i;
 406
 407         /* s <- x + 8 * (16^64 - 1) / 15 */
 408         sc_add(tmp, x, con_off);
 409         sc_export(pack, tmp);
 410         for (i = 0; i < 32; i++) {
 411                 r[2*i] = (pack[i] & 0x0f) - 8;
 412                 r[2*i+1] = (pack[i] >> 4) - 8;
 413         }
 414
 415         /*
 416          * R0 <- r0*B + r2*16^2*B + ... + r62*16^62*B
 417          * R1 <- r1*B + r3*16^2*B + ... + r63*16^62*B
 418          */
 419         memcpy(&R0, &ed_zero, sizeof(struct ed));
 420         memcpy(&R1, &ed_zero, sizeof(struct ed));
 421         for (i = 0; i < 63; i += 2) {
 422                 scale16(&P, i, r[i]);
 423                 ed_add_pc(&R0, &R0, &P);
 424
 425                 scale16(&P, i, r[i+1]);
 426                 ed_add_pc(&R1, &R1, &P);
 427         }
 428
 429         /* R1 <- 16 * R1 */
 430         for (i = 0; i < 4; i++)
 431                 ed_add(&R1, &R1, &R1);
 432
 433         /* out <- R0 + R1 */
 434         ed_add(out, &R0, &R1);
 435 }
 436
 437
 438 /*
 439  * helper function to speed up ed_double_scale
 440  */
 441 static void
 442 ed_precompute(struct pced *R, const struct ed *P)
 443 {
 444         fld_sub(R->diff, P->y, P->x);
 445         fld_add(R->sum, P->y, P->x);
 446         fld_mul(R->prod, P->t, con_2d);
 447 }
 448
 449
 450 /*
 451  * ed_dual_scale - calculates R = x*base + y*Q.  (vartime)
 452  *
 453  * Note: This algorithms does NOT run in constant time! Please use this
 454  * only for public information like in ed25519_verify().
 455  *
 456  * assumes:
 457  *   Q is affine, ie has z = 1
 458  *   x and y must be reduced
 459  */
 460 void
 461 ed_dual_scale(struct ed *R,
 462               const sc_t x,
 463               const sc_t y, const struct ed *Q)
 464 {
 465         struct ed QpB, QmB;
 466         struct pced pcQ;
 467
 468         int ux[SC_BITS+1], uy[SC_BITS+1];
 469         int n, i;
 470
 471         memcpy(R, &ed_zero, sizeof(struct ed));
 472
 473         /* calculate joint sparse form of x and y */
 474         n = sc_jsf(ux, uy, x, y);
 475         if (n == -1)
 476                 return;
 477
 478         /* precompute Q, Q+B and Q-B */
 479         ed_add_pc(&QpB, Q, &pced_B);
 480         ed_sub_pc(&QmB, Q, &pced_B);
 481         ed_precompute(&pcQ, Q);
 482
 483         /* now we calculate R = x * base_point + y * Q using fast shamir method */
 484         for (i = n; ; i--) {
 485                 if (ux[i] == 1) {
 486                         if (uy[i] == 1)
 487                                 ed_add(R, R, &QpB);
 488                         else if (uy[i] == -1)
 489                                 ed_sub(R, R, &QmB);
 490                         else
 491                                 ed_add_pc(R, R, &pced_B);
 492
 493                 } else if (ux[i] == -1) {
 494                         if (uy[i] == 1)
 495                                 ed_add(R, R, &QmB);
 496                         else if (uy[i] == -1)
 497                                 ed_sub(R, R, &QpB);
 498                         else
 499                                 ed_sub_pc(R, R, &pced_B);
 500
 501                 } else {
 502                         if (uy[i] == 1)
 503                                 ed_add_pc(R, R, &pcQ);
 504                         else if (uy[i] == -1)
 505                                 ed_sub_pc(R, R, &pcQ);
 506                 }
 507
 508                 if (i == 0) break;
 509
 510                 ed_double(R, R);
 511         }
 512 }