X-Git-Url: https://pd.if.org/git/?a=blobdiff_plain;f=crypto%2Flibeddsa%2Flib%2Fed.c;fp=crypto%2Flibeddsa%2Flib%2Fed.c;h=9a1e5eee76735afc2b8843e519ce205be7c6595f;hb=7bfbc0423ba40ea5156e06c8fb62bacd5ea93390;hp=0000000000000000000000000000000000000000;hpb=2ae349f5ed63b026cff763b35984dd36b330870a;p=zpackage
diff --git a/crypto/libeddsa/lib/ed.c b/crypto/libeddsa/lib/ed.c
new file mode 100644
index 0000000..9a1e5ee
--- /dev/null
+++ b/crypto/libeddsa/lib/ed.c
@@ -0,0 +1,512 @@
+/*
+ * Implementation of the twisted Edwards curve
+ *   E : -x^2 + y^2 = 1 - (121665/121666) x^2 y^2
+ * over GF(2^255-19) according to [1].
+ *
+ * This code is public domain.
+ *
+ * Philipp Lay.
+ *
+ * References:
+ * [1] High-speed high-security signatures, 2011/09/26,
+ *     Bernstein, Duif, Lange, Schwabe, Yang
+ * [2] Twisted Edwards Curves Revisited, 2008,
+ *     Hisil, Wong, Carter, Dawson
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "bitness.h"
+#include "fld.h"
+#include "sc.h"
+#include "ed.h"
+
+
+/*
+ * special pre-computed form of a point on the curve, used
+ * for the lookup table and some optimizations.
+ */
+struct pced {
+	fld_t	diff;		/* y - x */
+	fld_t	sum;		/* y + x */
+	fld_t	prod;		/* 2*d*t */
+};
+
+
+
+#ifdef USE_64BIT
+
+/* lookup table for ed_scale_base - 64bit version */
+static const struct pced ed_lookup[][8] = {
+	#include "ed_lookup64.h"
+};
+
+/* base point B of our group in pre-computed form */
+const struct pced pced_B = {
+	{62697248952638, 204681361388450, 631292143396476,
+	 338455783676468, 1213667448819585},
+	{1288382639258501, 245678601348599, 269427782077623,
+	 1462984067271730, 137412439391563},
+	{301289933810280, 1259582250014073, 1422107436869536,
+	 796239922652654, 1953934009299142} };
+
+#else
+
+/* lookup table for ed_scale_base - 32bit version */
+static const struct pced ed_lookup[][8] = {
+	#include "ed_lookup32.h"
+};
+
+/* base point B of our group in pre-computed form */
+const struct pced pced_B = {
+	{54563134, 934261, 64385954, 3049989, 66381436,
+	 9406985, 12720692, 5043384, 19500929, 18085054},
+	{25967493, 19198397, 29566455, 3660896, 54414519,
+	 4014786, 27544626, 21800161, 61029707, 2047604},
+	{58370664, 4489569, 9688441, 18769238, 10184608,
+	 21191052, 29287918, 11864899, 42594502, 29115885} };
+
+#endif
+
+const struct ed ed_zero = { { 0 }, { 1 }, { 0 }, { 1 } };
+const struct pced pced_zero = { { 1 }, { 1 }, { 0 } };
+
+
+/*
+ * memselect - helper function to select one of two buffers in a
+ * constant-time way.
+ */
+static void
+memselect(void *out, const void *pa, const void *pb, size_t max, int flag)
+{
+	const uint8_t *a = (const uint8_t*)pa;
+	const uint8_t *b = (const uint8_t*)pb;
+	uint8_t *p = (uint8_t*)out;
+	uint8_t *endp = p+max;
+
+	uint8_t mB = (flag & 1) - 1;
+	uint8_t mA = ~mB;
+
+	while (p < endp)
+		*p++ = (mA & *a++) ^ (mB & *b++);
+}
+
+
+/*
+ * ed_import - import a point P on the curve from its packed 256-bit
+ * encoding.
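+ *
+ * The lower 255 bits of the input hold y; the top bit encodes the
+ * sign of x.  Following [1], x is recovered as a square root of u/v
+ * with u = y^2 - 1 and v = d*y^2 + 1: for a valid encoding the
+ * candidate root B = u*v^3 * (u*v^7)^((q-5)/8) fulfills v*B^2 = u or
+ * v*B^2 = -u, and in the latter case the root is j*B, where
+ * j = sqrt(-1) is the constant con_j.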
+ */
+void
+ed_import(struct ed *P, const uint8_t in[32])
+{
+	fld_t U, V, A, B;
+	uint8_t tmp[32];
+	int flag;
+
+	/* import y */
+	memcpy(tmp, in, 32);
+	tmp[31] &= 0x7f;
+
+	fld_import(P->y, tmp);
+
+	/* U <- y^2 - 1, V <- d*y^2 + 1 */
+	fld_sq(U, P->y);
+	fld_mul(V, con_d, U);
+	U[0]--;
+	V[0]++;
+
+	/* A <- v^2 */
+	fld_sq(A, V);
+	/* B <- v^4 */
+	fld_sq(B, A);
+	/* A <- uv^3 */
+	fld_mul(A, A, U);
+	fld_mul(A, A, V);
+	/* B <- uv^3 * (uv^7)^((q-5)/8) */
+	fld_mul(B, B, A);
+	fld_pow2523(B, B);
+	fld_mul(B, B, A);
+
+	/* A <- v * B^2 */
+	fld_sq(A, B);
+	fld_mul(A, A, V);
+	/* flag <- (v*B^2 == u) */
+	flag = fld_eq(A, U);
+
+	/* A <- j * B */
+	fld_mul(A, con_j, B);
+
+	/* x <- B if v*B^2 == u, else j*B */
+	memselect(P->x, B, A, sizeof(fld_t), flag);
+	fld_reduce(P->x, P->x);
+	/* negate x if its parity does not match the sign bit */
+	fld_tinyscale(P->x, P->x, 1 - 2*((in[31] >> 7) ^ (P->x[0] & 1)));
+
+	/* compute t and z */
+	fld_mul(P->t, P->x, P->y);
+	fld_set0(P->z, 1);
+}
+
+
+/*
+ * ed_export - export point P to packed 256-bit format.
+ */
+void
+ed_export(uint8_t out[32], const struct ed *P)
+{
+	fld_t x, y, zinv;
+
+	/* divide x and y by z to get affine coordinates */
+	fld_inv(zinv, P->z);
+	fld_mul(x, P->x, zinv);
+	fld_mul(y, P->y, zinv);
+	fld_export(out, y);
+
+	/* the least significant bit of x becomes the sign bit */
+	fld_reduce(x, x);
+	out[31] |= (x[0] & 1) << 7;
+}
+
+
+/*
+ * ed_add - add points P and Q
+ */
+static void
+ed_add(struct ed *out, const struct ed *P, const struct ed *Q)
+{
+	fld_t a, b, c, d, e, f, g, h, t;
+
+	fld_sub(a, P->y, P->x);
+	fld_sub(t, Q->y, Q->x);
+	fld_mul(a, a, t);
+
+	fld_add(b, P->y, P->x);
+	fld_add(t, Q->y, Q->x);
+	fld_mul(b, b, t);
+
+	fld_mul(c, P->t, Q->t);
+	fld_mul(c, c, con_2d);
+
+	fld_mul(d, P->z, Q->z);
+	fld_scale2(d, d);
+
+	fld_sub(e, b, a);
+	fld_sub(f, d, c);
+	fld_add(g, d, c);
+	fld_add(h, b, a);
+
+	fld_mul(out->x, e, f);
+	fld_mul(out->y, g, h);
+	fld_mul(out->t, e, h);
+	fld_mul(out->z, f, g);
+}
+
+
+/*
+ * ed_double - special case of ed_add for P = Q.
+ *
+ * slightly faster, since four multiplications are turned into
+ * squarings.
+ */
+static void
+ed_double(struct ed *out, const struct ed *P)
+{
+	fld_t a, b, c, d, e, f, g, h;
+
+	fld_sub(a, P->y, P->x);
+	fld_sq(a, a);
+
+	fld_add(b, P->y, P->x);
+	fld_sq(b, b);
+
+	fld_sq(c, P->t);
+	fld_mul(c, c, con_2d);
+
+	fld_sq(d, P->z);
+	fld_scale2(d, d);
+
+	fld_sub(e, b, a);
+	fld_sub(f, d, c);
+	fld_add(g, d, c);
+	fld_add(h, b, a);
+
+	fld_mul(out->x, e, f);
+	fld_mul(out->y, g, h);
+	fld_mul(out->t, e, h);
+	fld_mul(out->z, f, g);
+}
+
+/*
+ * ed_sub - subtract point Q from point P.
+ *
+ * alternatively we could negate Q (which is cheap) and use ed_add,
+ * but this is slightly faster.
+ */
+static void
+ed_sub(struct ed *out, const struct ed *P, const struct ed *Q)
+{
+	fld_t a, b, c, d, e, f, g, h, t;
+
+	fld_sub(a, P->y, P->x);
+	fld_add(t, Q->y, Q->x);
+	fld_mul(a, a, t);
+
+	fld_add(b, P->y, P->x);
+	fld_sub(t, Q->y, Q->x);
+	fld_mul(b, b, t);
+
+	fld_mul(c, P->t, Q->t);
+	fld_mul(c, c, con_m2d);
+
+	fld_mul(d, P->z, Q->z);
+	fld_scale2(d, d);
+
+	fld_sub(e, b, a);
+	fld_sub(f, d, c);
+	fld_add(g, d, c);
+	fld_add(h, b, a);
+
+	fld_mul(out->x, e, f);
+	fld_mul(out->y, g, h);
+	fld_mul(out->t, e, h);
+	fld_mul(out->z, f, g);
+}
+
+
+/*
+ * ed_add_pc - add points P and Q where Q is in precomputed form
+ *
+ * the precomputed form is used for the lookup table and as an
+ * optimization in ed_dual_scale.
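+ *
+ * caching diff = y - x, sum = y + x and prod = 2*d*t of a point with
+ * z = 1 makes a, b and c below cost one multiplication each and turns
+ * d into a plain doubling, so an addition takes seven field
+ * multiplications instead of the nine used by ed_add.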
+ */
+static void
+ed_add_pc(struct ed *out, const struct ed *P, const struct pced *Q)
+{
+	fld_t a, b, c, d, e, f, g, h;
+
+	fld_sub(a, P->y, P->x);
+	fld_mul(a, a, Q->diff);
+
+	fld_add(b, P->y, P->x);
+	fld_mul(b, b, Q->sum);
+
+	fld_mul(c, P->t, Q->prod);
+	fld_scale2(d, P->z);
+
+	fld_sub(e, b, a);
+	fld_sub(f, d, c);
+	fld_add(g, d, c);
+	fld_add(h, b, a);
+
+	fld_mul(out->x, e, f);
+	fld_mul(out->y, g, h);
+	fld_mul(out->t, e, h);
+	fld_mul(out->z, f, g);
+}
+
+/*
+ * ed_sub_pc - subtract Q from P where Q is in precomputed form.
+ */
+static void
+ed_sub_pc(struct ed *out, const struct ed *P, const struct pced *Q)
+{
+	fld_t a, b, c, d, e, f, g, h;
+
+	fld_sub(a, P->y, P->x);
+	fld_mul(a, a, Q->sum);
+
+	fld_add(b, P->y, P->x);
+	fld_mul(b, b, Q->diff);
+
+	fld_mul(c, P->t, Q->prod);
+	fld_neg(c, c);
+
+	fld_scale2(d, P->z);
+
+	fld_sub(e, b, a);
+	fld_sub(f, d, c);
+	fld_add(g, d, c);
+	fld_add(h, b, a);
+
+	fld_mul(out->x, e, f);
+	fld_mul(out->y, g, h);
+	fld_mul(out->t, e, h);
+	fld_mul(out->z, f, g);
+}
+
+
+/*
+ * scale16 - helper function for ed_scale_base, computes
+ * out = x * 16^pow * base in constant time.
+ *
+ * assumes:
+ *   -8 <= x <= 7
+ *   0 <= pow <= 62
+ *   2 | pow
+ */
+static void
+scale16(struct pced *out, int pow, int x)
+{
+	struct pced R = { { 0 }, { 0 }, { 0 } };
+	limb_t mA, mB, mask;
+	int neg, sgnx, absx;
+	int i, k;
+
+	neg = (x >> 3) & 1;
+	sgnx = 1 - 2*neg;
+	absx = sgnx * x;
+	pow >>= 1;
+
+	/* handle abs(x) == 0: select the neutral element */
+	mask = absx | (absx >> 2);
+	mask |= mask >> 1;
+	mask = (mask & 1) - 1;
+	for (i = 0; i < FLD_LIMB_NUM; i++) {
+		R.diff[i] ^= pced_zero.diff[i] & mask;
+		R.sum[i] ^= pced_zero.sum[i] & mask;
+		R.prod[i] ^= pced_zero.prod[i] & mask;
+	}
+
+	/* go through our table and look for abs(x) */
+	for (k = 0; k < 8; k++) {
+		absx--;
+		mask = absx | (absx >> 2);
+		mask |= mask >> 1;
+		mask = (mask & 1) - 1;
+		for (i = 0; i < FLD_LIMB_NUM; i++) {
+			R.diff[i] ^= ed_lookup[pow][k].diff[i] & mask;
+			R.sum[i] ^= ed_lookup[pow][k].sum[i] & mask;
+			R.prod[i] ^= ed_lookup[pow][k].prod[i] & mask;
+		}
+	}
+
+	/* conditionally negate R and write to out */
+	mA = neg-1;
+	mB = ~mA;
+	for (i = 0; i < FLD_LIMB_NUM; i++) {
+		out->diff[i] = (mA & R.diff[i]) ^ (mB & R.sum[i]);
+		out->sum[i] = (mB & R.diff[i]) ^ (mA & R.sum[i]);
+		out->prod[i] = sgnx * R.prod[i];
+	}
+}
+
+
+/*
+ * ed_scale_base - calculates x * base
+ */
+void
+ed_scale_base(struct ed *out, const sc_t x)
+{
+	struct ed R0, R1;
+	struct pced P;
+	sc_t tmp;
+	uint8_t pack[32];
+	int r[64];
+	int i;
+
+	/*
+	 * tmp <- x + 8 * (16^64 - 1) / 15, i.e. add 8 to every
+	 * radix-16 digit, so subtracting 8 below yields signed
+	 * digits -8 <= r[i] <= 7.
+	 */
+	sc_add(tmp, x, con_off);
+	sc_export(pack, tmp);
+	for (i = 0; i < 32; i++) {
+		r[2*i] = (pack[i] & 0x0f) - 8;
+		r[2*i+1] = (pack[i] >> 4) - 8;
+	}
+
+	/*
+	 * R0 <- r0*B + r2*16^2*B + ... + r62*16^62*B
+	 * R1 <- r1*B + r3*16^2*B + ... + r63*16^62*B
+	 */
+	memcpy(&R0, &ed_zero, sizeof(struct ed));
+	memcpy(&R1, &ed_zero, sizeof(struct ed));
+	for (i = 0; i < 63; i += 2) {
+		scale16(&P, i, r[i]);
+		ed_add_pc(&R0, &R0, &P);
+
+		scale16(&P, i, r[i+1]);
+		ed_add_pc(&R1, &R1, &P);
+	}
+
+	/* R1 <- 16 * R1 */
+	for (i = 0; i < 4; i++)
+		ed_add(&R1, &R1, &R1);
+
+	/* out <- R0 + R1 */
+	ed_add(out, &R0, &R1);
+}
+
+
+/*
+ * ed_precompute - helper function to speed up ed_dual_scale.
+ */
+static void
+ed_precompute(struct pced *R, const struct ed *P)
+{
+	fld_sub(R->diff, P->y, P->x);
+	fld_add(R->sum, P->y, P->x);
+	fld_mul(R->prod, P->t, con_2d);
+}
+
+
+/*
+ * ed_dual_scale - calculates R = x*base + y*Q (vartime).
+ *
+ * Note: This algorithm does NOT run in constant time! Please use it
+ * only for public information like in ed25519_verify().
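+ *
+ * sc_jsf() recodes x and y into joint sparse form, so both scalars
+ * are processed with a single chain of doublings; with B, Q, Q+B and
+ * Q-B at hand, every nonzero digit pair costs just one addition, and
+ * on average only about half of the digit pairs are nonzero.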
+ *
+ * assumes:
+ *   Q is affine, i.e. has z = 1
+ *   x and y must be reduced
+ */
+void
+ed_dual_scale(struct ed *R,
+	      const sc_t x,
+	      const sc_t y, const struct ed *Q)
+{
+	struct ed QpB, QmB;
+	struct pced pcQ;
+
+	int ux[SC_BITS+1], uy[SC_BITS+1];
+	int n, i;
+
+	memcpy(R, &ed_zero, sizeof(struct ed));
+
+	/* calculate joint sparse form of x and y */
+	n = sc_jsf(ux, uy, x, y);
+	if (n == -1)
+		return;
+
+	/* precompute Q+B, Q-B and the precomputed form of Q */
+	ed_add_pc(&QpB, Q, &pced_B);
+	ed_sub_pc(&QmB, Q, &pced_B);
+	ed_precompute(&pcQ, Q);
+
+	/* now calculate R = x*base + y*Q using Shamir's trick */
+	for (i = n; ; i--) {
+		if (ux[i] == 1) {
+			if (uy[i] == 1)
+				ed_add(R, R, &QpB);
+			else if (uy[i] == -1)
+				ed_sub(R, R, &QmB);
+			else
+				ed_add_pc(R, R, &pced_B);
+
+		} else if (ux[i] == -1) {
+			if (uy[i] == 1)
+				ed_add(R, R, &QmB);
+			else if (uy[i] == -1)
+				ed_sub(R, R, &QpB);
+			else
+				ed_sub_pc(R, R, &pced_B);
+
+		} else {
+			if (uy[i] == 1)
+				ed_add_pc(R, R, &pcQ);
+			else if (uy[i] == -1)
+				ed_sub_pc(R, R, &pcQ);
+		}
+
+		if (i == 0)
+			break;
+
+		ed_double(R, R);
+	}
+}
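+
+/*
+ * Usage sketch (illustrative only, not part of the original file):
+ * how the exported primitives compose for a verify-style computation
+ * R = x*base + y*Q from a packed point.  ed_import() leaves z = 1,
+ * so Q satisfies the affine input requirement of ed_dual_scale();
+ * x and y are assumed to be already reduced.
+ */
+#if 0
+static void
+example_dual_scale(uint8_t out[32], const sc_t x, const sc_t y,
+		   const uint8_t Qpacked[32])
+{
+	struct ed Q, R;
+
+	ed_import(&Q, Qpacked);		/* unpack Q; sets Q.z = 1 */
+	ed_dual_scale(&R, x, y, &Q);	/* R <- x*base + y*Q (vartime) */
+	ed_export(out, &R);		/* pack R into 32 bytes */
+}
+#endif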