/*
 * Implementation of the ring Z/mZ for
 * m := 2^252 + 27742317777372353535851937790883648493.
 *
 * We use this ring (which is actually a field, but we are not
 * interested in dividing here) as scalar ring for our base point
 * B. Since B has order m, the operation of Z/mZ on { xB | x in Z } is
 * well-defined.
 *
 * This code is public domain.
 *
 * Philipp Lay <philipp.lay@illunis.net>.
 */
#define MSK SC_LIMB_MASK
#define LB SC_LIMB_BITS
#if SC_LIMB_BITS == 52

/* m = 2^252 + 27742317777372353535851937790883648493, 52-bit limbs, K = 5 */
static const limb_t con_m[K+1] = {
	671914833335277, 3916664325105025, 1367801, 0, 17592186044416, 0 };

/* mu = floor(b^(2*k) / m) */
static const limb_t con_mu[K+1] = {
	1586638968003385, 147551898491342, 4503509987107165, 4503599627370495,
	4503599627370495, 255 };

/* off = 8 * (16^64 - 1) / 15 mod m */
const sc_t con_off = { 1530200761952544, 2593802592017535, 2401919790321849,
	2401919801264264, 9382499223688 };
#else

/* m = 2^252 + 27742317777372353535851937790883648493, 26-bit limbs, K = 10 */
static const limb_t con_m[K+1] = {
	16110573, 10012311, 30238081, 58362846, 1367801, 0, 0, 0, 0,
	262144, 0 };

/* mu = floor(b^(2*k) / m) */
static const limb_t con_mu[K+1] = {
	1252153, 23642763, 41867726, 2198694, 17178973, 67107528, 67108863,
	67108863, 67108863, 67108863, 255 };

/* off = 8 * (16^64 - 1) / 15 mod m */
const sc_t con_off = { 14280992, 22801768, 35478655, 38650670, 65114297,
	35791393, 8947848, 35791394, 8947848, 139810 };

#endif
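
/*
 * sanity sketch (a hypothetical self-test, not part of the original
 * file): the 2^252 term of m lands in limb 252/LB at bit 252%LB, and
 * every limb above it must be zero. this holds in both limb
 * configurations and catches typos in the tables above.
 */
static int
sc_con_m_selftest(void)
{
	int i;

	if (con_m[252/LB] != (limb_t)1 << (252 % LB))
		return 0;
	for (i = 252/LB + 1; i <= K; i++)
		if (con_m[i] != 0)
			return 0;
	return 1;
}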
/*
 * sc_barrett - reduce x modulo m using barrett reduction (HAC 14.42):
 * with the notation of (14.42) we use k = K limbs and b = 2^LB as
 * radix.
 *
 * NOTE: x must be carried and non-negative.
 *
 * as long as x <= b^(k-1) * (b^(k+1) - mu), res will be fully
 * reduced. this is normally true since we have (for our choices of k,
 * b and m)
 *
 *   x < m^2 < b^(k-1) * (b^(k+1) - mu)
 *
 * if x is the result of a multiplication x = a * b with a, b < m.
 *
 * in the case of b^(k-1) * (b^(k+1) - mu) < x < b^(2k) the caller must
 * conditionally subtract m from the result.
 */
static void
sc_barrett(sc_t res, const lsc_t x)
{
	llimb_t carry;
	limb_t q[K+1], r[K+1];
	limb_t mask;
	int i, j;
	/*
	 * step 1: q <- floor( floor(x/b^(k-1)) * mu / b^(k+1) )
	 */

	/* calculate carry from the (k-1)-th and k-th columns of
	 * floor(x/b^(k-1)) * mu */
	carry = 0;
	for (i = 0; i <= K-1; i++)
		carry += (llimb_t)x[K-1+i] * con_mu[K-1-i];
	carry >>= LB;

	for (i = 0; i <= K; i++)
		carry += (llimb_t)x[K-1+i] * con_mu[K-i];

	/* columns K+1 .. 2K of the product make up the limbs of q */
	for (j = K+1; j <= 2*K; j++) {
		carry >>= LB;
		for (i = j-K; i <= K; i++)
			carry += (llimb_t)x[K-1+i] * con_mu[j-i];
		q[j-K-1] = carry & MSK;
	}
	q[j-K-1] = carry >> LB;
	/*
	 * step 2: r <- (x - q * m) mod b^(k+1)
	 */

	/* r <- q*m mod b^(k+1) */
	for (j = 0, carry = 0; j <= K; j++) {
		carry >>= LB;
		for (i = 0; i <= j; i++)
			carry += (llimb_t) q[i]*con_m[j-i];
		r[j] = carry & MSK;
	}

	/* r <- x - r mod b^(k+1); the borrow propagates through the
	 * arithmetic right-shift of the signed llimb_t carry */
	for (i = 0, carry = 0; i <= K; i++) {
		carry = (carry >> LB) + x[i] - r[i];
		r[i] = carry & MSK;
	}
	/*
	 * step 3: if (r < 0) r += b^(k+1);
	 *
	 * done by ignoring the coefficient of b^(k+1) (= carry >> LB)
	 * after the last loop above.
	 */
	/*
	 * step 4: if (r >= m) r -= m;
	 */
	q[0] = r[0] - con_m[0];
	for (i = 1; i <= K; i++) {
		q[i] = (q[i-1] >> LB) + r[i] - con_m[i];
		q[i-1] &= MSK;
	}

	/* mask is all-zero if r - m borrowed (keep r) and all-ones
	 * otherwise (replace r by q = r - m); this relies on the
	 * arithmetic shift of the signed limb_t. */
	mask = ~(q[K] >> (8*sizeof(limb_t)-1));
	for (i = 0; i <= K; i++)
		r[i] ^= (r[i] ^ q[i]) & mask;
	/*
	 * step 5: copy out and clean up
	 */
	for (i = 0; i < K; i++)
		res[i] = r[i];
}
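
/*
 * minimal standalone sketch of the same algorithm (HAC 14.42) with
 * toy parameters b = 2^8, k = 2 and modulus 40503; not part of the
 * original API (name and parameters are illustrative, assuming
 * <stdint.h> is available), just for following the steps above with
 * small numbers.
 */
static uint32_t
toy_barrett(uint32_t x)				/* expects x < 40503^2 */
{
	const uint32_t m0 = 40503;
	const uint32_t mu0 = (uint32_t)((1ull << 32) / m0); /* floor(b^(2k)/m0) */
	uint32_t q, r;

	/* step 1: q <- floor( floor(x / b^(k-1)) * mu / b^(k+1) ) */
	q = (uint32_t)(((uint64_t)(x >> 8) * mu0) >> 24);

	/* step 2: r <- (x - q * m0) mod b^(k+1); q never exceeds
	 * floor(x / m0), so the difference is non-negative */
	r = (x - q * m0) & 0xffffff;

	/* steps 3/4: at most two conditional subtractions remain */
	while (r >= m0)
		r -= m0;

	return r;
}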
/*
 * sc_reduce - completely carry and reduce element e.
 */
void
sc_reduce(sc_t dst, const sc_t e)
{
	lsc_t tmp;
	llimb_t carry;
	int i;

	/* carry e and zero-extend to 2*K limbs */
	for (carry = 0, i = 0; i < K; i++) {
		carry = (carry >> LB) + e[i];
		tmp[i] = carry & MSK;
	}
	tmp[K] = carry >> LB;
	for (i = K+1; i < 2*K; i++)
		tmp[i] = 0;

	/* reduce modulo m */
	sc_barrett(dst, tmp);
}
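
/*
 * quick sanity sketch (hypothetical self-test, not part of the
 * original API): the first K limbs of con_m are exactly m, since the
 * top limb con_m[K] is zero in both limb configurations, so reducing
 * them must yield 0.
 */
static int
sc_reduce_selftest(void)
{
	sc_t x, r;
	int i;

	for (i = 0; i < K; i++)
		x[i] = con_m[i];
	sc_reduce(r, x);
	for (i = 0; i < K; i++)
		if (r[i] != 0)
			return 0;
	return 1;
}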
/*
 * sc_import - import packed 256bit/512bit little-endian encoded integer
 * to our internal sc_t format.
 */
void
sc_import(sc_t dst, const uint8_t *src, size_t len)
{
	const uint8_t *endp = src + len;
	lsc_t tmp;
	uint64_t foo;
	int i, fill;

	for (i = 0, foo = 0, fill = 0; i < 2*K; i++) {
		while (src < endp && fill < LB) {
			foo |= (uint64_t)*src++ << fill;
			fill += 8;
		}
		tmp[i] = foo & MSK;
		foo >>= LB;
		fill -= LB;
	}

	sc_barrett(dst, tmp);
}
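
/*
 * illustration of the off-constant above (a hypothetical self-test,
 * not part of the original API): (16^64 - 1) / 15 is the number whose
 * 64 hex digits are all 1, so 8 * (16^64 - 1) / 15 is the 32-byte
 * little-endian value with every byte 0x88. importing it should give
 * con_off; the sc_reduce call is purely defensive, in case a final
 * conditional subtraction were still pending after sc_barrett.
 */
static int
sc_off_selftest(void)
{
	uint8_t buf[32];
	sc_t x;
	int i;

	for (i = 0; i < 32; i++)
		buf[i] = 0x88;
	sc_import(x, buf, sizeof(buf));
	sc_reduce(x, x);
	for (i = 0; i < K; i++)
		if (x[i] != con_off[i])
			return 0;
	return 1;
}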
/*
 * sc_export - export internal sc_t format to an unsigned, 256bit
 * little-endian integer.
 */
void
sc_export(uint8_t dst[32], const sc_t x)
{
	const uint8_t *endp = dst + 32;
	sc_t tmp;
	uint64_t foo;
	int i, fill;

	/* make sure x is fully carried and reduced */
	sc_reduce(tmp, x);

	for (i = 0, foo = 0, fill = 0; i < K; i++) {
		foo |= (uint64_t)tmp[i] << fill;
		for (fill += LB; fill >= 8 && dst < endp; fill -= 8, foo >>= 8)
			*dst++ = foo & 0xff;
	}
}
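
/*
 * round-trip sketch (hypothetical self-test, not part of the original
 * API): any value below m survives sc_import followed by sc_export
 * unchanged. we use a 32-byte value with the top byte zero, which is
 * < 2^248 < m and therefore canonical.
 */
static int
sc_roundtrip_selftest(void)
{
	uint8_t in[32], out[32];
	sc_t x;
	int i;

	for (i = 0; i < 32; i++)
		in[i] = (uint8_t)i;
	in[31] = 0;			/* keep the value below m */

	sc_import(x, in, sizeof(in));
	sc_export(out, x);

	for (i = 0; i < 32; i++)
		if (in[i] != out[i])
			return 0;
	return 1;
}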
/*
 * sc_mul - multiply a with b and reduce modulo m
 */
void
sc_mul(sc_t res, const sc_t a, const sc_t b)
{
	lsc_t tmp;
	llimb_t carry;
	int i, k;

	carry = 0;

	/* schoolbook multiplication, lower columns 0 .. K-1 */
	for (k = 0; k < K; k++) {
		carry >>= LB;
		for (i = 0; i <= k; i++)
			carry += (llimb_t) a[i] * b[k-i];
		tmp[k] = carry & MSK;
	}

	/* upper columns K .. 2K-2 */
	for (k = K; k < 2*K-1; k++) {
		carry >>= LB;
		for (i = k-K+1; i <= K-1; i++)
			carry += (llimb_t) a[i] * b[k-i];
		tmp[k] = carry & MSK;
	}
	tmp[k] = carry >>= LB;

	sc_barrett(res, tmp);
}
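
/*
 * tiny usage sketch (hypothetical self-test, not part of the original
 * API): 3 * 5 = 15 mod m, far below any reduction boundary.
 */
static int
sc_mul_selftest(void)
{
	sc_t a = { 3 }, b = { 5 }, r;
	int i;

	sc_mul(r, a, b);
	if (r[0] != 15)
		return 0;
	for (i = 1; i < K; i++)
		if (r[i] != 0)
			return 0;
	return 1;
}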
/*
 * jsfdigit - helper for sc_jsf (vartime)
 */
static int
jsfdigit(unsigned int a, unsigned int b)
{
	int u = 2 - (a & 0x03);

	if (u == 2)
		u = 0;

	if ( ((a & 0x07) == 3 || (a & 0x07) == 5) && (b & 0x03) == 2 )
		u = -u;

	return u;
}
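
/*
 * digit sketch (hypothetical self-test, not part of the original
 * API): even a gives 0; odd a gives +-1 from 2 - (a mod 4), with the
 * sign flipped when a = 3,5 (mod 8) and b = 2 (mod 4) so that the
 * joint digits come out sparser.
 */
static int
jsfdigit_selftest(void)
{
	return jsfdigit(4, 0) == 0	/* even */
	    && jsfdigit(1, 0) == 1	/* 1 mod 4 */
	    && jsfdigit(3, 0) == -1	/* 3 mod 4 */
	    && jsfdigit(3, 2) == 1	/* flipped: 3 mod 8, 2 mod 4 */
	    && jsfdigit(5, 2) == -1;	/* flipped: 5 mod 8, 2 mod 4 */
}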
/*
 * sc_jsf - calculate joint sparse form of a and b. (vartime)
 *
 * assumes: a and b carried and reduced
 *
 * NOTE: this function runs in variable time; due to the nature of
 * the optimization JSF is needed for (see ed_dual_scale), there is
 * no point in creating a constant-time version.
 *
 * returns the highest index k >= 0 with u0[k] != 0 or u1[k] != 0,
 * or -1 in case u0 and u1 are all zero.
 */
int
sc_jsf(int u0[SC_BITS+1], int u1[SC_BITS+1], const sc_t a, const sc_t b)
{
	limb_t n0, n1;
	int i, j, k;

	n0 = n1 = 0;

	for (k = 0, i = 0; i < K; i++) {
		n0 += a[i];
		n1 += b[i];

		for (j = 0; j < LB; j++, k++) {
			u0[k] = jsfdigit(n0, n1);
			u1[k] = jsfdigit(n1, n0);

			n0 = (n0 - u0[k]) >> 1;
			n1 = (n1 - u1[k]) >> 1;
		}
	}

	u0[k] = jsfdigit(n0, n1);
	u1[k] = jsfdigit(n1, n0);

	while (k >= 0 && u0[k] == 0 && u1[k] == 0)
		k--;

	return k;
}
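
/*
 * usage sketch (hypothetical self-test, not part of the original
 * API): compute the JSF of two small scalars and check that the
 * signed digits reconstruct the inputs, i.e. a = sum u0[k] * 2^k and
 * b = sum u1[k] * 2^k with digits in {-1, 0, 1}.
 */
static int
sc_jsf_selftest(void)
{
	int u0[SC_BITS+1], u1[SC_BITS+1];
	sc_t a = { 53 }, b = { 102 };
	long va, vb;
	int k;

	k = sc_jsf(u0, u1, a, b);
	if (k < 0)
		return 0;

	/* Horner evaluation from the most significant digit down */
	for (va = vb = 0; k >= 0; k--) {
		va = 2*va + u0[k];
		vb = 2*vb + u1[k];
	}
	return va == 53 && vb == 102;
}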