/*
 * Implementation of the ring Z/mZ for
 * m := 2^252 + 27742317777372353535851937790883648493.
 *
 * We use this ring (which is actually a field, but we are not
 * interested in dividing here) as scalar ring for our base point
 * B. Since B has order m, the operation of Z/mZ on { xB | x in Z } is
 * well-defined.
 *
 * This code is public domain.
 *
 * Philipp Lay <philipp.lay@illunis.net>.
 */
#define MSK SC_LIMB_MASK
#define LB SC_LIMB_BITS
#if SC_LIMB_BITS == 52

/* m = 2^252 + 27742317777372353535851937790883648493, 52-bit limbs, K = 5 */
static const limb_t con_m[K+1] = {
	671914833335277, 3916664325105025, 1367801, 0, 17592186044416, 0 };

/* mu = floor(b^(2*k) / m) */
static const limb_t con_mu[K+1] = {
	1586638968003385, 147551898491342, 4503509987107165, 4503599627370495,
	4503599627370495, 255 };

/* off = 8 * (16^64 - 1) / 15 mod m */
const sc_t con_off = { 1530200761952544, 2593802592017535, 2401919790321849,
	2401919801264264, 9382499223688 };
#else

/* m = 2^252 + 27742317777372353535851937790883648493, 26-bit limbs, K = 10 */
static const limb_t con_m[K+1] = {
	16110573, 10012311, 30238081, 58362846, 1367801, 0, 0, 0, 0,
	262144, 0 };

/* mu = floor(b^(2*k) / m) */
static const limb_t con_mu[K+1] = {
	1252153, 23642763, 41867726, 2198694, 17178973, 67107528, 67108863,
	67108863, 67108863, 67108863, 255 };

/* off = 8 * (16^64 - 1) / 15 mod m */
const sc_t con_off = { 14280992, 22801768, 35478655, 38650670, 65114297,
	35791393, 8947848, 35791394, 8947848, 139810 };

#endif
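
/*
 * sanity sketch (a hypothetical self-test, not part of the original
 * file): the 2^252 term of m lands in limb 252/LB at bit 252%LB, and
 * every limb above it must be zero. this holds in both limb
 * configurations and catches typos in the tables above.
 */
static int
sc_con_m_selftest(void)
{
	int i;

	if (con_m[252/LB] != (limb_t)1 << (252 % LB))
		return 0;
	for (i = 252/LB + 1; i <= K; i++)
		if (con_m[i] != 0)
			return 0;
	return 1;
}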
/*
 * sc_barrett - reduce x modulo m using barrett reduction (HAC 14.42):
 * with the notation of (14.42) we use k = K limbs and b = 2^LB as
 * radix.
 *
 * NOTE: x must be carried and non-negative.
 *
 * as long as x <= b^(k-1) * (b^(k+1) - mu), res will be fully
 * reduced. this is normally true since we have (for our choices of k,
 * b and m)
 *
 *   x < m^2 < b^(k-1) * (b^(k+1) - mu)
 *
 * if x is the result of a multiplication x = a * b with a, b < m.
 *
 * in the case of b^(k-1) * (b^(k+1) - mu) < x < b^(2k) the caller must
 * conditionally subtract m from the result.
 */
static void
sc_barrett(sc_t res, const lsc_t x)
{
	llimb_t carry;
	limb_t q[K+1], r[K+1];
	limb_t mask;
	int i, j;
	/*
	 * step 1: q <- floor( floor(x/b^(k-1)) * mu / b^(k+1) )
	 */

	/* calculate carry from the (k-1)-th and k-th columns of
	 * floor(x/b^(k-1)) * mu */
	carry = 0;
	for (i = 0; i <= K-1; i++)
		carry += (llimb_t)x[K-1+i] * con_mu[K-1-i];
	carry >>= LB;

	for (i = 0; i <= K; i++)
		carry += (llimb_t)x[K-1+i] * con_mu[K-i];

	/* columns K+1 .. 2K of the product make up the limbs of q */
	for (j = K+1; j <= 2*K; j++) {
		carry >>= LB;
		for (i = j-K; i <= K; i++)
			carry += (llimb_t)x[K-1+i] * con_mu[j-i];
		q[j-K-1] = carry & MSK;
	}
	q[j-K-1] = carry >> LB;
	/*
	 * step 2: r <- (x - q * m) mod b^(k+1)
	 */

	/* r <- q*m mod b^(k+1) */
	for (j = 0, carry = 0; j <= K; j++) {
		carry >>= LB;
		for (i = 0; i <= j; i++)
			carry += (llimb_t) q[i]*con_m[j-i];
		r[j] = carry & MSK;
	}

	/* r <- x - r mod b^(k+1); the borrow propagates through the
	 * arithmetic right-shift of the signed llimb_t carry */
	for (i = 0, carry = 0; i <= K; i++) {
		carry = (carry >> LB) + x[i] - r[i];
		r[i] = carry & MSK;
	}
	/*
	 * step 3: if (r < 0) r += b^(k+1);
	 *
	 * done by ignoring the coefficient of b^(k+1) (= carry >> LB)
	 * after the last loop above.
	 */
	/*
	 * step 4: if (r >= m) r -= m;
	 */
	q[0] = r[0] - con_m[0];
	for (i = 1; i <= K; i++) {
		q[i] = (q[i-1] >> LB) + r[i] - con_m[i];
		q[i-1] &= MSK;
	}

	/* mask is all-zero if r - m borrowed (keep r) and all-ones
	 * otherwise (replace r by q = r - m); this relies on the
	 * arithmetic shift of the signed limb_t. */
	mask = ~(q[K] >> (8*sizeof(limb_t)-1));
	for (i = 0; i <= K; i++)
		r[i] ^= (r[i] ^ q[i]) & mask;
	/*
	 * step 5: copy out and clean up
	 */
	for (i = 0; i < K; i++)
		res[i] = r[i];
}
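
/*
 * minimal standalone sketch of the same algorithm (HAC 14.42) with
 * toy parameters b = 2^8, k = 2 and modulus 40503; not part of the
 * original API (name and parameters are illustrative, assuming
 * <stdint.h> is available), just for following the steps above with
 * small numbers.
 */
static uint32_t
toy_barrett(uint32_t x)				/* expects x < 40503^2 */
{
	const uint32_t m0 = 40503;
	const uint32_t mu0 = (uint32_t)((1ull << 32) / m0); /* floor(b^(2k)/m0) */
	uint32_t q, r;

	/* step 1: q <- floor( floor(x / b^(k-1)) * mu / b^(k+1) ) */
	q = (uint32_t)(((uint64_t)(x >> 8) * mu0) >> 24);

	/* step 2: r <- (x - q * m0) mod b^(k+1); q never exceeds
	 * floor(x / m0), so the difference is non-negative */
	r = (x - q * m0) & 0xffffff;

	/* steps 3/4: at most two conditional subtractions remain */
	while (r >= m0)
		r -= m0;

	return r;
}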
/*
 * sc_reduce - completely carry and reduce element e.
 */
void
sc_reduce(sc_t dst, const sc_t e)
{
	lsc_t tmp;
	llimb_t carry;
	int i;

	/* carry e and zero-extend to 2*K limbs */
	for (carry = 0, i = 0; i < K; i++) {
		carry = (carry >> LB) + e[i];
		tmp[i] = carry & MSK;
	}
	tmp[K] = carry >> LB;
	for (i = K+1; i < 2*K; i++)
		tmp[i] = 0;

	/* reduce modulo m */
	sc_barrett(dst, tmp);
}
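
/*
 * quick sanity sketch (hypothetical self-test, not part of the
 * original API): the first K limbs of con_m are exactly m, since the
 * top limb con_m[K] is zero in both limb configurations, so reducing
 * them must yield 0.
 */
static int
sc_reduce_selftest(void)
{
	sc_t x, r;
	int i;

	for (i = 0; i < K; i++)
		x[i] = con_m[i];
	sc_reduce(r, x);
	for (i = 0; i < K; i++)
		if (r[i] != 0)
			return 0;
	return 1;
}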
/*
 * sc_import - import packed 256bit/512bit little-endian encoded integer
 * to our internal sc_t format.
 */
void
sc_import(sc_t dst, const uint8_t *src, size_t len)
{
	const uint8_t *endp = src + len;
	lsc_t tmp;
	uint64_t foo;
	int i, fill;

	for (i = 0, foo = 0, fill = 0; i < 2*K; i++) {
		while (src < endp && fill < LB) {
			foo |= (uint64_t)*src++ << fill;
			fill += 8;
		}
		tmp[i] = foo & MSK;
		foo >>= LB;
		fill -= LB;
	}

	sc_barrett(dst, tmp);
}
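
/*
 * illustration of the off-constant above (a hypothetical self-test,
 * not part of the original API): (16^64 - 1) / 15 is the number whose
 * 64 hex digits are all 1, so 8 * (16^64 - 1) / 15 is the 32-byte
 * little-endian value with every byte 0x88. importing it should give
 * con_off; the sc_reduce call is purely defensive, in case a final
 * conditional subtraction were still pending after sc_barrett.
 */
static int
sc_off_selftest(void)
{
	uint8_t buf[32];
	sc_t x;
	int i;

	for (i = 0; i < 32; i++)
		buf[i] = 0x88;
	sc_import(x, buf, sizeof(buf));
	sc_reduce(x, x);
	for (i = 0; i < K; i++)
		if (x[i] != con_off[i])
			return 0;
	return 1;
}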
/*
 * sc_export - export internal sc_t format to an unsigned, 256bit
 * little-endian integer.
 */
void
sc_export(uint8_t dst[32], const sc_t x)
{
	const uint8_t *endp = dst + 32;
	sc_t tmp;
	uint64_t foo;
	int i, fill;

	/* make sure x is fully carried and reduced */
	sc_reduce(tmp, x);

	for (i = 0, foo = 0, fill = 0; i < K; i++) {
		foo |= (uint64_t)tmp[i] << fill;
		for (fill += LB; fill >= 8 && dst < endp; fill -= 8, foo >>= 8)
			*dst++ = foo & 0xff;
	}
}
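
/*
 * round-trip sketch (hypothetical self-test, not part of the original
 * API): any value below m survives sc_import followed by sc_export
 * unchanged. we use a 32-byte value with the top byte zero, which is
 * < 2^248 < m and therefore canonical.
 */
static int
sc_roundtrip_selftest(void)
{
	uint8_t in[32], out[32];
	sc_t x;
	int i;

	for (i = 0; i < 32; i++)
		in[i] = (uint8_t)i;
	in[31] = 0;			/* keep the value below m */

	sc_import(x, in, sizeof(in));
	sc_export(out, x);

	for (i = 0; i < 32; i++)
		if (in[i] != out[i])
			return 0;
	return 1;
}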
/*
 * sc_mul - multiply a with b and reduce modulo m
 */
void
sc_mul(sc_t res, const sc_t a, const sc_t b)
{
	lsc_t tmp;
	llimb_t carry;
	int i, k;

	carry = 0;

	/* schoolbook multiplication, lower columns 0 .. K-1 */
	for (k = 0; k < K; k++) {
		carry >>= LB;
		for (i = 0; i <= k; i++)
			carry += (llimb_t) a[i] * b[k-i];
		tmp[k] = carry & MSK;
	}

	/* upper columns K .. 2K-2 */
	for (k = K; k < 2*K-1; k++) {
		carry >>= LB;
		for (i = k-K+1; i <= K-1; i++)
			carry += (llimb_t) a[i] * b[k-i];
		tmp[k] = carry & MSK;
	}
	tmp[k] = carry >>= LB;

	sc_barrett(res, tmp);
}
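
/*
 * tiny usage sketch (hypothetical self-test, not part of the original
 * API): 3 * 5 = 15 mod m, far below any reduction boundary.
 */
static int
sc_mul_selftest(void)
{
	sc_t a = { 3 }, b = { 5 }, r;
	int i;

	sc_mul(r, a, b);
	if (r[0] != 15)
		return 0;
	for (i = 1; i < K; i++)
		if (r[i] != 0)
			return 0;
	return 1;
}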
/*
 * jsfdigit - helper for sc_jsf (vartime)
 */
static int
jsfdigit(unsigned int a, unsigned int b)
{
	int u = 2 - (a & 0x03);

	if (u == 2)
		u = 0;

	if ( ((a & 0x07) == 3 || (a & 0x07) == 5) && (b & 0x03) == 2 )
		u = -u;

	return u;
}
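
/*
 * digit sketch (hypothetical self-test, not part of the original
 * API): even a gives 0; odd a gives +-1 from 2 - (a mod 4), with the
 * sign flipped when a = 3,5 (mod 8) and b = 2 (mod 4) so that the
 * joint digits come out sparser.
 */
static int
jsfdigit_selftest(void)
{
	return jsfdigit(4, 0) == 0	/* even */
	    && jsfdigit(1, 0) == 1	/* 1 mod 4 */
	    && jsfdigit(3, 0) == -1	/* 3 mod 4 */
	    && jsfdigit(3, 2) == 1	/* flipped: 3 mod 8, 2 mod 4 */
	    && jsfdigit(5, 2) == -1;	/* flipped: 5 mod 8, 2 mod 4 */
}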
/*
 * sc_jsf - calculate joint sparse form of a and b. (vartime)
 *
 * assumes: a and b carried and reduced
 *
 * NOTE: this function runs in variable time; due to the nature of
 * the optimization JSF is needed for (see ed_dual_scale), there is
 * no point in creating a constant-time version.
 *
 * returns the highest index k >= 0 with u0[k] != 0 or u1[k] != 0,
 * or -1 in case u0 and u1 are all zero.
 */
int
sc_jsf(int u0[SC_BITS+1], int u1[SC_BITS+1], const sc_t a, const sc_t b)
{
	limb_t n0, n1;
	int i, j, k;

	n0 = n1 = 0;

	for (k = 0, i = 0; i < K; i++) {
		n0 += a[i];
		n1 += b[i];

		for (j = 0; j < LB; j++, k++) {
			u0[k] = jsfdigit(n0, n1);
			u1[k] = jsfdigit(n1, n0);

			n0 = (n0 - u0[k]) >> 1;
			n1 = (n1 - u1[k]) >> 1;
		}
	}

	u0[k] = jsfdigit(n0, n1);
	u1[k] = jsfdigit(n1, n0);

	while (k >= 0 && u0[k] == 0 && u1[k] == 0)
		k--;

	return k;
}
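
/*
 * usage sketch (hypothetical self-test, not part of the original
 * API): compute the JSF of two small scalars and check that the
 * signed digits reconstruct the inputs, i.e. a = sum u0[k] * 2^k and
 * b = sum u1[k] * 2^k with digits in {-1, 0, 1}.
 */
static int
sc_jsf_selftest(void)
{
	int u0[SC_BITS+1], u1[SC_BITS+1];
	sc_t a = { 53 }, b = { 102 };
	long va, vb;
	int k;

	k = sc_jsf(u0, u1, a, b);
	if (k < 0)
		return 0;

	/* Horner evaluation from the most significant digit down */
	for (va = vb = 0; k >= 0; k--) {
		va = 2*va + u0[k];
		vb = 2*vb + u1[k];
	}
	return va == 53 && vb == 102;
}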