/*
 * Basic integer types and bit helpers used by the ChaCha code in this file.
 *
 * The listing line numbers that had been fused onto the front of each
 * definition are removed so the lines are valid C again.
 */
typedef unsigned char u8;             /* one byte of key, nonce or data */
typedef unsigned int u32;             /* one state word; assumed to hold 32 bits */
typedef struct chacha_ctx chacha_ctx; /* struct body is not defined in this part */

/* Give an integer literal an unsigned suffix, e.g. U32C(0xFF) -> 0xFFU. */
#define U32C(v) (v##U)

/*
 * U8V below expands to U8C, which no visible line defines. Supply the same
 * unsigned-suffix form as U32C unless a definition is already in scope.
 */
#ifndef U8C
#define U8C(v) (v##U)
#endif

/* Truncate a value to its low 8 bits / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the low 32 bits of v left by n.
 *
 * n must be in 1..31: n == 0 would shift right by 32, which is undefined for
 * a 32-bit operand. Both halves are masked with U32V so that an operand wider
 * than 32 bits cannot leak high bits into the result. v and n are each
 * evaluated twice, so pass expressions without side effects.
 */
#define ROTL32(v, n) \
    (U32V((v) << (n)) | (U32V(v) >> (32 - (n))))
/*
 * U8TO32_LITTLE(p): build a u32 from bytes of p, low byte first. The terms
 * shown place p[1], p[2] and p[3] at bit offsets 8, 16 and 24.
 *
 * NOTE(review): the listing jumps from 21 to 23, so the first line of the
 * expansion (the opening parenthesis and, presumably, the p[0] term) is not
 * shown here; as written the parentheses do not balance. Restore that line
 * before relying on this macro. The leading numbers are listing residue.
 */
21 #define U8TO32_LITTLE(p) \
23 ((u32)((p)[1]) << 8) | \
24 ((u32)((p)[2]) << 16) | \
25 ((u32)((p)[3]) << 24))
/*
 * U32TO8_LITTLE(p, v): store v into bytes of p, low byte first. The lines
 * shown write bits 8..15, 16..23 and 24..31 of v to p[1], p[2] and p[3],
 * each truncated with U8V.
 *
 * NOTE(review): listing numbers 28, 29 and 33 are absent, so no store to
 * p[0] and no macro terminator are visible, and the last line shown still
 * ends in a continuation backslash. Presumably the missing lines open a
 * statement wrapper, store p[0] and close the wrapper - confirm against the
 * full source.
 */
27 #define U32TO8_LITTLE(p, v) \
30 (p)[1] = U8V((v) >> 8); \
31 (p)[2] = U8V((v) >> 16); \
32 (p)[3] = U8V((v) >> 24); \
/*
 * Word-level primitives for the ChaCha rounds. All of them expand to ROTL32
 * and U32V, so results stay within 32 bits.
 *
 * The listing line numbers that had been fused onto the front of each
 * definition are removed so the lines are valid C again.
 */
#define ROTATE(v, c) (ROTL32(v, c))   /* rotate v left by c bits */
#define XOR(v, w) ((v) ^ (w))
#define PLUS(v, w) (U32V((v) + (w)))  /* addition modulo 2^32 */
#define PLUSONE(v) (PLUS((v), 1))

/*
 * One quarter round on four state words, updated in place: four
 * add / xor / rotate steps with rotation counts 16, 12, 8 and 7.
 *
 * The expansion is a plain statement sequence that already ends in ';'.
 * Call sites in this file invoke it without a trailing semicolon, so it is
 * deliberately not wrapped in do { } while (0). Use it only inside a braced
 * block, and pass plain lvalues: every argument is evaluated several times.
 */
#define QUARTERROUND(a, b, c, d) \
    a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \
    c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \
    a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
    c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
/*
 * The two 16-character constant strings (each array is 17 bytes with its
 * terminating NUL). chacha_keysetup reads 16 bytes through a `constants`
 * pointer into input[0..3]; the assignments that pick sigma or tau are not
 * among the visible lines - presumably sigma for 256-bit keys and tau for
 * 128-bit keys, as the string contents suggest.
 *
 * The listing line numbers that had been fused onto the front of both
 * definitions are removed so the lines are valid C again.
 */
static const char sigma[] = "expand 32-byte k";
static const char tau[] = "expand 16-byte k";
/*
 * chacha_keysetup: fill state words x->input[0..11] from the key and a
 * 16-byte constant.
 *   x     - context whose input[] words are written
 *   k     - key bytes, read four at a time through U8TO32_LITTLE
 *   kbits - key size in bits; 256 takes the first branch, any other value
 *           takes the else branch (commented as the 128-bit case)
 *
 * NOTE(review): this listing omits the bodies of both branches (numbers
 * 57-58 and 60-61) and the closing brace. `constants` is never assigned in
 * the lines shown; presumably each branch points it at sigma or tau, and the
 * 256-bit branch may also advance k - confirm against the full source.
 */
49 void chacha_keysetup(chacha_ctx * x, const u8 * k, u32 kbits) {
50 const char *constants;
/* input[4..7] <- key bytes 0..15 */
52 x->input[4] = U8TO32_LITTLE(k + 0);
53 x->input[5] = U8TO32_LITTLE(k + 4);
54 x->input[6] = U8TO32_LITTLE(k + 8);
55 x->input[7] = U8TO32_LITTLE(k + 12);
56 if (kbits == 256) { /* recommended */
59 } else { /* kbits == 128 */
/* input[8..11] <- 16 bytes starting at wherever k points at this stage */
62 x->input[8] = U8TO32_LITTLE(k + 0);
63 x->input[9] = U8TO32_LITTLE(k + 4);
64 x->input[10] = U8TO32_LITTLE(k + 8);
65 x->input[11] = U8TO32_LITTLE(k + 12);
/* input[0..3] <- the 16 bytes that `constants` points to */
66 x->input[0] = U8TO32_LITTLE(constants + 0);
67 x->input[1] = U8TO32_LITTLE(constants + 4);
68 x->input[2] = U8TO32_LITTLE(constants + 8);
69 x->input[3] = U8TO32_LITTLE(constants + 12);
/*
 * chacha_key: copy the key words back out of the context.
 *   x - context; words input[4..11] are read
 *   k - output buffer; each word is serialized with U32TO8_LITTLE at
 *       offsets 0, 4, ..., 28, and the last store reaches k[31], so k needs
 *       room for 32 bytes
 *
 * NOTE(review): the closing brace is not part of this listing.
 */
72 void chacha_key(chacha_ctx * x, u8 * k) {
/* first half: input[4..7] -> k[0..15] */
73 U32TO8_LITTLE(k, x->input[4]);
74 U32TO8_LITTLE(k + 4, x->input[5]);
75 U32TO8_LITTLE(k + 8, x->input[6]);
76 U32TO8_LITTLE(k + 12, x->input[7]);
/* second half: input[8..11] -> k[16..31] */
78 U32TO8_LITTLE(k + 16, x->input[8]);
79 U32TO8_LITTLE(k + 20, x->input[9]);
80 U32TO8_LITTLE(k + 24, x->input[10]);
81 U32TO8_LITTLE(k + 28, x->input[11]);
/*
 * chacha_nonce: copy state words input[13..15] out of the context.
 *   x     - context that is read
 *   nonce - output buffer; words are serialized with U32TO8_LITTLE at
 *           offsets 0, 4 and 8, and the last store reaches nonce[11], so
 *           nonce needs room for 12 bytes
 *
 * NOTE(review): the closing brace is not part of this listing.
 */
84 void chacha_nonce(chacha_ctx * x, u8 * nonce) {
85 U32TO8_LITTLE(nonce + 0, x->input[13]);
86 U32TO8_LITTLE(nonce + 4, x->input[14]);
87 U32TO8_LITTLE(nonce + 8, x->input[15]);
/*
 * chacha_ivsetup: load an 8-byte IV and an optional 8-byte counter.
 *   x       - context whose input[] words are written
 *   iv      - bytes 0..3 go to input[14], bytes 4..7 to input[15]
 *   counter - NULL selects 0 for both counter words; otherwise one word is
 *             read at offset 0 and one at offset 4
 *
 * NOTE(review): the left-hand sides of the two counter expressions (listing
 * numbers 91 and 93) and the closing brace are not shown; presumably they
 * assign input[12] and input[13] - confirm against the full source.
 */
90 void chacha_ivsetup(chacha_ctx * x, const u8 * iv, const u8 * counter) {
/* right-hand side of the first counter word */
92 counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
/* right-hand side of the second counter word */
94 counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
96 x->input[14] = U8TO32_LITTLE(iv + 0);
97 x->input[15] = U8TO32_LITTLE(iv + 4);
/*
 * chacha_ivsetup_96bitnonce: load a 12-byte nonce and an optional 4-byte
 * counter.
 *   x       - context whose input[] words are written
 *   iv      - bytes 0..3, 4..7 and 8..11 go to input[13], input[14] and
 *             input[15]
 *   counter - NULL selects 0; otherwise one word is read at offset 0
 *
 * NOTE(review): listing numbers 103, 105 and the closing lines are absent.
 * The counter expression has no visible assignment target (presumably
 * input[12]). chacha20_poly1305_aead calls this with iv == NULL, so the
 * missing line 105 may be a NULL guard around the three iv loads - confirm
 * before assuming a NULL iv is safe.
 */
101 void chacha_ivsetup_96bitnonce(chacha_ctx * x, const u8 * iv,
102 const u8 * counter) {
104 counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
106 x->input[13] = U8TO32_LITTLE(iv + 0);
107 x->input[14] = U8TO32_LITTLE(iv + 4);
108 x->input[15] = U8TO32_LITTLE(iv + 8);
/*
 * chacha_ivupdate: refresh IV-related state words from iv, aad and counter.
 *
 * Only three body lines are visible: a counter word (0 when counter is
 * NULL), input[13] loaded from iv bytes 0..3, and the tail of an expression
 * that XORs something with a word read from aad.
 *
 * NOTE(review): the rest of the body is absent from this listing, so the
 * assignment target of the counter word, the other state words and the full
 * aad expression cannot be read from here. The parameters are spelled
 * uint8_t here while the neighbouring setup functions use u8.
 */
112 void chacha_ivupdate(chacha_ctx * x, const uint8_t * iv,
113 const uint8_t * aad, const uint8_t * counter) {
115 counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
116 x->input[13] = U8TO32_LITTLE(iv + 0);
123 8) ^ U8TO32_LITTLE(aad
/*
 * chacha_encrypt_bytes: XOR `bytes` bytes of m with keystream and write the
 * result to c.
 *   x     - context; the visible lines write x->ks[0..63] and x->unused
 *   m     - input bytes, read as sixteen words per 64-byte block
 *   c     - output bytes, written as sixteen words per 64-byte block
 *   bytes - number of bytes to process
 *
 * Visible steps: ten iterations of eight quarter rounds over x0..x15, adding
 * j10..j15 back into x10..x15, serializing all sixteen words into x->ks,
 * XORing the words with 64 bytes of m, and writing the sixteen words to c.
 *
 * NOTE(review): this listing omits large parts of the body (numbers 134-159,
 * 161-182, 192-202, 245-254, 271-278 among others): the declaration of i,
 * the loads of j0..j15 and x0..x15, the additions for x0..x9, the block
 * loop, the short-block handling, the counter update and every closing
 * brace. The notes below describe only what the visible lines do.
 */
128 void chacha_encrypt_bytes(chacha_ctx * x, const u8 * m, u8 * c, u32 bytes) {
/* x0..x15: working words; j0..j15: words added back after the rounds */
130 u32 x0, x1, x2, x3, x4, x5, x6, x7;
131 u32 x8, x9, x10, x11, x12, x13, x14, x15;
132 u32 j0, j1, j2, j3, j4, j5, j6, j7;
133 u32 j8, j9, j10, j11, j12, j13, j14, j15;
/* the body of this per-byte loop is not shown */
160 for (i = 0; i < bytes; ++i) {
/* i counts 20, 18, ..., 2: ten passes of eight quarter rounds each */
183 for (i = 20; i > 0; i -= 2) {
/* with x0..x15 laid out as a 4x4 row-major matrix, these four are the columns */
184 QUARTERROUND(x0, x4, x8, x12)
185 QUARTERROUND(x1, x5, x9, x13)
186 QUARTERROUND(x2, x6, x10, x14)
187 QUARTERROUND(x3, x7, x11, x15)
/* and these four are the wrapped diagonals of the same matrix */
188 QUARTERROUND(x0, x5, x10, x15)
189 QUARTERROUND(x1, x6, x11, x12)
190 QUARTERROUND(x2, x7, x8, x13)
191 QUARTERROUND(x3, x4, x9, x14)
/* add the j words back in (the lines for x0..x9 are not shown) */
203 x10 = PLUS(x10, j10);
204 x11 = PLUS(x11, j11);
205 x12 = PLUS(x12, j12);
206 x13 = PLUS(x13, j13);
207 x14 = PLUS(x14, j14);
208 x15 = PLUS(x15, j15);
/* keep the 64 keystream bytes of this block in x->ks */
211 U32TO8_LITTLE(x->ks + 0, x0);
212 U32TO8_LITTLE(x->ks + 4, x1);
213 U32TO8_LITTLE(x->ks + 8, x2);
214 U32TO8_LITTLE(x->ks + 12, x3);
215 U32TO8_LITTLE(x->ks + 16, x4);
216 U32TO8_LITTLE(x->ks + 20, x5);
217 U32TO8_LITTLE(x->ks + 24, x6);
218 U32TO8_LITTLE(x->ks + 28, x7);
219 U32TO8_LITTLE(x->ks + 32, x8);
220 U32TO8_LITTLE(x->ks + 36, x9);
221 U32TO8_LITTLE(x->ks + 40, x10);
222 U32TO8_LITTLE(x->ks + 44, x11);
223 U32TO8_LITTLE(x->ks + 48, x12);
224 U32TO8_LITTLE(x->ks + 52, x13);
225 U32TO8_LITTLE(x->ks + 56, x14);
226 U32TO8_LITTLE(x->ks + 60, x15);
/* XOR the keystream words with 64 input bytes, one word at a time */
229 x0 = XOR(x0, U8TO32_LITTLE(m + 0));
230 x1 = XOR(x1, U8TO32_LITTLE(m + 4));
231 x2 = XOR(x2, U8TO32_LITTLE(m + 8));
232 x3 = XOR(x3, U8TO32_LITTLE(m + 12));
233 x4 = XOR(x4, U8TO32_LITTLE(m + 16));
234 x5 = XOR(x5, U8TO32_LITTLE(m + 20));
235 x6 = XOR(x6, U8TO32_LITTLE(m + 24));
236 x7 = XOR(x7, U8TO32_LITTLE(m + 28));
237 x8 = XOR(x8, U8TO32_LITTLE(m + 32));
238 x9 = XOR(x9, U8TO32_LITTLE(m + 36));
239 x10 = XOR(x10, U8TO32_LITTLE(m + 40));
240 x11 = XOR(x11, U8TO32_LITTLE(m + 44));
241 x12 = XOR(x12, U8TO32_LITTLE(m + 48));
242 x13 = XOR(x13, U8TO32_LITTLE(m + 52));
243 x14 = XOR(x14, U8TO32_LITTLE(m + 56));
244 x15 = XOR(x15, U8TO32_LITTLE(m + 60));
/* the next line is the interior of a block comment whose opening and closing lines are not shown */
250 * Stopping at 2^70 bytes per nonce is the user's
/* write the 64 result bytes to c */
255 U32TO8_LITTLE(c + 0, x0);
256 U32TO8_LITTLE(c + 4, x1);
257 U32TO8_LITTLE(c + 8, x2);
258 U32TO8_LITTLE(c + 12, x3);
259 U32TO8_LITTLE(c + 16, x4);
260 U32TO8_LITTLE(c + 20, x5);
261 U32TO8_LITTLE(c + 24, x6);
262 U32TO8_LITTLE(c + 28, x7);
263 U32TO8_LITTLE(c + 32, x8);
264 U32TO8_LITTLE(c + 36, x9);
265 U32TO8_LITTLE(c + 40, x10);
266 U32TO8_LITTLE(c + 44, x11);
267 U32TO8_LITTLE(c + 48, x12);
268 U32TO8_LITTLE(c + 52, x13);
269 U32TO8_LITTLE(c + 56, x14);
270 U32TO8_LITTLE(c + 60, x15);
/* the body of this per-byte loop is not shown */
274 for (i = 0; i < bytes; ++i)
/* record 64 - bytes; presumably the keystream bytes in x->ks left unconsumed by a short final block - confirm */
279 x->unused = 64 - bytes;
/*
 * chacha20_block: run the rounds once over x->input and write the resulting
 * words to c.
 *   x   - context; x->input[0..15] is overwritten with input + permuted state
 *   c   - output buffer, written four bytes at a time
 *   len - number of output bytes requested
 *
 * The result is added into x->input itself, so the context no longer holds
 * its previous state after this call.
 *
 * NOTE(review): the declaration of i and all closing braces are not part of
 * this listing. No bound on len is visible: the output loop indexes
 * x->input[i / 4], so a len above 64 would index past the sixteen words
 * copied above, and a len that is not a multiple of 4 makes the last
 * four-byte store run past c[len - 1].
 */
288 void chacha20_block(chacha_ctx * x, unsigned char *c, int len) {
/* work on a copy so the original words can be added back afterwards */
291 unsigned int state[16];
292 for (i = 0; i < 16; i++)
293 state[i] = x->input[i];
/* i counts 20, 18, ..., 2: ten passes of eight quarter rounds each */
294 for (i = 20; i > 0; i -= 2) {
/* with state[] laid out as a 4x4 row-major matrix: the four columns */
295 QUARTERROUND(state[0], state[4], state[8], state[12])
296 QUARTERROUND(state[1], state[5], state[9], state[13])
297 QUARTERROUND(state[2], state[6], state[10], state[14])
298 QUARTERROUND(state[3], state[7], state[11], state[15])
/* then the four wrapped diagonals */
299 QUARTERROUND(state[0], state[5], state[10], state[15])
300 QUARTERROUND(state[1], state[6], state[11], state[12])
301 QUARTERROUND(state[2], state[7], state[8], state[13])
302 QUARTERROUND(state[3], state[4], state[9], state[14])
/* feed-forward: input[i] <- input[i] + state[i] (mod 2^32) */
305 for (i = 0; i < 16; i++)
306 x->input[i] = PLUS(x->input[i], state[i]);
/* serialize word i / 4 at byte offset i */
308 for (i = 0; i < len; i += 4) {
309 U32TO8_LITTLE(c + i, x->input[i / 4]);
/*
 * poly1305_generate_key: derive a Poly1305 key from a ChaCha block.
 *   key256   - key passed to chacha_keysetup as a 256-bit key
 *   nonce    - nonce bytes passed to one of the two IV setup routines
 *   noncelen - nonce length; not used on any visible line (presumably it
 *              selects between the two IV setups - confirm)
 *   poly_key - output; receives POLY1305_KEYLEN bytes from chacha20_block
 *   counter  - block counter for the 96-bit-nonce setup
 *
 * NOTE(review): the declaration of `ctr`, the condition choosing between
 * chacha_ivsetup and chacha_ivsetup_96bitnonce, the return statement and the
 * closing brace are not part of this listing. Both setup calls pass the
 * address of a native integer as the counter bytes, and the setup routines
 * read those bytes low byte first, so the counter value seen by the cipher
 * depends on host byte order. The local ctx holds key material and no wipe
 * of it is visible.
 */
313 int poly1305_generate_key(unsigned char *key256, unsigned char *nonce,
314 unsigned int noncelen, unsigned char *poly_key,
315 unsigned int counter) {
316 struct chacha_ctx ctx;
/* start from an all-zero context, then load the key */
318 memset(&ctx, 0, sizeof(ctx));
319 chacha_keysetup(&ctx, key256, 256);
/* IV setup that reads an 8-byte counter from ctr */
323 chacha_ivsetup(&ctx, nonce, (unsigned char *) &ctr);
/* IV setup that reads a 4-byte counter from the counter argument */
326 chacha_ivsetup_96bitnonce(&ctx, nonce,
327 (unsigned char *) &counter);
/* the first POLY1305_KEYLEN bytes of the block become the Poly1305 key */
332 chacha20_block(&ctx, poly_key, POLY1305_KEYLEN);
336 /* Interpret four 8-bit unsigned integers as a 32-bit unsigned integer in little-endian order. */
/*
 * U8TO32: combine p[0..3] into one value, p[0] in the low byte and p[3] in
 * bits 24..31. Each byte is masked with 0xff and widened to unsigned long
 * before shifting, so the result fits in 32 bits even where unsigned long is
 * wider.
 *
 * NOTE(review): the line carrying the `return` keyword (listing number 339)
 * and the closing brace are not part of this listing.
 */
338 static unsigned long U8TO32(const unsigned char *p) {
340 (((unsigned long) (p[0] & 0xff)) |
341 ((unsigned long) (p[1] & 0xff) << 8) |
342 ((unsigned long) (p[2] & 0xff) << 16) |
343 ((unsigned long) (p[3] & 0xff) << 24));
346 /* Store a 32-bit unsigned integer as four 8-bit unsigned integers in little-endian order. */
/*
 * U32TO8: write v to p one byte at a time, low byte first. The lines shown
 * store bits 8..15, 16..23 and 24..31 into p[1], p[2] and p[3]; bits above
 * 31 are discarded.
 *
 * NOTE(review): the store to p[0] (listing number 349) and the closing brace
 * are not part of this listing.
 */
348 static void U32TO8(unsigned char *p, unsigned long v) {
350 p[1] = (v >> 8) & 0xff;
351 p[2] = (v >> 16) & 0xff;
352 p[3] = (v >> 24) & 0xff;
/*
 * tls_poly1305_init: load a 32-byte one-time key into the Poly1305 state.
 *   ctx - state to initialise; r[0..4] and pad[0..3] are written here
 *   key - key[0..15] supply r, key[16..31] supply pad
 *
 * r is held as five limbs: each limb is read at byte offset 0, 3, 6, 9, 12
 * and shifted right by 0, 2, 4, 6, 8 bits, which lines the limbs up on
 * multiples of 26 bits. The per-limb masks also clear the bits named by the
 * clamp comment below.
 *
 * NOTE(review): listing numbers 365-372 and the closing brace are absent;
 * presumably the accumulator and the leftover/final fields are reset there -
 * confirm. The cast on the first line converts ctx to its own type.
 */
355 void tls_poly1305_init(struct poly1305_context *ctx,
356 const unsigned char key[32]) {
357 struct poly1305_context *st = (struct poly1305_context *) ctx;
359 /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
360 st->r[0] = (U8TO32(&key[0])) & 0x3ffffff;
361 st->r[1] = (U8TO32(&key[3]) >> 2) & 0x3ffff03;
362 st->r[2] = (U8TO32(&key[6]) >> 4) & 0x3ffc0ff;
363 st->r[3] = (U8TO32(&key[9]) >> 6) & 0x3f03fff;
364 st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
373 /* save pad for later */
374 st->pad[0] = U8TO32(&key[16]);
375 st->pad[1] = U8TO32(&key[20]);
376 st->pad[2] = U8TO32(&key[24]);
377 st->pad[3] = U8TO32(&key[28]);
/*
 * tls_poly1305_blocks: absorb whole POLY1305_BLOCK_SIZE-byte blocks of m
 * into the accumulator, which is handled as five limbs h0..h4 masked to 26
 * bits each.
 *   st - Poly1305 state; st->final is read here
 *   m  - message bytes, consumed one block per loop pass
 *
 * NOTE(review): the second line of the parameter list (presumably declaring
 * `bytes`), the declaration of `c`, the loads of r0..r4, s1..s4 and h0..h4
 * from st, the carry additions between the reduction steps, the stores back
 * into st and all closing braces are on lines not included in this listing.
 */
383 void tls_poly1305_blocks(struct poly1305_context *st, const unsigned char *m,
/* limb 4 starts at message bit 104, so its bit 24 is bit 128 of the block; left at 0 when st->final is set */
385 const unsigned long hibit = (st->final) ? 0 : (1UL << 24); /* 1 << 128 */
386 unsigned long r0, r1, r2, r3, r4;
387 unsigned long s1, s2, s3, s4;
388 unsigned long h0, h1, h2, h3, h4;
389 unsigned long long d0, d1, d2, d3, d4;
409 while (bytes >= POLY1305_BLOCK_SIZE) {
/* h += block: split 16 message bytes into five 26-bit limbs, OR-ing hibit into the top one */
411 h0 += (U8TO32(m + 0)) & 0x3ffffff;
412 h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff;
413 h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff;
414 h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff;
415 h4 += (U8TO32(m + 12) >> 8) | hibit;
/*
 * Limb-wise product in 64-bit arithmetic: d_k sums every h_i * r_j with
 * i + j == k, plus h_i * s_j for the pairs with i + j == k + 5. The values
 * of s1..s4 are set on lines not shown (presumably 5 * r_j - confirm).
 */
418 d0 = ((unsigned long long) h0 * r0) +
419 ((unsigned long long) h1 * s4) +
420 ((unsigned long long) h2 * s3) +
421 ((unsigned long long) h3 * s2) +
422 ((unsigned long long) h4 * s1);
423 d1 = ((unsigned long long) h0 * r1) +
424 ((unsigned long long) h1 * r0) +
425 ((unsigned long long) h2 * s4) +
426 ((unsigned long long) h3 * s3) +
427 ((unsigned long long) h4 * s2);
428 d2 = ((unsigned long long) h0 * r2) +
429 ((unsigned long long) h1 * r1) +
430 ((unsigned long long) h2 * r0) +
431 ((unsigned long long) h3 * s4) +
432 ((unsigned long long) h4 * s3);
433 d3 = ((unsigned long long) h0 * r3) +
434 ((unsigned long long) h1 * r2) +
435 ((unsigned long long) h2 * r1) +
436 ((unsigned long long) h3 * r0) +
437 ((unsigned long long) h4 * s4);
438 d4 = ((unsigned long long) h0 * r4) +
439 ((unsigned long long) h1 * r3) +
440 ((unsigned long long) h2 * r2) +
441 ((unsigned long long) h3 * r1) +
442 ((unsigned long long) h4 * r0);
444 /* (partial) h %= p */
/* per limb: c takes the bits above 26, h keeps the low 26; the lines that add c to the next limb are not shown */
445 c = (unsigned long) (d0 >> 26);
446 h0 = (unsigned long) d0 & 0x3ffffff;
448 c = (unsigned long) (d1 >> 26);
449 h1 = (unsigned long) d1 & 0x3ffffff;
451 c = (unsigned long) (d2 >> 26);
452 h2 = (unsigned long) d2 & 0x3ffffff;
454 c = (unsigned long) (d3 >> 26);
455 h3 = (unsigned long) d3 & 0x3ffffff;
457 c = (unsigned long) (d4 >> 26);
458 h4 = (unsigned long) d4 & 0x3ffffff;
/* advance to the next block */
464 m += POLY1305_BLOCK_SIZE;
465 bytes -= POLY1305_BLOCK_SIZE;
/*
 * tls_poly1305_finish: complete the MAC computation and write the tag.
 *   ctx - Poly1305 state; st->buffer and st->pad are read on visible lines
 *   mac - output tag; the visible store writes h3 at mac + 12
 *
 * Visible steps: pad and process the last partial block, form g = h + -p,
 * select between h and g with a mask, repack the five 26-bit limbs into
 * four 32-bit words, add pad with carry, and store the result.
 *
 * NOTE(review): many lines are absent (numbers 484-486, 488-489, 491-528,
 * 530, 533-538, 560-563, 567 onward): the declarations of i and mask, the
 * leftover test, the final carry chain, g0..g3, the masking of g, the
 * stores of h0..h2 and the state wipe. The cast on the first body line
 * converts ctx to its own type.
 */
475 void tls_poly1305_finish(struct poly1305_context *ctx, unsigned char mac[16]) {
477 struct poly1305_context *st = (struct poly1305_context *) ctx;
478 unsigned long h0, h1, h2, h3, h4, c;
479 unsigned long g0, g1, g2, g3, g4;
480 unsigned long long f;
483 /* process the remaining block */
/* body of this fill loop is not shown; it runs i up to the block size */
487 for (; i < POLY1305_BLOCK_SIZE; i++)
490 tls_poly1305_blocks(st, st->buffer, POLY1305_BLOCK_SIZE);
/* top limb of h + -p: subtracting 2^26 wraps around (top bit set) when h < p */
529 g4 = h4 + c - (1UL << 26);
531 /* select h if h < p, or h + -p if h >= p */
/* mask is 0 when g4's top bit is set and all ones otherwise */
532 mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
/* merge step; the lines that prepare g and mask for it (533-538) are not shown */
539 h0 = (h0 & mask) | g0;
540 h1 = (h1 & mask) | g1;
541 h2 = (h2 & mask) | g2;
542 h3 = (h3 & mask) | g3;
543 h4 = (h4 & mask) | g4;
545 /* h = h % (2^128) */
/* repack five 26-bit limbs into four 32-bit words; the & keeps 32 bits where unsigned long is wider */
546 h0 = ((h0) | (h1 << 26)) & 0xffffffff;
547 h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
548 h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
549 h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
551 /* mac = (h + pad) % (2^128) */
/* 64-bit adds; f >> 32 carries into the next word */
552 f = (unsigned long long) h0 + st->pad[0];
553 h0 = (unsigned long) f;
554 f = (unsigned long long) h1 + st->pad[1] + (f >> 32);
555 h1 = (unsigned long) f;
556 f = (unsigned long long) h2 + st->pad[2] + (f >> 32);
557 h2 = (unsigned long) f;
558 f = (unsigned long long) h3 + st->pad[3] + (f >> 32);
559 h3 = (unsigned long) f;
/* last of the tag stores; those for h0..h2 are not shown */
564 U32TO8(mac + 12, h3);
566 /* zero out the state */
/*
 * tls_poly1305_update: feed message bytes into the MAC, buffering whatever
 * does not fill a whole block.
 *   ctx - Poly1305 state; st->buffer and st->leftover are updated here
 *   m   - input bytes
 *
 * Three stages are visible: top up a partially filled st->buffer and process
 * it once full, pass whole blocks straight to tls_poly1305_blocks, then
 * store the remaining tail in st->buffer.
 *
 * NOTE(review): the second line of the parameter list (presumably declaring
 * `bytes`), the declaration of i, the conditions guarding the first and last
 * stage, the clamping of `want`, the m/bytes adjustments, the early return
 * and all closing braces are on lines not included in this listing. The
 * cast on the first body line converts ctx to its own type.
 */
583 void tls_poly1305_update(struct poly1305_context *ctx, const unsigned char *m,
585 struct poly1305_context *st = (struct poly1305_context *) ctx;
587 /* handle leftover */
/* room left in the buffer before it holds a whole block */
589 size_t want = (POLY1305_BLOCK_SIZE - st->leftover);
592 for (i = 0; i < want; i++)
593 st->buffer[st->leftover + i] = m[i];
596 st->leftover += want;
/* statement under this test is not shown; presumably returns while the buffer is still short */
597 if (st->leftover < POLY1305_BLOCK_SIZE)
599 tls_poly1305_blocks(st, st->buffer, POLY1305_BLOCK_SIZE);
603 /* process full blocks */
604 if (bytes >= POLY1305_BLOCK_SIZE) {
/* bytes rounded down to a whole number of blocks; relies on the block size being a power of two */
605 size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1));
606 tls_poly1305_blocks(st, m, want);
/* keep the tail for the next call */
613 for (i = 0; i < bytes; i++)
614 st->buffer[st->leftover + i] = m[i];
615 st->leftover += bytes;
/*
 * poly1305_verify: compare two 16-byte tags.
 *   mac1, mac2 - the tags to compare
 *
 * The loop ORs together the XOR of every byte pair with no early exit, so
 * all 16 bytes are always examined. dif ends up 0 only when the tags are
 * equal. The last visible line maps that to 1 for equal tags (0 - 1 wraps
 * to all ones, whose top bit is set) and 0 for differing tags (dif - 1 is
 * then at most 254, whose top bit is clear).
 *
 * NOTE(review): the declaration of i, the return statement and the closing
 * brace are not part of this listing; presumably the function returns the
 * final dif - confirm.
 */
619 int poly1305_verify(const unsigned char mac1[16],
620 const unsigned char mac2[16]) {
622 unsigned int dif = 0;
623 for (i = 0; i < 16; i++)
624 dif |= (mac1[i] ^ mac2[i]);
625 dif = (dif - 1) >> ((sizeof(unsigned int) * 8) - 1);
/*
 * chacha20_poly1305_key: derive the Poly1305 key that belongs to a ChaCha
 * context.
 *   ctx          - context whose key and nonce words are read out
 *   poly1305_key - output buffer handed to poly1305_generate_key
 *
 * The key (32 bytes) and nonce (12 bytes) are copied out of ctx and passed
 * to poly1305_generate_key with block counter 0. The return value of that
 * call is ignored.
 *
 * NOTE(review): the closing brace is not part of this listing, and no wipe
 * of the local key copy is visible.
 */
629 void chacha20_poly1305_key(struct chacha_ctx *ctx,
630 unsigned char *poly1305_key) {
631 unsigned char key[32];
632 unsigned char nonce[12];
633 chacha_key(ctx, key);
634 chacha_nonce(ctx, nonce);
635 poly1305_generate_key(key, nonce, sizeof(nonce), poly1305_key, 0);
/*
 * Fifteen zero bytes, used by chacha20_poly1305_aead to pad MAC input up to
 * a 16-byte boundary. Fifteen is the most padding ever needed, so callers
 * must never request 16 bytes from this array.
 *
 * The listing line numbers that had been fused onto the front of the
 * definition are removed so it is valid C again, and the size is now spelled
 * out instead of being implied by the initializer count.
 */
static unsigned char zeropad[15] = { 0 };
/*
 * chacha20_poly1305_aead: encrypt pt and append a Poly1305 tag.
 *   ctx      - ChaCha context; its counter word is reset through the IV setup
 *   pt       - plaintext, len bytes
 *   len      - plaintext length
 *   aad      - additional authenticated data, aad_len bytes
 *   aad_len  - checked against POLY1305_MAX_AAD
 *   poly_key - 32-byte Poly1305 key
 *   out      - receives len ciphertext bytes followed by the tag written by
 *              tls_poly1305_finish at out + len
 * Returns len + POLY1305_TAGLEN on the visible return path.
 *
 * MAC input, in order: aad, zero padding, ciphertext, zero padding, then a
 * 16-byte trailer holding aad_len and len.
 *
 * NOTE(review): the body of the aad_len check, the conditions guarding both
 * padding calls, the recomputation of rem for the ciphertext and the closing
 * brace are on lines not included in this listing. zeropad holds 15 bytes,
 * so both padding calls are only safe when rem is non-zero - confirm the
 * missing guards. The counter is passed as the bytes of a native unsigned
 * int and read low byte first, so its value depends on host byte order.
 */
641 int chacha20_poly1305_aead(struct chacha_ctx *ctx, unsigned char *pt,
642 unsigned int len, unsigned char *aad,
643 unsigned int aad_len, unsigned char *poly_key,
644 unsigned char *out) {
645 if (aad_len > POLY1305_MAX_AAD) {
/* encryption starts at block counter 1; iv is NULL, so the nonce words are expected to be left as they are */
649 unsigned int counter = 1;
650 chacha_ivsetup_96bitnonce(ctx, NULL, (unsigned char *) &counter);
651 chacha_encrypt_bytes(ctx, pt, out, len);
/* authenticate aad, then the ciphertext just written to out */
653 struct poly1305_context aead_ctx;
654 tls_poly1305_init(&aead_ctx, poly_key);
655 tls_poly1305_update(&aead_ctx, aad, aad_len);
656 int rem = aad_len % 16;
658 tls_poly1305_update(&aead_ctx, zeropad, 16 - rem);
660 tls_poly1305_update(&aead_ctx, out, len);
663 tls_poly1305_update(&aead_ctx, zeropad, 16 - rem);
/* trailer: aad_len in bytes 0..3, len in bytes 8..11, zeros in between */
666 unsigned char trail[16];
667 U32TO8(&trail[0], aad_len);
/* NOTE(review): stores through an int pointer into a char array; this assumes 4-byte int and suitable alignment */
668 *(int *) &trail[4] = 0;
669 U32TO8(&trail[8], len);
670 *(int *) &trail[12] = 0;
672 tls_poly1305_update(&aead_ctx, trail, 16);
673 tls_poly1305_finish(&aead_ctx, out + len);
675 return len + POLY1305_TAGLEN;