1 /* TomsFastMath, a fast ISO C bignum library.
3 * This project is meant to fill in where LibTomMath
4 * falls short. That is speed ;-)
6 * This project is public domain and free for all purposes.
8 * Tom St Denis, tomstdenis@gmail.com
15 #include <tfm_private.h>
17 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
22 /* these are the combas. Worship them. */
24 /* Generic x86 optimized code */
26 /* anything you need at the start */
29 /* clear the chaining variables */
33 /* forward the carry to the next digit */
34 #define COMBA_FORWARD \
35 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
37 /* store the first sum */
38 #define COMBA_STORE(x) \
41 /* store the second sum [carry] */
42 #define COMBA_STORE2(x) \
45 /* anything you need at the end */
48 /* this should multiply i and j */
49 #define MULADD(i, j) \
51 "movl %6,%%eax \n\t" \
53 "addl %%eax,%0 \n\t" \
54 "adcl %%edx,%1 \n\t" \
56 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
58 #elif defined(TFM_X86_64)
59 /* x86-64 optimized */
61 /* anything you need at the start */
64 /* clear the chaining variables */
68 /* forward the carry to the next digit */
69 #define COMBA_FORWARD \
70 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
72 /* store the first sum */
73 #define COMBA_STORE(x) \
76 /* store the second sum [carry] */
77 #define COMBA_STORE2(x) \
80 /* anything you need at the end */
83 /* this should multiply i and j */
84 #define MULADD(i, j) \
86 "movq %6,%%rax \n\t" \
88 "addq %%rax,%0 \n\t" \
89 "adcq %%rdx,%1 \n\t" \
91 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
93 #elif defined(TFM_SSE2)
94 /* use SSE2 optimizations */
96 /* anything you need at the start */
99 /* clear the chaining variables */
100 #define COMBA_CLEAR \
103 /* forward the carry to the next digit */
104 #define COMBA_FORWARD \
105 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
107 /* store the first sum */
108 #define COMBA_STORE(x) \
111 /* store the second sum [carry] */
112 #define COMBA_STORE2(x) \
115 /* anything you need at the end */
119 /* this should multiply i and j */
120 #define MULADD(i, j) \
122 "movd %6,%%mm0 \n\t" \
123 "movd %7,%%mm1 \n\t" \
124 "pmuludq %%mm1,%%mm0\n\t" \
125 "movd %%mm0,%%eax \n\t" \
126 "psrlq $32,%%mm0 \n\t" \
127 "addl %%eax,%0 \n\t" \
128 "movd %%mm0,%%eax \n\t" \
129 "adcl %%eax,%1 \n\t" \
131 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
133 #elif defined(TFM_ARM)
138 #define COMBA_CLEAR \
141 #define COMBA_FORWARD \
142 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
144 #define COMBA_STORE(x) \
147 #define COMBA_STORE2(x) \
152 #define MULADD(i, j) \
154 " UMULL r0,r1,%6,%7 \n\t" \
155 " ADDS %0,%0,r0 \n\t" \
156 " ADCS %1,%1,r1 \n\t" \
157 " ADC %2,%2,#0 \n\t" \
158 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
160 #elif defined(TFM_PPC32)
165 #define COMBA_CLEAR \
168 #define COMBA_FORWARD \
169 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
171 #define COMBA_STORE(x) \
174 #define COMBA_STORE2(x) \
179 /* untested: will mulhwu change the flags? Docs say no */
180 #define MULADD(i, j) \
182 " mullw 16,%6,%7 \n\t" \
183 " addc %0,%0,16 \n\t" \
184 " mulhwu 16,%6,%7 \n\t" \
185 " adde %1,%1,16 \n\t" \
186 " addze %2,%2 \n\t" \
187 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
189 #elif defined(TFM_PPC64)
194 #define COMBA_CLEAR \
197 #define COMBA_FORWARD \
198 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
200 #define COMBA_STORE(x) \
203 #define COMBA_STORE2(x) \
208 /* untested: will mulhdu change the flags? Docs say no */
209 #define MULADD(i, j) \
211 " mulld r16,%6,%7 \n\t" \
212 " addc %0,%0,16 \n\t" \
213 " mulhdu r16,%6,%7 \n\t" \
214 " adde %1,%1,16 \n\t" \
215 " addze %2,%2 \n\t" \
216 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
218 #elif defined(TFM_AVR32)
224 #define COMBA_CLEAR \
227 #define COMBA_FORWARD \
228 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
230 #define COMBA_STORE(x) \
233 #define COMBA_STORE2(x) \
238 #define MULADD(i, j) \
240 " mulu.d r2,%6,%7 \n\t"\
242 " adc %1,%1,r3 \n\t"\
244 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
246 #elif defined(TFM_MIPS)
250 #define COMBA_CLEAR \
253 #define COMBA_FORWARD \
254 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
256 #define COMBA_STORE(x) \
259 #define COMBA_STORE2(x) \
264 #define MULADD(i, j) \
266 " multu %6,%7 \n\t" \
269 " addu %0,%0,$12 \n\t" \
270 " sltu $12,%0,$12 \n\t" \
271 " addu %1,%1,$13 \n\t" \
272 " sltu $13,%1,$13 \n\t" \
273 " addu %1,%1,$12 \n\t" \
274 " sltu $12,%1,$12 \n\t" \
275 " addu %2,%2,$13 \n\t" \
276 " addu %2,%2,$12 \n\t" \
277 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
284 #define COMBA_CLEAR \
287 #define COMBA_FORWARD \
288 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
290 #define COMBA_STORE(x) \
293 #define COMBA_STORE2(x) \
298 #define MULADD(i, j) \
300 t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); \
302 t = (fp_word)c1 + (t >> DIGIT_BIT); \
304 c2 += t >> DIGIT_BIT; \
311 /* generic PxQ multiplier */
312 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
314 int ix, iy, iz, tx, ty, pa;
315 fp_digit c0, c1, c2, *tmpx, *tmpy;
321 /* get size of output and trim */
322 pa = A->used + B->used;
327 if (A == C || B == C) {
335 for (ix = 0; ix < pa; ix++) {
336 /* get offsets into the two bignums */
337 ty = MIN(ix, B->used-1);
340 /* setup temp aliases */
344 /* this is the number of times the loop will iterrate, essentially its
345 while (tx++ < a->used && ty-- >= 0) { ... }
347 iy = MIN(A->used-tx, ty+1);
351 for (iz = 0; iz < iy; ++iz) {
352 fp_digit _tmpx = *tmpx++;
353 fp_digit _tmpy = *tmpy--;
354 MULADD(_tmpx, _tmpy);
358 COMBA_STORE(dst->dp[ix]);
363 dst->sign = A->sign ^ B->sign;