Botan  1.10.9
mp_asmi.h
Go to the documentation of this file.
1 /*
2 * Lowest Level MPI Algorithms
3 * (C) 1999-2010 Jack Lloyd
4 * 2006 Luca Piccarreta
5 *
6 * Distributed under the terms of the Botan license
7 */
8 
9 #ifndef BOTAN_MP_ASM_INTERNAL_H__
10 #define BOTAN_MP_ASM_INTERNAL_H__
11 
12 #include <botan/internal/mp_asm.h>
13 
14 namespace Botan {
15 
16 extern "C" {
17 
18 /* Word Addition: returns x + y + *carry, truncated to one word.
19 * *carry is the carry-in on entry and receives the carry-out (0 or 1).
20 * NOTE(review): the wrap tests assume word is an unsigned type - confirm. */
21 inline word word_add(word x, word y, word* carry)
22  {
23  const word carry_in = *carry;
24  const word partial = x + y; // wrapped iff partial < x
25  const word total = partial + carry_in; // wrapped iff total < carry_in
26  *carry = (partial < x) | (total < carry_in);
27  return total;
28  }
29 
30 /* Eight Word Block Addition, Two Argument: x[i] += y[i] for i = 0..7,
31 * with the carry chained through all eight 32-bit words (x86 __asm block).
32 * carry is the carry-in; the carry-out (0 or 1) is left in eax. */
33 inline word word8_add2(word x[8], const word y[8], word carry)
34  {
35  __asm {
36  mov edx,[x] //edx = x, read and updated in place
37  mov esi,[y] //esi = y, only read
38  xor eax,eax
39  sub eax,[carry] //0 - carry: sets CF=1 iff carry!=0
40  mov eax,[esi] //mov does not modify CF
41  adc [edx],eax //x[0] += y[0] + CF
42  mov eax,[esi+4]
43  adc [edx+4],eax
44  mov eax,[esi+8]
45  adc [edx+8],eax
46  mov eax,[esi+12]
47  adc [edx+12],eax
48  mov eax,[esi+16]
49  adc [edx+16],eax
50  mov eax,[esi+20]
51  adc [edx+20],eax
52  mov eax,[esi+24]
53  adc [edx+24],eax
54  mov eax,[esi+28]
55  adc [edx+28],eax //x[7] += y[7] + CF
56  sbb eax,eax //eax = 0 if CF is clear, all ones if CF is set
57  neg eax //eax = carry-out as 0 or 1
58  } //NOTE(review): no return statement; relies on the compiler returning eax - confirm
59  }
60 
61 /* Eight Word Block Addition, Three Argument: z[i] = x[i] + y[i] for i = 0..7,
62 * with the carry chained through all eight 32-bit words (x86 __asm block).
63 * carry is the carry-in; the carry-out (0 or 1) is left in eax. */
64 inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
65  {
66  __asm {
67  mov edi,[x] //edi = x, only read
68  mov esi,[y] //esi = y, only read
69  mov ebx,[z] //ebx = z, only written
70  xor eax,eax
71  sub eax,[carry] //0 - carry: sets CF=1 iff carry!=0
72  mov eax,[edi] //mov does not modify CF
73  adc eax,[esi] //eax = x[0] + y[0] + CF
74  mov [ebx],eax //z[0] = eax
75 
76  mov eax,[edi+4]
77  adc eax,[esi+4]
78  mov [ebx+4],eax
79 
80  mov eax,[edi+8]
81  adc eax,[esi+8]
82  mov [ebx+8],eax
83 
84  mov eax,[edi+12]
85  adc eax,[esi+12]
86  mov [ebx+12],eax
87 
88  mov eax,[edi+16]
89  adc eax,[esi+16]
90  mov [ebx+16],eax
91 
92  mov eax,[edi+20]
93  adc eax,[esi+20]
94  mov [ebx+20],eax
95 
96  mov eax,[edi+24]
97  adc eax,[esi+24]
98  mov [ebx+24],eax
99 
100  mov eax,[edi+28]
101  adc eax,[esi+28]
102  mov [ebx+28],eax
103 
104  sbb eax,eax //eax = 0 if CF is clear, all ones if CF is set
105  neg eax //eax = carry-out as 0 or 1
106  } //NOTE(review): no return statement; relies on the compiler returning eax - confirm
107  }
108 
109 /* Word Subtraction: returns x - y - *carry, truncated to one word.
110 * *carry is the borrow-in on entry and receives the borrow-out (0 or 1).
111 * NOTE(review): the wrap tests assume word is an unsigned type - confirm. */
112 inline word word_sub(word x, word y, word* carry)
113  {
114  const word borrow_in = *carry;
115  const word diff = x - y; // wrapped iff diff > x
116  const word result = diff - borrow_in; // wrapped iff result > diff
117  *carry = (diff > x) | (result > diff);
118  return result;
119  }
120 
121 /* Eight Word Block Subtraction, Two Argument: x[i] -= y[i] for i = 0..7,
122 * with the borrow chained through all eight 32-bit words (x86 __asm block).
123 * carry is the borrow-in; the borrow-out (0 or 1) is left in eax. */
124 inline word word8_sub2(word x[8], const word y[8], word carry)
125  {
126  __asm {
127  mov edi,[x] //edi = x, read and updated in place
128  mov esi,[y] //esi = y, only read
129  xor eax,eax
130  sub eax,[carry] //0 - carry: sets CF=1 iff carry!=0
131  mov eax,[edi] //mov does not modify CF
132  sbb eax,[esi] //eax = x[0] - y[0] - CF
133  mov [edi],eax //x[0] = eax
134  mov eax,[edi+4]
135  sbb eax,[esi+4]
136  mov [edi+4],eax
137  mov eax,[edi+8]
138  sbb eax,[esi+8]
139  mov [edi+8],eax
140  mov eax,[edi+12]
141  sbb eax,[esi+12]
142  mov [edi+12],eax
143  mov eax,[edi+16]
144  sbb eax,[esi+16]
145  mov [edi+16],eax
146  mov eax,[edi+20]
147  sbb eax,[esi+20]
148  mov [edi+20],eax
149  mov eax,[edi+24]
150  sbb eax,[esi+24]
151  mov [edi+24],eax
152  mov eax,[edi+28]
153  sbb eax,[esi+28]
154  mov [edi+28],eax
155  sbb eax,eax //eax = 0 if CF is clear, all ones if CF is set
156  neg eax //eax = borrow-out as 0 or 1
157  } //NOTE(review): no return statement; relies on the compiler returning eax - confirm
158  }
159 
160 /* Eight Word Block Subtraction, Two Argument, Reversed Operands:
161 * x[i] = y[i] - x[i] for i = 0..7, with the borrow chained through word_sub.
162 * carry is the borrow-in; the borrow left after word 7 is returned. */
163 inline word word8_sub2_rev(word x[8], const word y[8], word carry)
164  {
165  // Index 0 goes first so that the borrow word_sub writes back
166  // through &carry feeds the subtraction at the next index.
167  for(int i = 0; i != 8; ++i)
168  {
169  const word minuend = y[i];
170  const word subtrahend = x[i];
171  x[i] = word_sub(minuend, subtrahend, &carry);
172  }
173  return carry;
174  }
175 
176 
177 /* Eight Word Block Subtraction, Three Argument: z[i] = x[i] - y[i] for i = 0..7,
178 * with the borrow chained through all eight 32-bit words (x86 __asm block).
179 * carry is the borrow-in; the borrow-out (0 or 1) is left in eax. */
180 inline word word8_sub3(word z[8], const word x[8],
181  const word y[8], word carry)
182  {
183  __asm {
184  mov edi,[x] //edi = x, only read
185  mov esi,[y] //esi = y, only read
186  xor eax,eax
187  sub eax,[carry] //0 - carry: sets CF=1 iff carry!=0
188  mov ebx,[z] //ebx = z, only written; mov does not modify CF
189  mov eax,[edi]
190  sbb eax,[esi] //eax = x[0] - y[0] - CF
191  mov [ebx],eax //z[0] = eax
192  mov eax,[edi+4]
193  sbb eax,[esi+4]
194  mov [ebx+4],eax
195  mov eax,[edi+8]
196  sbb eax,[esi+8]
197  mov [ebx+8],eax
198  mov eax,[edi+12]
199  sbb eax,[esi+12]
200  mov [ebx+12],eax
201  mov eax,[edi+16]
202  sbb eax,[esi+16]
203  mov [ebx+16],eax
204  mov eax,[edi+20]
205  sbb eax,[esi+20]
206  mov [ebx+20],eax
207  mov eax,[edi+24]
208  sbb eax,[esi+24]
209  mov [ebx+24],eax
210  mov eax,[edi+28]
211  sbb eax,[esi+28]
212  mov [ebx+28],eax
213  sbb eax,eax //eax = 0 if CF is clear, all ones if CF is set
214  neg eax //eax = borrow-out as 0 or 1
215  } //NOTE(review): no return statement; relies on the compiler returning eax - confirm
216  }
217 
218 /* Eight Word Block Linear Multiplication, Two Argument (x86 __asm block):
219 * for i = 0..7, x[i] = low word of (x[i] * y + carry-in), and the high word
220 * becomes the carry-in of word i+1. The last high word is left in eax. */
221 inline word word8_linmul2(word x[8], word y, word carry)
222  {
223  __asm {
224  mov esi,[x] //esi = x, read and updated in place
225  mov eax,[esi] //eax = x[0]
226  mul [y] //edx(hi):eax(lo) = x[0] * y
227  add eax,[carry] //lo += carry-in
228  adc edx,0 //hi += CF from the add above
229  mov ecx,edx //ecx = carry into the next word
230  mov [esi],eax //x[0] = lo
231 
232  mov eax,[esi+4] //eax = x[1]
233  mul [y] //edx(hi):eax(lo) = x[1] * y
234  add eax,ecx //lo += carry from the previous word
235  adc edx,0 //hi += CF from the add above
236  mov ecx,edx //ecx = carry into the next word
237  mov [esi+4],eax //x[1] = lo
238 
239  mov eax,[esi+8] //eax = x[2]
240  mul [y] //edx(hi):eax(lo) = x[2] * y
241  add eax,ecx //lo += carry from the previous word
242  adc edx,0 //hi += CF from the add above
243  mov ecx,edx //ecx = carry into the next word
244  mov [esi+8],eax //x[2] = lo
245 
246  mov eax,[esi+12] //eax = x[3]
247  mul [y] //edx(hi):eax(lo) = x[3] * y
248  add eax,ecx //lo += carry from the previous word
249  adc edx,0 //hi += CF from the add above
250  mov ecx,edx //ecx = carry into the next word
251  mov [esi+12],eax //x[3] = lo
252 
253  mov eax,[esi+16] //eax = x[4]
254  mul [y] //edx(hi):eax(lo) = x[4] * y
255  add eax,ecx //lo += carry from the previous word
256  adc edx,0 //hi += CF from the add above
257  mov ecx,edx //ecx = carry into the next word
258  mov [esi+16],eax //x[4] = lo
259 
260  mov eax,[esi+20] //eax = x[5]
261  mul [y] //edx(hi):eax(lo) = x[5] * y
262  add eax,ecx //lo += carry from the previous word
263  adc edx,0 //hi += CF from the add above
264  mov ecx,edx //ecx = carry into the next word
265  mov [esi+20],eax //x[5] = lo
266 
267  mov eax,[esi+24] //eax = x[6]
268  mul [y] //edx(hi):eax(lo) = x[6] * y
269  add eax,ecx //lo += carry from the previous word
270  adc edx,0 //hi += CF from the add above
271  mov ecx,edx //ecx = carry into the next word
272  mov [esi+24],eax //x[6] = lo
273 
274  mov eax,[esi+28] //eax = x[7]
275  mul [y] //edx(hi):eax(lo) = x[7] * y
276  add eax,ecx //lo += carry from the previous word
277  adc edx,0 //hi += CF from the add above
278  mov [esi+28],eax //x[7] = lo
279 
280  mov eax,edx //eax = final carry; NOTE(review): no return statement, relies on eax being returned
281  }
282  }
283 
284 /* Eight Word Block Multiply-Add (x86 __asm block): for i = 0..7,
285 * z[i] = low word of (x[i] * y + carry-in + z[i]); the high word becomes the
286 * carry-in of word i+1. The last high word is left in eax. */
287 inline word word8_muladd(word z[8], const word x[8],
288  word y, word carry)
289  {
290  __asm {
291  mov esi,[x] //esi = x, only read
292  mov ebx,[y] //ebx = multiplier y, reused by every mul below
293  mov edi,[z] //edi = z, read and updated in place
294  mov eax,[esi] //eax = x[0]
295  mul ebx //edx(hi):eax(lo) = x[0] * y
296  add eax,[carry] //lo += carry-in
297  adc edx,0 //hi += CF from the add above
298  add eax,[edi] //lo += z[0]
299  adc edx,0 //hi += CF from the add above
300  mov ecx,edx //carry for the next word = hi
301  mov [edi],eax //z[0] = lo
302 
303  mov eax,[esi+4] //same sequence for word 1, carry-in taken from ecx
304  mul ebx
305  add eax,ecx
306  adc edx,0
307  add eax,[edi+4]
308  adc edx,0
309  mov ecx,edx
310  mov [edi+4],eax
311 
312  mov eax,[esi+8]
313  mul ebx
314  add eax,ecx
315  adc edx,0
316  add eax,[edi+8]
317  adc edx,0
318  mov ecx,edx
319  mov [edi+8],eax
320 
321  mov eax,[esi+12]
322  mul ebx
323  add eax,ecx
324  adc edx,0
325  add eax,[edi+12]
326  adc edx,0
327  mov ecx,edx
328  mov [edi+12],eax
329 
330  mov eax,[esi+16]
331  mul ebx
332  add eax,ecx
333  adc edx,0
334  add eax,[edi+16]
335  adc edx,0
336  mov ecx,edx
337  mov [edi+16],eax
338 
339  mov eax,[esi+20]
340  mul ebx
341  add eax,ecx
342  adc edx,0
343  add eax,[edi+20]
344  adc edx,0
345  mov ecx,edx
346  mov [edi+20],eax
347 
348  mov eax,[esi+24]
349  mul ebx
350  add eax,ecx
351  adc edx,0
352  add eax,[edi+24]
353  adc edx,0
354  mov ecx,edx
355  mov [edi+24],eax
356 
357  mov eax,[esi+28]
358  mul ebx
359  add eax,ecx
360  adc edx,0
361  add eax,[edi+28]
362  adc edx,0
363  mov [edi+28],eax //z[7] = lo
364  mov eax,edx //eax = final carry; NOTE(review): no return statement, relies on eax being returned
365  }
366  }
367 
368 inline word word8_linmul3(word z[4], const word x[4], word y, word carry)
369  {
370  __asm {
371 #if 0
372  //it's slower!!!
373  mov edx,[z]
374  mov eax,[x]
375  movd mm7,[y]
376 
377  movd mm0,[eax]
378  movd mm1,[eax+4]
379  movd mm2,[eax+8]
380  pmuludq mm0,mm7
381  pmuludq mm1,mm7
382  pmuludq mm2,mm7
383 
384  movd mm6,[carry]
385  paddq mm0,mm6
386  movd [edx],mm0
387 
388  psrlq mm0,32
389  paddq mm1,mm0
390  movd [edx+4],mm1
391 
392  movd mm3,[eax+12]
393  psrlq mm1,32
394  paddq mm2,mm1
395  movd [edx+8],mm2
396 
397  pmuludq mm3,mm7
398  movd mm4,[eax+16]
399  psrlq mm2,32
400  paddq mm3,mm2
401  movd [edx+12],mm3
402 
403  pmuludq mm4,mm7
404  movd mm5,[eax+20]
405  psrlq mm3,32
406  paddq mm4,mm3
407  movd [edx+16],mm4
408 
409  pmuludq mm5,mm7
410  movd mm0,[eax+24]
411  psrlq mm4,32
412  paddq mm5,mm4
413  movd [edx+20],mm5
414 
415  pmuludq mm0,mm7
416  movd mm1,[eax+28]
417  psrlq mm5,32
418  paddq mm0,mm5
419  movd [edx+24],mm0
420 
421  pmuludq mm1,mm7
422  psrlq mm0,32
423  paddq mm1,mm0
424  movd [edx+28],mm1
425  psrlq mm1,32
426 
427  movd eax,mm1
428  emms
429 #else
430  mov edi,[z]
431  mov esi,[x]
432  mov eax,[esi] //load a
433  mul [y] //edx(hi):eax(lo)=a*b
434  add eax,[carry] //sum lo carry
435  adc edx,0 //sum hi carry
436  mov ecx,edx //store carry
437  mov [edi],eax //load a
438 
439  mov eax,[esi+4] //load a
440  mul [y] //edx(hi):eax(lo)=a*b
441  add eax,ecx //sum lo carry
442  adc edx,0 //sum hi carry
443  mov ecx,edx //store carry
444  mov [edi+4],eax //load a
445 
446  mov eax,[esi+8] //load a
447  mul [y] //edx(hi):eax(lo)=a*b
448  add eax,ecx //sum lo carry
449  adc edx,0 //sum hi carry
450  mov ecx,edx //store carry
451  mov [edi+8],eax //load a
452 
453  mov eax,[esi+12] //load a
454  mul [y] //edx(hi):eax(lo)=a*b
455  add eax,ecx //sum lo carry
456  adc edx,0 //sum hi carry
457  mov ecx,edx //store carry
458  mov [edi+12],eax //load a
459 
460  mov eax,[esi+16] //load a
461  mul [y] //edx(hi):eax(lo)=a*b
462  add eax,ecx //sum lo carry
463  adc edx,0 //sum hi carry
464  mov ecx,edx //store carry
465  mov [edi+16],eax //load a
466 
467  mov eax,[esi+20] //load a
468  mul [y] //edx(hi):eax(lo)=a*b
469  add eax,ecx //sum lo carry
470  adc edx,0 //sum hi carry
471  mov ecx,edx //store carry
472  mov [edi+20],eax //load a
473 
474  mov eax,[esi+24] //load a
475  mul [y] //edx(hi):eax(lo)=a*b
476  add eax,ecx //sum lo carry
477  adc edx,0 //sum hi carry
478  mov ecx,edx //store carry
479  mov [edi+24],eax //load a
480 
481  mov eax,[esi+28] //load a
482  mul [y] //edx(hi):eax(lo)=a*b
483  add eax,ecx //sum lo carry
484  adc edx,0 //sum hi carry
485  mov [edi+28],eax //load a
486  mov eax,edx //store carry
487 #endif
488  }
489  }
490 
491 /* Eight Word Block Multiply/Add: z[i] = word_madd3(x[i], y, z[i], &carry)
492 * for i = 0..7, in increasing index order. Returns the value left in carry
493 * after the last call. */
494 inline word word8_madd3(word z[8], const word x[8], word y, word carry)
495  {
496  // word_madd3 is defined in another header; presumably it computes
497  // x[i] * y + z[i] + carry and writes the next carry back - confirm.
498  for(int i = 0; i != 8; ++i)
499  {
500  const word multiplicand = x[i];
501  const word addend = z[i];
502  z[i] = word_madd3(multiplicand, y, addend, &carry);
503  }
504  return carry;
505  }
506 
507 /* Multiply-Add Accumulator: folds a * b into the three words *w2:*w1:*w0.
508 * NOTE(review): assumes word_madd2(a, b, &c) returns the low word of
509 * a * b + c and leaves the high word in c - confirm against mp_asm.h. */
510 inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
511  {
512  word high = *w0; // passed in as the addend, overwritten by word_madd2
513  *w0 = word_madd2(a, b, &high);
514  const word middle = (*w1 += high);
515  *w2 += (middle < high); // adds 1 iff the update of *w1 wrapped, else 0
516  }
517 
518 /* Multiply-Add Accumulator, Doubled: adds 2 * a * b into *w2:*w1:*w0.
519 * NOTE(review): assumes word_madd2(a, b, &c) returns the low word of
520 * a * b + c and leaves the high word in c - confirm against mp_asm.h. */
521 inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
522  {
523  word prod_hi = 0;
524  const word prod_lo = word_madd2(a, b, &prod_hi);
525 
526  // Shift the two-word product left by one bit; the bit leaving the high word becomes a third word.
527  const word dbl_hi = (prod_hi >> (BOTAN_MP_WORD_BITS-1));
528  const word dbl_mid = (prod_hi << 1) | (prod_lo >> (BOTAN_MP_WORD_BITS-1));
529  const word dbl_lo = (prod_lo << 1);
530 
531  // Add the doubled product into the accumulator, lowest word first, chaining the carry.
532  word chain = 0;
533  *w0 = word_add(*w0, dbl_lo, &chain);
534  *w1 = word_add(*w1, dbl_mid, &chain);
535  *w2 = word_add(*w2, dbl_hi, &chain);
536  }
537 
538 }
539 
540 }
541 
542 #endif
word word8_muladd(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:287
word word8_sub2_rev(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:94
word word8_add2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:33
word word8_linmul3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:143
word word_madd3(word a, word b, word c, word *d)
Definition: mp_asm.h:102
word word8_sub2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:78
word word_madd2(word a, word b, word *c)
Definition: mp_asm.h:86
word word8_madd3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:159
word word8_linmul2(word x[8], word y, word carry)
Definition: mp_asmi.h:127
void word3_muladd(word *w2, word *w1, word *w0, word a, word b)
Definition: mp_asmi.h:175
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:49
word word_sub(word x, word y, word *carry)
Definition: mp_asmi.h:66
void word3_muladd_2(word *w2, word *w1, word *w0, word a, word b)
Definition: mp_asmi.h:186
word word_add(word x, word y, word *carry)
Definition: mp_asmi.h:21
word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:110