Botan  1.10.9
mp_karat.cpp
Go to the documentation of this file.
1 /*
2 * Karatsuba Multiplication/Squaring
3 * (C) 1999-2010 Jack Lloyd
4 *
5 * Distributed under the terms of the Botan license
6 */
7 
8 #include <botan/internal/mp_core.h>
9 #include <botan/internal/mp_asmi.h>
10 #include <botan/mem_ops.h>
11 
12 namespace Botan {
13 
14 namespace {
15 
16 /*
17 * Karatsuba Multiplication Operation
18 */
19 void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
20  word workspace[])
21  {
22  if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
23  {
24  if(N == 6)
25  return bigint_comba_mul6(z, x, y);
26  else if(N == 8)
27  return bigint_comba_mul8(z, x, y);
28  else if(N == 16)
29  return bigint_comba_mul16(z, x, y);
30  else
31  return bigint_simple_mul(z, x, N, y, N);
32  }
33 
34  const size_t N2 = N / 2;
35 
36  const word* x0 = x;
37  const word* x1 = x + N2;
38  const word* y0 = y;
39  const word* y1 = y + N2;
40  word* z0 = z;
41  word* z1 = z + N;
42 
43  const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
44  const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
45 
46  clear_mem(workspace, 2*N);
47 
48  //if(cmp0 && cmp1)
49  {
50  if(cmp0 > 0)
51  bigint_sub3(z0, x0, N2, x1, N2);
52  else
53  bigint_sub3(z0, x1, N2, x0, N2);
54 
55  if(cmp1 > 0)
56  bigint_sub3(z1, y1, N2, y0, N2);
57  else
58  bigint_sub3(z1, y0, N2, y1, N2);
59 
60  karatsuba_mul(workspace, z0, z1, N2, workspace+N);
61  }
62 
63  karatsuba_mul(z0, x0, y0, N2, workspace+N);
64  karatsuba_mul(z1, x1, y1, N2, workspace+N);
65 
66  const size_t blocks_of_8 = N - (N % 8);
67 
68  word ws_carry = 0;
69 
70  for(size_t j = 0; j != blocks_of_8; j += 8)
71  ws_carry = word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry);
72 
73  for(size_t j = blocks_of_8; j != N; ++j)
74  workspace[N + j] = word_add(z0[j], z1[j], &ws_carry);
75 
76  word z_carry = 0;
77 
78  for(size_t j = 0; j != blocks_of_8; j += 8)
79  z_carry = word8_add2(z + N2 + j, workspace + N + j, z_carry);
80 
81  for(size_t j = blocks_of_8; j != N; ++j)
82  z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &z_carry);
83 
84  z[N + N2] = word_add(z[N + N2], ws_carry, &z_carry);
85 
86  if(z_carry)
87  for(size_t j = 1; j != N2; ++j)
88  if(++z[N + N2 + j])
89  break;
90 
91  if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
92  bigint_add2(z + N2, 2*N-N2, workspace, N);
93  else
94  bigint_sub2(z + N2, 2*N-N2, workspace, N);
95  }
96 
97 /*
98 * Karatsuba Squaring Operation
99 */
100 void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
101  {
102  if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
103  {
104  if(N == 6)
105  return bigint_comba_sqr6(z, x);
106  else if(N == 8)
107  return bigint_comba_sqr8(z, x);
108  else if(N == 16)
109  return bigint_comba_sqr16(z, x);
110  else
111  return bigint_simple_sqr(z, x, N);
112  }
113 
114  const size_t N2 = N / 2;
115 
116  const word* x0 = x;
117  const word* x1 = x + N2;
118  word* z0 = z;
119  word* z1 = z + N;
120 
121  const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
122 
123  clear_mem(workspace, 2*N);
124 
125  //if(cmp)
126  {
127  if(cmp > 0)
128  bigint_sub3(z0, x0, N2, x1, N2);
129  else
130  bigint_sub3(z0, x1, N2, x0, N2);
131 
132  karatsuba_sqr(workspace, z0, N2, workspace+N);
133  }
134 
135  karatsuba_sqr(z0, x0, N2, workspace+N);
136  karatsuba_sqr(z1, x1, N2, workspace+N);
137 
138  const size_t blocks_of_8 = N - (N % 8);
139 
140  word ws_carry = 0;
141 
142  for(size_t j = 0; j != blocks_of_8; j += 8)
143  ws_carry = word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry);
144 
145  for(size_t j = blocks_of_8; j != N; ++j)
146  workspace[N + j] = word_add(z0[j], z1[j], &ws_carry);
147 
148  word z_carry = 0;
149 
150  for(size_t j = 0; j != blocks_of_8; j += 8)
151  z_carry = word8_add2(z + N2 + j, workspace + N + j, z_carry);
152 
153  for(size_t j = blocks_of_8; j != N; ++j)
154  z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &z_carry);
155 
156  z[N + N2] = word_add(z[N + N2], ws_carry, &z_carry);
157 
158  if(z_carry)
159  for(size_t j = 1; j != N2; ++j)
160  if(++z[N + N2 + j])
161  break;
162 
163  /*
164  * This is only actually required if cmp is != 0, however
165  * if cmp==0 then workspace[0:N] == 0 and avoiding the jump
166  * hides a timing channel.
167  */
168  bigint_sub2(z + N2, 2*N-N2, workspace, N);
169  }
170 
171 /*
172 * Pick a good size for the Karatsuba multiply
173 */
174 size_t karatsuba_size(size_t z_size,
175  size_t x_size, size_t x_sw,
176  size_t y_size, size_t y_sw)
177  {
178  if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
179  return 0;
180 
181  if(((x_size == x_sw) && (x_size % 2)) ||
182  ((y_size == y_sw) && (y_size % 2)))
183  return 0;
184 
185  const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
186  const size_t end = (x_size < y_size) ? x_size : y_size;
187 
188  if(start == end)
189  {
190  if(start % 2)
191  return 0;
192  return start;
193  }
194 
195  for(size_t j = start; j <= end; ++j)
196  {
197  if(j % 2)
198  continue;
199 
200  if(2*j > z_size)
201  return 0;
202 
203  if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
204  {
205  if(j % 4 == 2 &&
206  (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
207  return j+2;
208  return j;
209  }
210  }
211 
212  return 0;
213  }
214 
215 /*
216 * Pick a good size for the Karatsuba squaring
217 */
218 size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
219  {
220  if(x_sw == x_size)
221  {
222  if(x_sw % 2)
223  return 0;
224  return x_sw;
225  }
226 
227  for(size_t j = x_sw; j <= x_size; ++j)
228  {
229  if(j % 2)
230  continue;
231 
232  if(2*j > z_size)
233  return 0;
234 
235  if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
236  return j+2;
237  return j;
238  }
239 
240  return 0;
241  }
242 
243 }
244 
245 /*
246 * Multiplication Algorithm Dispatcher
247 */
248 void bigint_mul(word z[], size_t z_size, word workspace[],
249  const word x[], size_t x_size, size_t x_sw,
250  const word y[], size_t y_size, size_t y_sw)
251  {
252  if(x_sw == 1)
253  {
254  bigint_linmul3(z, y, y_sw, x[0]);
255  }
256  else if(y_sw == 1)
257  {
258  bigint_linmul3(z, x, x_sw, y[0]);
259  }
260  else if(x_sw <= 4 && x_size >= 4 &&
261  y_sw <= 4 && y_size >= 4 && z_size >= 8)
262  {
263  bigint_comba_mul4(z, x, y);
264  }
265  else if(x_sw <= 6 && x_size >= 6 &&
266  y_sw <= 6 && y_size >= 6 && z_size >= 12)
267  {
268  bigint_comba_mul6(z, x, y);
269  }
270  else if(x_sw <= 8 && x_size >= 8 &&
271  y_sw <= 8 && y_size >= 8 && z_size >= 16)
272  {
273  bigint_comba_mul8(z, x, y);
274  }
275  else if(x_sw <= 16 && x_size >= 16 &&
276  y_sw <= 16 && y_size >= 16 && z_size >= 32)
277  {
278  bigint_comba_mul16(z, x, y);
279  }
280  else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD ||
281  y_sw < BOTAN_KARAT_MUL_THRESHOLD ||
282  !workspace)
283  {
284  bigint_simple_mul(z, x, x_sw, y, y_sw);
285  }
286  else
287  {
288  const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
289 
290  if(N)
291  {
292  clear_mem(workspace, 2*N);
293  karatsuba_mul(z, x, y, N, workspace);
294  }
295  else
296  bigint_simple_mul(z, x, x_sw, y, y_sw);
297  }
298  }
299 
300 /*
301 * Squaring Algorithm Dispatcher
302 */
303 void bigint_sqr(word z[], size_t z_size, word workspace[],
304  const word x[], size_t x_size, size_t x_sw)
305  {
306  if(x_sw == 1)
307  {
308  bigint_linmul3(z, x, x_sw, x[0]);
309  }
310  else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
311  {
312  bigint_comba_sqr4(z, x);
313  }
314  else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
315  {
316  bigint_comba_sqr6(z, x);
317  }
318  else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
319  {
320  bigint_comba_sqr8(z, x);
321  }
322  else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
323  {
324  bigint_comba_sqr16(z, x);
325  }
326  else if(x_size < BOTAN_KARAT_SQR_THRESHOLD || !workspace)
327  {
328  bigint_simple_sqr(z, x, x_sw);
329  }
330  else
331  {
332  const size_t N = karatsuba_size(z_size, x_size, x_sw);
333 
334  if(N)
335  {
336  clear_mem(workspace, 2*N);
337  karatsuba_sqr(z, x, N, workspace);
338  }
339  else
340  bigint_simple_sqr(z, x, x_sw);
341  }
342  }
343 
344 }
void bigint_simple_mul(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_mulop.cpp:20
word word8_add2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:33
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:32
word bigint_sub2(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_asm.cpp:87
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
Definition: mp_comba.cpp:51
word bigint_sub3(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_asm.cpp:127
signed int s32bit
Definition: types.h:37
void bigint_linmul3(word z[], const word x[], size_t x_size, word y)
Definition: mp_asm.cpp:167
void bigint_comba_sqr16(word z[32], const word x[16])
Definition: mp_comba.cpp:387
void bigint_sqr(word z[], size_t z_size, word workspace[], const word x[], size_t x_size, size_t x_sw)
Definition: mp_karat.cpp:303
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:49
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
Definition: mp_comba.cpp:284
void bigint_mul(word z[], size_t z_size, word workspace[], const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw)
Definition: mp_karat.cpp:248
void bigint_comba_sqr8(word z[16], const word x[8])
Definition: mp_comba.cpp:209
void bigint_add2(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_asm.cpp:68
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
Definition: mp_comba.cpp:594
word word_add(word x, word y, word *carry)
Definition: mp_asmi.h:21
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
Definition: mp_comba.cpp:142
void bigint_simple_sqr(word z[], const word x[], size_t x_size)
Definition: mp_mulop.cpp:54
void bigint_comba_sqr4(word z[8], const word x[4])
Definition: mp_comba.cpp:18
void bigint_comba_sqr6(word z[12], const word x[6])
Definition: mp_comba.cpp:90
s32bit bigint_cmp(const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_misc.cpp:41