8 #include <botan/internal/mp_core.h>
9 #include <botan/internal/mp_asmi.h>
10 #include <botan/mem_ops.h>
19 void karatsuba_mul(word z[],
const word x[],
const word y[],
size_t N,
22 if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
34 const size_t N2 = N / 2;
37 const word* x1 = x + N2;
39 const word* y1 = y + N2;
60 karatsuba_mul(workspace, z0, z1, N2, workspace+N);
63 karatsuba_mul(z0, x0, y0, N2, workspace+N);
64 karatsuba_mul(z1, x1, y1, N2, workspace+N);
66 const size_t blocks_of_8 = N - (N % 8);
70 for(
size_t j = 0; j != blocks_of_8; j += 8)
71 ws_carry =
word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry);
73 for(
size_t j = blocks_of_8; j != N; ++j)
74 workspace[N + j] =
word_add(z0[j], z1[j], &ws_carry);
78 for(
size_t j = 0; j != blocks_of_8; j += 8)
79 z_carry =
word8_add2(z + N2 + j, workspace + N + j, z_carry);
81 for(
size_t j = blocks_of_8; j != N; ++j)
82 z[N2 + j] =
word_add(z[N2 + j], workspace[N + j], &z_carry);
84 z[N + N2] =
word_add(z[N + N2], ws_carry, &z_carry);
87 for(
size_t j = 1; j != N2; ++j)
91 if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
100 void karatsuba_sqr(word z[],
const word x[],
size_t N, word workspace[])
102 if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
114 const size_t N2 = N / 2;
117 const word* x1 = x + N2;
132 karatsuba_sqr(workspace, z0, N2, workspace+N);
135 karatsuba_sqr(z0, x0, N2, workspace+N);
136 karatsuba_sqr(z1, x1, N2, workspace+N);
138 const size_t blocks_of_8 = N - (N % 8);
142 for(
size_t j = 0; j != blocks_of_8; j += 8)
143 ws_carry =
word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry);
145 for(
size_t j = blocks_of_8; j != N; ++j)
146 workspace[N + j] =
word_add(z0[j], z1[j], &ws_carry);
150 for(
size_t j = 0; j != blocks_of_8; j += 8)
151 z_carry =
word8_add2(z + N2 + j, workspace + N + j, z_carry);
153 for(
size_t j = blocks_of_8; j != N; ++j)
154 z[N2 + j] =
word_add(z[N2 + j], workspace[N + j], &z_carry);
156 z[N + N2] =
word_add(z[N + N2], ws_carry, &z_carry);
159 for(
size_t j = 1; j != N2; ++j)
174 size_t karatsuba_size(
size_t z_size,
175 size_t x_size,
size_t x_sw,
176 size_t y_size,
size_t y_sw)
178 if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
181 if(((x_size == x_sw) && (x_size % 2)) ||
182 ((y_size == y_sw) && (y_size % 2)))
185 const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
186 const size_t end = (x_size < y_size) ? x_size : y_size;
195 for(
size_t j = start; j <= end; ++j)
203 if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
206 (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
218 size_t karatsuba_size(
size_t z_size,
size_t x_size,
size_t x_sw)
227 for(
size_t j = x_sw; j <= x_size; ++j)
235 if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
249 const word x[],
size_t x_size,
size_t x_sw,
250 const word y[],
size_t y_size,
size_t y_sw)
260 else if(x_sw <= 4 && x_size >= 4 &&
261 y_sw <= 4 && y_size >= 4 && z_size >= 8)
265 else if(x_sw <= 6 && x_size >= 6 &&
266 y_sw <= 6 && y_size >= 6 && z_size >= 12)
270 else if(x_sw <= 8 && x_size >= 8 &&
271 y_sw <= 8 && y_size >= 8 && z_size >= 16)
275 else if(x_sw <= 16 && x_size >= 16 &&
276 y_sw <= 16 && y_size >= 16 && z_size >= 32)
280 else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD ||
281 y_sw < BOTAN_KARAT_MUL_THRESHOLD ||
288 const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
293 karatsuba_mul(z, x, y, N, workspace);
304 const word x[],
size_t x_size,
size_t x_sw)
310 else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
314 else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
318 else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
322 else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
326 else if(x_size < BOTAN_KARAT_SQR_THRESHOLD || !workspace)
332 const size_t N = karatsuba_size(z_size, x_size, x_sw);
337 karatsuba_sqr(z, x, N, workspace);
void bigint_simple_mul(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
word word8_add2(word x[8], const word y[8], word carry)
void clear_mem(T *ptr, size_t n)
word bigint_sub2(word x[], size_t x_size, const word y[], size_t y_size)
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
word bigint_sub3(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
void bigint_linmul3(word z[], const word x[], size_t x_size, word y)
void bigint_comba_sqr16(word z[32], const word x[16])
void bigint_sqr(word z[], size_t z_size, word workspace[], const word x[], size_t x_size, size_t x_sw)
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
void bigint_mul(word z[], size_t z_size, word workspace[], const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw)
void bigint_comba_sqr8(word z[16], const word x[8])
void bigint_add2(word x[], size_t x_size, const word y[], size_t y_size)
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
word word_add(word x, word y, word *carry)
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
void bigint_simple_sqr(word z[], const word x[], size_t x_size)
void bigint_comba_sqr4(word z[8], const word x[4])
void bigint_comba_sqr6(word z[12], const word x[6])
s32bit bigint_cmp(const word x[], size_t x_size, const word y[], size_t y_size)