8 #include <botan/aes_ni.h>
9 #include <botan/loadstor.h>
10 #include <wmmintrin.h>
/*
* Combine a 128-bit round key with a keygen-assist value to produce the
* next 128-bit round key.
*
* @param key the previous round key, viewed as four 32-bit lanes
* @param key_with_rcon keygen-assist value; only its highest 32-bit lane
*        is used (callers in this file pass _mm_aeskeygenassist_si128 output)
* @return lane i holds the XOR of input lanes 0..i, XORed with the highest
*         lane of key_with_rcon
*/
__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon)
   {
   // Replicate 32-bit lane 3 of the assist value into every lane
   const __m128i assist = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(3,3,3,3));

   // Three rounds of "shift up by one 32-bit lane and XOR" leave each lane
   // holding the XOR of itself and all lower lanes of the input key
   __m128i folded = key;
   for(int step = 0; step != 3; ++step)
      folded = _mm_xor_si128(folded, _mm_slli_si128(folded, 4));

   return _mm_xor_si128(folded, assist);
   }
// One step of the AES-192 key expansion: updates the working values key1 and
// key2 and writes expanded key words through out.
// NOTE(review): the rest of the parameter list and the declarations and
// initialisation of key1, key2 and out are not visible in this excerpt -
// confirm against the full file.
25 void aes_192_key_expansion(__m128i* K1, __m128i* K2, __m128i key2_with_rcon,
// Replicate 32-bit lane 1 of the keygen-assist value into every lane.
31 key2_with_rcon = _mm_shuffle_epi32(key2_with_rcon, _MM_SHUFFLE(1,1,1,1));
// Three "shift up by 4 bytes and XOR" steps: each lane of key1 becomes the
// XOR of itself and all lower lanes.
32 key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4));
33 key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4));
34 key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4));
// Mix the broadcast keygen-assist word into every lane.
35 key1 = _mm_xor_si128(key1, key2_with_rcon);
// Unaligned 16-byte store of the new key1 at the start of out.
38 _mm_storeu_si128((__m128i*)out, key1);
// key2: XOR with itself shifted up one lane, then with the top lane of the
// new key1 broadcast to all lanes.
43 key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));
44 key2 = _mm_xor_si128(key2, _mm_shuffle_epi32(key1, _MM_SHUFFLE(3,3,3,3)));
// Only the two low 32-bit lanes of key2 are written out, as out[4] and out[5].
47 out[4] = _mm_cvtsi128_si32(key2);
48 out[5] = _mm_cvtsi128_si32(_mm_srli_si128(key2, 4));
/*
* Combine a 128-bit round key with a substitution word derived from a
* second round key.
*
* @param key the round key to be updated, viewed as four 32-bit lanes
* @param key2 the round key fed to _mm_aeskeygenassist_si128
* @return lane i holds the XOR of lanes 0..i of key, XORed with lane 2 of
*         the keygen-assist result for key2
*/
__m128i aes_256_key_expansion(__m128i key, __m128i key2)
   {
   // A round constant of zero is used; lane 2 of the result is then
   // replicated into every lane
   const __m128i assist =
      _mm_shuffle_epi32(_mm_aeskeygenassist_si128(key2, 0x00), _MM_SHUFFLE(2,2,2,2));

   // Three rounds of "shift up by one 32-bit lane and XOR" leave each lane
   // holding the XOR of itself and all lower lanes of the input key
   __m128i folded = key;
   for(int step = 0; step != 3; ++step)
      folded = _mm_xor_si128(folded, _mm_slli_si128(folded, 4));

   return _mm_xor_si128(folded, assist);
   }
/*
* Apply _mm_aesenc_si128 with round key K to each of the four blocks
* B0..B3, which must be variables in scope at the point of use.
* Wrapped in do/while(0) so the expansion is a single statement.
*/
#define AES_ENC_4_ROUNDS(K)                     \
   do                                           \
      {                                         \
      B0 = _mm_aesenc_si128(B0, K);             \
      B1 = _mm_aesenc_si128(B1, K);             \
      B2 = _mm_aesenc_si128(B2, K);             \
      B3 = _mm_aesenc_si128(B3, K);             \
      } while(0)
/*
* Apply _mm_aesenclast_si128 with round key K to each of the four blocks
* B0..B3, which must be variables in scope at the point of use.
* Wrapped in do/while(0) so the expansion is a single statement.
*/
#define AES_ENC_4_LAST_ROUNDS(K)                \
   do                                           \
      {                                         \
      B0 = _mm_aesenclast_si128(B0, K);         \
      B1 = _mm_aesenclast_si128(B1, K);         \
      B2 = _mm_aesenclast_si128(B2, K);         \
      B3 = _mm_aesenclast_si128(B3, K);         \
      } while(0)
/*
* Apply _mm_aesdec_si128 with round key K to each of the four blocks
* B0..B3, which must be variables in scope at the point of use.
* Wrapped in do/while(0) so the expansion is a single statement.
*/
#define AES_DEC_4_ROUNDS(K)                     \
   do                                           \
      {                                         \
      B0 = _mm_aesdec_si128(B0, K);             \
      B1 = _mm_aesdec_si128(B1, K);             \
      B2 = _mm_aesdec_si128(B2, K);             \
      B3 = _mm_aesdec_si128(B3, K);             \
      } while(0)
/*
* Apply _mm_aesdeclast_si128 with round key K to each of the four blocks
* B0..B3, which must be variables in scope at the point of use.
* Wrapped in do/while(0) so the expansion is a single statement.
*/
#define AES_DEC_4_LAST_ROUNDS(K)                \
   do                                           \
      {                                         \
      B0 = _mm_aesdeclast_si128(B0, K);         \
      B1 = _mm_aesdeclast_si128(B1, K);         \
      B2 = _mm_aesdeclast_si128(B2, K);         \
      B3 = _mm_aesdeclast_si128(B3, K);         \
      } while(0)
// Body of a block-encryption routine that uses 11 round keys (K0..K10) read
// from EK.
// NOTE(review): presumably AES_128_NI::encrypt_n; the function header, the
// loop framing and the round calls of the four-block path are not visible in
// this excerpt - confirm against the full file.
// View the input and output buffers as sequences of 128-bit blocks.
108 const __m128i* in_mm = (
const __m128i*)in;
109 __m128i* out_mm = (__m128i*)out;
// Round keys are read from the start of EK with unaligned loads.
111 const __m128i* key_mm = (
const __m128i*)&EK[0];
113 __m128i K0 = _mm_loadu_si128(key_mm);
114 __m128i K1 = _mm_loadu_si128(key_mm + 1);
115 __m128i K2 = _mm_loadu_si128(key_mm + 2);
116 __m128i K3 = _mm_loadu_si128(key_mm + 3);
117 __m128i K4 = _mm_loadu_si128(key_mm + 4);
118 __m128i K5 = _mm_loadu_si128(key_mm + 5);
119 __m128i K6 = _mm_loadu_si128(key_mm + 6);
120 __m128i K7 = _mm_loadu_si128(key_mm + 7);
121 __m128i K8 = _mm_loadu_si128(key_mm + 8);
122 __m128i K9 = _mm_loadu_si128(key_mm + 9);
123 __m128i K10 = _mm_loadu_si128(key_mm + 10);
// Four-block path: load four consecutive input blocks.
127 __m128i B0 = _mm_loadu_si128(in_mm + 0);
128 __m128i B1 = _mm_loadu_si128(in_mm + 1);
129 __m128i B2 = _mm_loadu_si128(in_mm + 2);
130 __m128i B3 = _mm_loadu_si128(in_mm + 3);
// XOR every block with the first round key K0.
132 B0 = _mm_xor_si128(B0, K0);
133 B1 = _mm_xor_si128(B1, K0);
134 B2 = _mm_xor_si128(B2, K0);
135 B3 = _mm_xor_si128(B3, K0);
// Write the four blocks to the matching output positions.
148 _mm_storeu_si128(out_mm + 0, B0);
149 _mm_storeu_si128(out_mm + 1, B1);
150 _mm_storeu_si128(out_mm + 2, B2);
151 _mm_storeu_si128(out_mm + 3, B3);
// Single-block path: one block per iteration, `blocks` iterations.
158 for(
size_t i = 0; i != blocks; ++i)
160 __m128i B = _mm_loadu_si128(in_mm + i);
// XOR with K0, nine _mm_aesenc_si128 rounds (K1..K9), then the final
// _mm_aesenclast_si128 round with K10.
162 B = _mm_xor_si128(B, K0);
164 B = _mm_aesenc_si128(B, K1);
165 B = _mm_aesenc_si128(B, K2);
166 B = _mm_aesenc_si128(B, K3);
167 B = _mm_aesenc_si128(B, K4);
168 B = _mm_aesenc_si128(B, K5);
169 B = _mm_aesenc_si128(B, K6);
170 B = _mm_aesenc_si128(B, K7);
171 B = _mm_aesenc_si128(B, K8);
172 B = _mm_aesenc_si128(B, K9);
173 B = _mm_aesenclast_si128(B, K10);
175 _mm_storeu_si128(out_mm + i, B);
// Body of a block-decryption routine that uses 11 round keys (K0..K10) read
// from DK.
// NOTE(review): presumably AES_128_NI::decrypt_n; the function header, the
// loop framing and the round calls of the four-block path are not visible in
// this excerpt - confirm against the full file.
// View the input and output buffers as sequences of 128-bit blocks.
184 const __m128i* in_mm = (
const __m128i*)in;
185 __m128i* out_mm = (__m128i*)out;
// Round keys are read from the start of DK with unaligned loads.
187 const __m128i* key_mm = (
const __m128i*)&DK[0];
189 __m128i K0 = _mm_loadu_si128(key_mm);
190 __m128i K1 = _mm_loadu_si128(key_mm + 1);
191 __m128i K2 = _mm_loadu_si128(key_mm + 2);
192 __m128i K3 = _mm_loadu_si128(key_mm + 3);
193 __m128i K4 = _mm_loadu_si128(key_mm + 4);
194 __m128i K5 = _mm_loadu_si128(key_mm + 5);
195 __m128i K6 = _mm_loadu_si128(key_mm + 6);
196 __m128i K7 = _mm_loadu_si128(key_mm + 7);
197 __m128i K8 = _mm_loadu_si128(key_mm + 8);
198 __m128i K9 = _mm_loadu_si128(key_mm + 9);
199 __m128i K10 = _mm_loadu_si128(key_mm + 10);
// Four-block path: load four consecutive input blocks.
203 __m128i B0 = _mm_loadu_si128(in_mm + 0);
204 __m128i B1 = _mm_loadu_si128(in_mm + 1);
205 __m128i B2 = _mm_loadu_si128(in_mm + 2);
206 __m128i B3 = _mm_loadu_si128(in_mm + 3);
// XOR every block with the first round key K0.
208 B0 = _mm_xor_si128(B0, K0);
209 B1 = _mm_xor_si128(B1, K0);
210 B2 = _mm_xor_si128(B2, K0);
211 B3 = _mm_xor_si128(B3, K0);
// Write the four blocks to the matching output positions.
224 _mm_storeu_si128(out_mm + 0, B0);
225 _mm_storeu_si128(out_mm + 1, B1);
226 _mm_storeu_si128(out_mm + 2, B2);
227 _mm_storeu_si128(out_mm + 3, B3);
// Single-block path: one block per iteration, `blocks` iterations.
234 for(
size_t i = 0; i != blocks; ++i)
236 __m128i B = _mm_loadu_si128(in_mm + i);
// XOR with K0, nine _mm_aesdec_si128 rounds (K1..K9), then the final
// _mm_aesdeclast_si128 round with K10.
238 B = _mm_xor_si128(B, K0);
240 B = _mm_aesdec_si128(B, K1);
241 B = _mm_aesdec_si128(B, K2);
242 B = _mm_aesdec_si128(B, K3);
243 B = _mm_aesdec_si128(B, K4);
244 B = _mm_aesdec_si128(B, K5);
245 B = _mm_aesdec_si128(B, K6);
246 B = _mm_aesdec_si128(B, K7);
247 B = _mm_aesdec_si128(B, K8);
248 B = _mm_aesdec_si128(B, K9);
249 B = _mm_aesdeclast_si128(B, K10);
251 _mm_storeu_si128(out_mm + i, B);
// AES-128 key schedule: stores eleven round keys into EK and a derived set of
// eleven keys into DK. The second parameter is unnamed, so it cannot be
// referenced in the body.
258 void AES_128_NI::key_schedule(
const byte key[],
size_t)
// Helper: feed K and the constant RCON through the keygen-assist intrinsic and
// pass both to aes_128_key_expansion.
260 #define AES_128_key_exp(K, RCON) \
261 aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON))
// K0 is the first 16 bytes of the supplied key (unaligned load).
263 __m128i K0 = _mm_loadu_si128((
const __m128i*)(key));
// NOTE(review): the definitions of K1..K10 are not visible in this excerpt -
// confirm against the full file.
// Encryption schedule: K0..K10 stored in order at EK[0] onwards.
275 __m128i* EK_mm = (__m128i*)&EK[0];
276 _mm_storeu_si128(EK_mm , K0);
277 _mm_storeu_si128(EK_mm + 1, K1);
278 _mm_storeu_si128(EK_mm + 2, K2);
279 _mm_storeu_si128(EK_mm + 3, K3);
280 _mm_storeu_si128(EK_mm + 4, K4);
281 _mm_storeu_si128(EK_mm + 5, K5);
282 _mm_storeu_si128(EK_mm + 6, K6);
283 _mm_storeu_si128(EK_mm + 7, K7);
284 _mm_storeu_si128(EK_mm + 8, K8);
285 _mm_storeu_si128(EK_mm + 9, K9);
286 _mm_storeu_si128(EK_mm + 10, K10);
// Decryption schedule: the same keys in reverse order. The first and last
// entries (K10, K0) are stored unchanged; the ones in between are passed
// through _mm_aesimc_si128 first.
290 __m128i* DK_mm = (__m128i*)&DK[0];
291 _mm_storeu_si128(DK_mm , K10);
292 _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9));
293 _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8));
294 _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7));
295 _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6));
296 _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5));
297 _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4));
298 _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3));
299 _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2));
300 _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1));
301 _mm_storeu_si128(DK_mm + 10, K0);
// Body of a block-encryption routine that uses 13 round keys (K0..K12) read
// from EK.
// NOTE(review): presumably AES_192_NI::encrypt_n; the function header, the
// loop framing and the round calls of the four-block path are not visible in
// this excerpt - confirm against the full file.
// View the input and output buffers as sequences of 128-bit blocks.
318 const __m128i* in_mm = (
const __m128i*)in;
319 __m128i* out_mm = (__m128i*)out;
// Round keys are read from the start of EK with unaligned loads.
321 const __m128i* key_mm = (
const __m128i*)&EK[0];
323 __m128i K0 = _mm_loadu_si128(key_mm);
324 __m128i K1 = _mm_loadu_si128(key_mm + 1);
325 __m128i K2 = _mm_loadu_si128(key_mm + 2);
326 __m128i K3 = _mm_loadu_si128(key_mm + 3);
327 __m128i K4 = _mm_loadu_si128(key_mm + 4);
328 __m128i K5 = _mm_loadu_si128(key_mm + 5);
329 __m128i K6 = _mm_loadu_si128(key_mm + 6);
330 __m128i K7 = _mm_loadu_si128(key_mm + 7);
331 __m128i K8 = _mm_loadu_si128(key_mm + 8);
332 __m128i K9 = _mm_loadu_si128(key_mm + 9);
333 __m128i K10 = _mm_loadu_si128(key_mm + 10);
334 __m128i K11 = _mm_loadu_si128(key_mm + 11);
335 __m128i K12 = _mm_loadu_si128(key_mm + 12);
// Four-block path: load four consecutive input blocks.
339 __m128i B0 = _mm_loadu_si128(in_mm + 0);
340 __m128i B1 = _mm_loadu_si128(in_mm + 1);
341 __m128i B2 = _mm_loadu_si128(in_mm + 2);
342 __m128i B3 = _mm_loadu_si128(in_mm + 3);
// XOR every block with the first round key K0.
344 B0 = _mm_xor_si128(B0, K0);
345 B1 = _mm_xor_si128(B1, K0);
346 B2 = _mm_xor_si128(B2, K0);
347 B3 = _mm_xor_si128(B3, K0);
// Write the four blocks to the matching output positions.
362 _mm_storeu_si128(out_mm + 0, B0);
363 _mm_storeu_si128(out_mm + 1, B1);
364 _mm_storeu_si128(out_mm + 2, B2);
365 _mm_storeu_si128(out_mm + 3, B3);
// Single-block path: one block per iteration, `blocks` iterations.
372 for(
size_t i = 0; i != blocks; ++i)
374 __m128i B = _mm_loadu_si128(in_mm + i);
// XOR with K0, eleven _mm_aesenc_si128 rounds (K1..K11), then the final
// _mm_aesenclast_si128 round with K12.
376 B = _mm_xor_si128(B, K0);
378 B = _mm_aesenc_si128(B, K1);
379 B = _mm_aesenc_si128(B, K2);
380 B = _mm_aesenc_si128(B, K3);
381 B = _mm_aesenc_si128(B, K4);
382 B = _mm_aesenc_si128(B, K5);
383 B = _mm_aesenc_si128(B, K6);
384 B = _mm_aesenc_si128(B, K7);
385 B = _mm_aesenc_si128(B, K8);
386 B = _mm_aesenc_si128(B, K9);
387 B = _mm_aesenc_si128(B, K10);
388 B = _mm_aesenc_si128(B, K11);
389 B = _mm_aesenclast_si128(B, K12);
391 _mm_storeu_si128(out_mm + i, B);
// Body of a block-decryption routine that uses 13 round keys (K0..K12) read
// from DK.
// NOTE(review): presumably AES_192_NI::decrypt_n; the function header, the
// loop framing and the round calls of the four-block path are not visible in
// this excerpt - confirm against the full file.
// View the input and output buffers as sequences of 128-bit blocks.
400 const __m128i* in_mm = (
const __m128i*)in;
401 __m128i* out_mm = (__m128i*)out;
// Round keys are read from the start of DK with unaligned loads.
403 const __m128i* key_mm = (
const __m128i*)&DK[0];
405 __m128i K0 = _mm_loadu_si128(key_mm);
406 __m128i K1 = _mm_loadu_si128(key_mm + 1);
407 __m128i K2 = _mm_loadu_si128(key_mm + 2);
408 __m128i K3 = _mm_loadu_si128(key_mm + 3);
409 __m128i K4 = _mm_loadu_si128(key_mm + 4);
410 __m128i K5 = _mm_loadu_si128(key_mm + 5);
411 __m128i K6 = _mm_loadu_si128(key_mm + 6);
412 __m128i K7 = _mm_loadu_si128(key_mm + 7);
413 __m128i K8 = _mm_loadu_si128(key_mm + 8);
414 __m128i K9 = _mm_loadu_si128(key_mm + 9);
415 __m128i K10 = _mm_loadu_si128(key_mm + 10);
416 __m128i K11 = _mm_loadu_si128(key_mm + 11);
417 __m128i K12 = _mm_loadu_si128(key_mm + 12);
// Four-block path: load four consecutive input blocks.
421 __m128i B0 = _mm_loadu_si128(in_mm + 0);
422 __m128i B1 = _mm_loadu_si128(in_mm + 1);
423 __m128i B2 = _mm_loadu_si128(in_mm + 2);
424 __m128i B3 = _mm_loadu_si128(in_mm + 3);
// XOR every block with the first round key K0.
426 B0 = _mm_xor_si128(B0, K0);
427 B1 = _mm_xor_si128(B1, K0);
428 B2 = _mm_xor_si128(B2, K0);
429 B3 = _mm_xor_si128(B3, K0);
// Write the four blocks to the matching output positions.
444 _mm_storeu_si128(out_mm + 0, B0);
445 _mm_storeu_si128(out_mm + 1, B1);
446 _mm_storeu_si128(out_mm + 2, B2);
447 _mm_storeu_si128(out_mm + 3, B3);
// Single-block path: one block per iteration, `blocks` iterations.
454 for(
size_t i = 0; i != blocks; ++i)
456 __m128i B = _mm_loadu_si128(in_mm + i);
// XOR with K0, eleven _mm_aesdec_si128 rounds (K1..K11), then the final
// _mm_aesdeclast_si128 round with K12.
458 B = _mm_xor_si128(B, K0);
460 B = _mm_aesdec_si128(B, K1);
461 B = _mm_aesdec_si128(B, K2);
462 B = _mm_aesdec_si128(B, K3);
463 B = _mm_aesdec_si128(B, K4);
464 B = _mm_aesdec_si128(B, K5);
465 B = _mm_aesdec_si128(B, K6);
466 B = _mm_aesdec_si128(B, K7);
467 B = _mm_aesdec_si128(B, K8);
468 B = _mm_aesdec_si128(B, K9);
469 B = _mm_aesdec_si128(B, K10);
470 B = _mm_aesdec_si128(B, K11);
471 B = _mm_aesdeclast_si128(B, K12);
473 _mm_storeu_si128(out_mm + i, B);
// AES-192 key schedule: expands the key into EK, then derives DK from EK.
// The second parameter is unnamed, so it cannot be referenced in the body.
// NOTE(review): the initial writes to EK and the invocations of
// AES_192_key_exp are not visible in this excerpt - confirm against the full
// file.
480 void AES_192_NI::key_schedule(
const byte key[],
size_t)
// K0 holds key bytes 0..15.
482 __m128i K0 = _mm_loadu_si128((
const __m128i*)(key));
// Load key bytes 8..23, then shift right by 8 bytes so the low half of K1
// holds key bytes 16..23 and the high half is zero.
483 __m128i K1 = _mm_loadu_si128((
const __m128i*)(key + 8));
484 K1 = _mm_srli_si128(K1, 8);
// Helper: run one aes_192_key_expansion step on K0/K1 with constant RCON,
// writing at EK + EK_OFF. The final argument is true only when EK_OFF is 48.
488 #define AES_192_key_exp(RCON, EK_OFF) \
489 aes_192_key_expansion(&K0, &K1, \
490 _mm_aeskeygenassist_si128(K1, RCON), \
491 EK + EK_OFF, EK_OFF == 48)
// Decryption schedule: the thirteen 128-bit keys of EK in reverse order. The
// first and last entries are copied unchanged; the ones in between are passed
// through _mm_aesimc_si128 first.
503 const __m128i* EK_mm = (
const __m128i*)&EK[0];
504 __m128i* DK_mm = (__m128i*)&DK[0];
505 _mm_storeu_si128(DK_mm , _mm_loadu_si128(EK_mm + 12));
506 _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 11)));
507 _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 10)));
508 _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 9)));
509 _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 8)));
510 _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 7)));
511 _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 6)));
512 _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 5)));
513 _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 4)));
514 _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 3)));
515 _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 2)));
516 _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 1)));
517 _mm_storeu_si128(DK_mm + 12, _mm_loadu_si128(EK_mm + 0));
// Body of a block-encryption routine that uses 15 round keys (K0..K14) read
// from EK.
// NOTE(review): presumably AES_256_NI::encrypt_n; the function header, the
// loop framing and the round calls of the four-block path are not visible in
// this excerpt - confirm against the full file.
// View the input and output buffers as sequences of 128-bit blocks.
534 const __m128i* in_mm = (
const __m128i*)in;
535 __m128i* out_mm = (__m128i*)out;
// Round keys are read from the start of EK with unaligned loads.
537 const __m128i* key_mm = (
const __m128i*)&EK[0];
539 __m128i K0 = _mm_loadu_si128(key_mm);
540 __m128i K1 = _mm_loadu_si128(key_mm + 1);
541 __m128i K2 = _mm_loadu_si128(key_mm + 2);
542 __m128i K3 = _mm_loadu_si128(key_mm + 3);
543 __m128i K4 = _mm_loadu_si128(key_mm + 4);
544 __m128i K5 = _mm_loadu_si128(key_mm + 5);
545 __m128i K6 = _mm_loadu_si128(key_mm + 6);
546 __m128i K7 = _mm_loadu_si128(key_mm + 7);
547 __m128i K8 = _mm_loadu_si128(key_mm + 8);
548 __m128i K9 = _mm_loadu_si128(key_mm + 9);
549 __m128i K10 = _mm_loadu_si128(key_mm + 10);
550 __m128i K11 = _mm_loadu_si128(key_mm + 11);
551 __m128i K12 = _mm_loadu_si128(key_mm + 12);
552 __m128i K13 = _mm_loadu_si128(key_mm + 13);
553 __m128i K14 = _mm_loadu_si128(key_mm + 14);
// Four-block path: load four consecutive input blocks.
557 __m128i B0 = _mm_loadu_si128(in_mm + 0);
558 __m128i B1 = _mm_loadu_si128(in_mm + 1);
559 __m128i B2 = _mm_loadu_si128(in_mm + 2);
560 __m128i B3 = _mm_loadu_si128(in_mm + 3);
// XOR every block with the first round key K0.
562 B0 = _mm_xor_si128(B0, K0);
563 B1 = _mm_xor_si128(B1, K0);
564 B2 = _mm_xor_si128(B2, K0);
565 B3 = _mm_xor_si128(B3, K0);
// Write the four blocks to the matching output positions.
582 _mm_storeu_si128(out_mm + 0, B0);
583 _mm_storeu_si128(out_mm + 1, B1);
584 _mm_storeu_si128(out_mm + 2, B2);
585 _mm_storeu_si128(out_mm + 3, B3);
// Single-block path: one block per iteration, `blocks` iterations.
592 for(
size_t i = 0; i != blocks; ++i)
594 __m128i B = _mm_loadu_si128(in_mm + i);
// XOR with K0, thirteen _mm_aesenc_si128 rounds (K1..K13), then the final
// _mm_aesenclast_si128 round with K14.
596 B = _mm_xor_si128(B, K0);
598 B = _mm_aesenc_si128(B, K1);
599 B = _mm_aesenc_si128(B, K2);
600 B = _mm_aesenc_si128(B, K3);
601 B = _mm_aesenc_si128(B, K4);
602 B = _mm_aesenc_si128(B, K5);
603 B = _mm_aesenc_si128(B, K6);
604 B = _mm_aesenc_si128(B, K7);
605 B = _mm_aesenc_si128(B, K8);
606 B = _mm_aesenc_si128(B, K9);
607 B = _mm_aesenc_si128(B, K10);
608 B = _mm_aesenc_si128(B, K11);
609 B = _mm_aesenc_si128(B, K12);
610 B = _mm_aesenc_si128(B, K13);
611 B = _mm_aesenclast_si128(B, K14);
613 _mm_storeu_si128(out_mm + i, B);
// Body of a block-decryption routine that uses 15 round keys (K0..K14) read
// from DK.
// NOTE(review): presumably AES_256_NI::decrypt_n; the function header, the
// loop framing and the round calls of the four-block path are not visible in
// this excerpt - confirm against the full file.
// View the input and output buffers as sequences of 128-bit blocks.
622 const __m128i* in_mm = (
const __m128i*)in;
623 __m128i* out_mm = (__m128i*)out;
// Round keys are read from the start of DK with unaligned loads.
625 const __m128i* key_mm = (
const __m128i*)&DK[0];
627 __m128i K0 = _mm_loadu_si128(key_mm);
628 __m128i K1 = _mm_loadu_si128(key_mm + 1);
629 __m128i K2 = _mm_loadu_si128(key_mm + 2);
630 __m128i K3 = _mm_loadu_si128(key_mm + 3);
631 __m128i K4 = _mm_loadu_si128(key_mm + 4);
632 __m128i K5 = _mm_loadu_si128(key_mm + 5);
633 __m128i K6 = _mm_loadu_si128(key_mm + 6);
634 __m128i K7 = _mm_loadu_si128(key_mm + 7);
635 __m128i K8 = _mm_loadu_si128(key_mm + 8);
636 __m128i K9 = _mm_loadu_si128(key_mm + 9);
637 __m128i K10 = _mm_loadu_si128(key_mm + 10);
638 __m128i K11 = _mm_loadu_si128(key_mm + 11);
639 __m128i K12 = _mm_loadu_si128(key_mm + 12);
640 __m128i K13 = _mm_loadu_si128(key_mm + 13);
641 __m128i K14 = _mm_loadu_si128(key_mm + 14);
// Four-block path: load four consecutive input blocks.
645 __m128i B0 = _mm_loadu_si128(in_mm + 0);
646 __m128i B1 = _mm_loadu_si128(in_mm + 1);
647 __m128i B2 = _mm_loadu_si128(in_mm + 2);
648 __m128i B3 = _mm_loadu_si128(in_mm + 3);
// XOR every block with the first round key K0.
650 B0 = _mm_xor_si128(B0, K0);
651 B1 = _mm_xor_si128(B1, K0);
652 B2 = _mm_xor_si128(B2, K0);
653 B3 = _mm_xor_si128(B3, K0);
// Write the four blocks to the matching output positions.
670 _mm_storeu_si128(out_mm + 0, B0);
671 _mm_storeu_si128(out_mm + 1, B1);
672 _mm_storeu_si128(out_mm + 2, B2);
673 _mm_storeu_si128(out_mm + 3, B3);
// Single-block path: one block per iteration, `blocks` iterations.
680 for(
size_t i = 0; i != blocks; ++i)
682 __m128i B = _mm_loadu_si128(in_mm + i);
// XOR with K0, thirteen _mm_aesdec_si128 rounds (K1..K13), then the final
// _mm_aesdeclast_si128 round with K14.
684 B = _mm_xor_si128(B, K0);
686 B = _mm_aesdec_si128(B, K1);
687 B = _mm_aesdec_si128(B, K2);
688 B = _mm_aesdec_si128(B, K3);
689 B = _mm_aesdec_si128(B, K4);
690 B = _mm_aesdec_si128(B, K5);
691 B = _mm_aesdec_si128(B, K6);
692 B = _mm_aesdec_si128(B, K7);
693 B = _mm_aesdec_si128(B, K8);
694 B = _mm_aesdec_si128(B, K9);
695 B = _mm_aesdec_si128(B, K10);
696 B = _mm_aesdec_si128(B, K11);
697 B = _mm_aesdec_si128(B, K12);
698 B = _mm_aesdec_si128(B, K13);
699 B = _mm_aesdeclast_si128(B, K14);
701 _mm_storeu_si128(out_mm + i, B);
708 void AES_256_NI::key_schedule(
const byte key[],
size_t)
710 __m128i K0 = _mm_loadu_si128((
const __m128i*)(key));
711 __m128i K1 = _mm_loadu_si128((
const __m128i*)(key + 16));
713 __m128i K2 = aes_128_key_expansion(K0, _mm_aeskeygenassist_si128(K1, 0x01));
714 __m128i K3 = aes_256_key_expansion(K1, K2);
716 __m128i K4 = aes_128_key_expansion(K2, _mm_aeskeygenassist_si128(K3, 0x02));
717 __m128i K5 = aes_256_key_expansion(K3, K4);
719 __m128i K6 = aes_128_key_expansion(K4, _mm_aeskeygenassist_si128(K5, 0x04));
720 __m128i K7 = aes_256_key_expansion(K5, K6);
722 __m128i K8 = aes_128_key_expansion(K6, _mm_aeskeygenassist_si128(K7, 0x08));
723 __m128i K9 = aes_256_key_expansion(K7, K8);
725 __m128i K10 = aes_128_key_expansion(K8, _mm_aeskeygenassist_si128(K9, 0x10));
726 __m128i K11 = aes_256_key_expansion(K9, K10);
728 __m128i K12 = aes_128_key_expansion(K10, _mm_aeskeygenassist_si128(K11, 0x20));
729 __m128i K13 = aes_256_key_expansion(K11, K12);
731 __m128i K14 = aes_128_key_expansion(K12, _mm_aeskeygenassist_si128(K13, 0x40));
733 __m128i* EK_mm = (__m128i*)&EK[0];
734 _mm_storeu_si128(EK_mm , K0);
735 _mm_storeu_si128(EK_mm + 1, K1);
736 _mm_storeu_si128(EK_mm + 2, K2);
737 _mm_storeu_si128(EK_mm + 3, K3);
738 _mm_storeu_si128(EK_mm + 4, K4);
739 _mm_storeu_si128(EK_mm + 5, K5);
740 _mm_storeu_si128(EK_mm + 6, K6);
741 _mm_storeu_si128(EK_mm + 7, K7);
742 _mm_storeu_si128(EK_mm + 8, K8);
743 _mm_storeu_si128(EK_mm + 9, K9);
744 _mm_storeu_si128(EK_mm + 10, K10);
745 _mm_storeu_si128(EK_mm + 11, K11);
746 _mm_storeu_si128(EK_mm + 12, K12);
747 _mm_storeu_si128(EK_mm + 13, K13);
748 _mm_storeu_si128(EK_mm + 14, K14);
752 __m128i* DK_mm = (__m128i*)&DK[0];
753 _mm_storeu_si128(DK_mm , K14);
754 _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K13));
755 _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K12));
756 _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K11));
757 _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K10));
758 _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K9));
759 _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K8));
760 _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K7));
761 _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K6));
762 _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K5));
763 _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(K4));
764 _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(K3));
765 _mm_storeu_si128(DK_mm + 12, _mm_aesimc_si128(K2));
766 _mm_storeu_si128(DK_mm + 13, _mm_aesimc_si128(K1));
767 _mm_storeu_si128(DK_mm + 14, K0);
#define AES_DEC_4_LAST_ROUNDS(K)
T load_le(const byte in[], size_t off)
void decrypt_n(const byte in[], byte out[], size_t blocks) const
void decrypt_n(const byte in[], byte out[], size_t blocks) const
#define AES_192_key_exp(RCON, EK_OFF)
#define AES_ENC_4_ROUNDS(K)
#define AES_DEC_4_ROUNDS(K)
#define AES_ENC_4_LAST_ROUNDS(K)
void encrypt_n(const byte in[], byte out[], size_t blocks) const
void decrypt_n(const byte in[], byte out[], size_t blocks) const
void encrypt_n(const byte in[], byte out[], size_t blocks) const
#define AES_128_key_exp(K, RCON)
void encrypt_n(const byte in[], byte out[], size_t blocks) const
void zeroise(MemoryRegion< T > &vec)