11 #include <botan/sha1_sse2.h>
12 #include <botan/rotate.h>
13 #include <emmintrin.h>
17 namespace SHA1_SSE2_F {
// prep00_15(P, W): prepare one vector of four message words for rounds 0-15.
// SHA-1 consumes the message as big-endian 32-bit words; on little-endian
// x86 each word must be byte-swapped first. The swap is built from SSE2
// 16-bit operations: shufflehi/shufflelo with _MM_SHUFFLE(2,3,0,1) swap the
// two 16-bit halves of every 32-bit lane, then the or(slli,srli) by 8 swaps
// the bytes inside each 16-bit lane — together a per-lane bswap32. Finally
// the round constant K00_19 is pre-added and the result stored into P.u128.
// NOTE(review): the macro definition continues past this excerpt (trailing
// '\' on the last visible line) — the closing line(s) are not shown here.
26 #define prep00_15(P, W) \
28 W = _mm_shufflehi_epi16(W, _MM_SHUFFLE(2, 3, 0, 1)); \
29 W = _mm_shufflelo_epi16(W, _MM_SHUFFLE(2, 3, 0, 1)); \
30 W = _mm_or_si128(_mm_slli_epi16(W, 8), \
31 _mm_srli_epi16(W, 8)); \
32 P.u128 = _mm_add_epi32(W, K00_19); \
// prep(prep, XW0, XW1, XW2, XW3, K): compute the next four message-schedule
// words W[t..t+3] from the previous sixteen (held in XW0..XW3) and store
// them, with round constant K pre-added, into (prep).u128.
// The SHA-1 recurrence W[t] = rol(W[t-3]^W[t-8]^W[t-14]^W[t-16], 1) is
// evaluated four lanes at a time:
//  - srli_si128(XW3,4) / shuffle+unpacklo build the shifted-by-3 and
//    shifted-by-8 word streams from the neighbouring vectors;
//  - the three xors combine the four taps;
//  - rotate-left-by-1 is synthesized (SSE2 has no 32-bit vector rotate) as
//    cmplt(r0,0) giving -1 where the sign bit is set, then r0+r0 - mask,
//    i.e. (x<<1) | (x>>31) per lane;
//  - the slli_si128(…,12) + srli/slli-by-30/2 + xor sequence appears to be
//    the fix-up for the in-vector dependency (the fourth lane's W[t-3] is
//    itself produced by this vector) — TODO confirm against the elided lines.
// NOTE(review): this excerpt skips several original lines (e.g. 81, 83-92):
// the initial assignments of r0 and r2 are not visible here, and the macro
// continues past the last line shown (trailing '\').
80 #define prep(prep, XW0, XW1, XW2, XW3, K) \
82 __m128i r0, r1, r2, r3; \
85 r3 = _mm_srli_si128((XW3), 4); \
88 r1 = _mm_shuffle_epi32((XW0), _MM_SHUFFLE(1,0,3,2)); \
90 r1 = _mm_unpacklo_epi64(r1, (XW1)); \
93 r0 = _mm_xor_si128(r1, r0); \
94 r2 = _mm_xor_si128(r3, r2); \
95 r0 = _mm_xor_si128(r2, r0); \
98 r2 = _mm_slli_si128(r0, 12); \
99 r1 = _mm_cmplt_epi32(r0, _mm_setzero_si128()); \
100 r0 = _mm_add_epi32(r0, r0); \
101 r0 = _mm_sub_epi32(r0, r1); \
103 r3 = _mm_srli_epi32(r2, 30); \
104 r2 = _mm_slli_epi32(r2, 2); \
106 r0 = _mm_xor_si128(r0, r3); \
107 r0 = _mm_xor_si128(r0, r2); \
110 (prep).u128 = _mm_add_epi32(r0, K); \
// Body fragment of the rounds 0-19 step function (F1 per the index below):
// Ch(B,C,D) written in its branch-free form D ^ (B & (C ^ D)), added into E
// along with the prepared message word and rol(A,5). The enclosing function
// definition (signature, B-rotation, closing brace) lies outside this excerpt.
118 E += (D ^ (B & (C ^ D))) + msg +
rotate_left(A, 5);
// Body fragment of the rounds 40-59 step function (F3 per the index below):
// Maj(B,C,D) = (B & C) | ((B | C) & D), added into E with the prepared
// message word and rol(A,5). The rest of the function is not in this excerpt.
136 E += ((B & C) | ((B | C) & D)) + msg +
rotate_left(A, 5);
// Compress `blocks` consecutive 64-byte message blocks from input_bytes into
// the hash state. NOTE(review): this excerpt is heavily truncated — the
// opening brace, the state-variable loads, the 80 round-function calls that
// are interleaved between the prep() invocations, the state update, and the
// end of the per-block loop are all elided from the listing below.
156 void SHA_160_SSE2::compress_n(
const byte input_bytes[],
size_t blocks)
158 using namespace SHA1_SSE2_F;
// The four SHA-1 round constants (rounds 0-19, 20-39, 40-59, 60-79),
// broadcast across all four 32-bit lanes so prep()/prep00_15() can
// pre-add them to the message-schedule words.
160 const __m128i K00_19 = _mm_set1_epi32(0x5A827999);
161 const __m128i K20_39 = _mm_set1_epi32(0x6ED9EBA1);
162 const __m128i K40_59 = _mm_set1_epi32(0x8F1BBCDC);
163 const __m128i K60_79 = _mm_set1_epi32(0xCA62C1D6);
171 const __m128i* input =
reinterpret_cast<const __m128i*
>(input_bytes);
173 for(
size_t i = 0; i != blocks; ++i)
// Load the 16 message words of this block as four vectors. loadu is
// used because the caller's byte buffer has no alignment guarantee.
182 __m128i W0 = _mm_loadu_si128(&input[0]);
185 __m128i W1 = _mm_loadu_si128(&input[1]);
188 __m128i W2 = _mm_loadu_si128(&input[2]);
191 __m128i W3 = _mm_loadu_si128(&input[3]);
// Extract lane i of a prepared (W + K) vector for the scalar rounds.
202 #define GET_P_32(P, i) P.u32[i]
// Message-schedule expansion, four words per prep() call, rotating
// through W0..W3. Each call is tagged with the round constant of the
// rounds that will consume its output; the scalar round computations
// that sit between these calls in the original file are elided here.
208 prep(P0, W0, W1, W2, W3, K00_19);
214 prep(P1, W1, W2, W3, W0, K20_39);
220 prep(P2, W2, W3, W0, W1, K20_39);
226 prep(P3, W3, W0, W1, W2, K20_39);
232 prep(P0, W0, W1, W2, W3, K20_39);
238 prep(P1, W1, W2, W3, W0, K20_39);
244 prep(P2, W2, W3, W0, W1, K40_59);
250 prep(P3, W3, W0, W1, W2, K40_59);
256 prep(P0, W0, W1, W2, W3, K40_59);
262 prep(P1, W1, W2, W3, W0, K40_59);
268 prep(P2, W2, W3, W0, W1, K40_59);
274 prep(P3, W3, W0, W1, W2, K60_79);
280 prep(P0, W0, W1, W2, W3, K60_79);
286 prep(P1, W1, W2, W3, W0, K60_79);
292 prep(P2, W2, W3, W0, W1, K60_79);
298 prep(P3, W3, W0, W1, W2, K60_79);
// NOTE(review): the lines below are documentation-extraction residue (a
// symbol index from the original listing), not compilable C++ — no bodies,
// no semicolons. Preserved verbatim as comments rather than deleted:
//   size_t hash_block_size() const
//   T rotate_left(T input, size_t rot)
//   void F2(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)
//   void F3(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)
//   #define prep(prep, XW0, XW1, XW2, XW3, K)
//   SecureVector< u32bit > digest
//   void F4(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)
//   void F1(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)