8 #ifndef BOTAN_SIMD_SSE_H__
9 #define BOTAN_SIMD_SSE_H__
11 #if defined(BOTAN_TARGET_CPU_HAS_SSE2)
13 #include <botan/cpuid.h>
14 #include <emmintrin.h>
23 SIMD_SSE2(
const u32bit B[4])
25 reg = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
// Body fragment; the enclosing signature is not visible in this listing
// (presumably the four-word constructor -- confirm against the full file).
// _mm_set_epi32 places its FIRST argument in the most significant 32-bit
// lane, so B0 ends up in the highest lane and B3 in the lowest.
30 reg = _mm_set_epi32(B0, B1, B2, B3);
// Body fragment; the enclosing signature is not visible in this listing
// (presumably a single-value constructor -- confirm against the full file).
// Broadcasts the one 32-bit value B into all four lanes of reg.
35 reg = _mm_set1_epi32(B);
38 static SIMD_SSE2
load_le(
const void* in)
40 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
// Signature of the static load_be factory, taking the same untyped source
// pointer as load_le. NOTE(review): the body is not visible in this listing,
// so the byte-order handling cannot be confirmed from here.
43 static SIMD_SSE2
load_be(
const void* in)
// Body fragment; the enclosing signature is not visible in this listing
// (presumably store_le, which the byte-swapping store below delegates to).
// Writes all 16 bytes of reg to `out` using an unaligned store.
50 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), reg);
// Body fragment; the enclosing signature is not visible in this listing
// (presumably store_be -- confirm). Produces a byte-swapped copy via
// bswap() and writes that copy out through store_le; reg is not modified.
55 bswap().store_le(out);
// Body fragment; the enclosing signature is not visible in this listing
// (presumably an in-place rotate-left by `rot` bits -- confirm).
// Each 32-bit lane is rotated left: the bits shifted left by `rot` are
// OR-ed with the bits shifted (logically) right by 32-rot.
60 reg = _mm_or_si128(_mm_slli_epi32(reg, static_cast<int>(rot)),
61 _mm_srli_epi32(reg, static_cast<int>(32-rot)));
// Body fragment; the enclosing signature is not visible in this listing
// (presumably operator+= -- confirm). Adds other's four 32-bit lanes to
// this vector's lanes in place.
71 reg = _mm_add_epi32(reg, other.reg);
74 SIMD_SSE2
operator+(
const SIMD_SSE2& other)
const
76 return _mm_add_epi32(reg, other.reg);
79 void operator-=(
const SIMD_SSE2& other)
81 reg = _mm_sub_epi32(reg, other.reg);
84 SIMD_SSE2
operator-(
const SIMD_SSE2& other)
const
86 return _mm_sub_epi32(reg, other.reg);
89 void operator^=(
const SIMD_SSE2& other)
91 reg = _mm_xor_si128(reg, other.reg);
94 SIMD_SSE2
operator^(
const SIMD_SSE2& other)
const
96 return _mm_xor_si128(reg, other.reg);
99 void operator|=(
const SIMD_SSE2& other)
101 reg = _mm_or_si128(reg, other.reg);
104 SIMD_SSE2 operator&(
const SIMD_SSE2& other)
106 return _mm_and_si128(reg, other.reg);
109 void operator&=(
const SIMD_SSE2& other)
111 reg = _mm_and_si128(reg, other.reg);
// Body fragment; the enclosing signature is not visible in this listing
// (presumably operator<< -- confirm). Shifts each 32-bit lane left by
// `shift` bits, filling with zeros, and returns the result as a new value.
116 return _mm_slli_epi32(reg, static_cast<int>(shift));
// Body fragment; the enclosing signature is not visible in this listing
// (presumably operator>> -- confirm). Logical (zero-filling) right shift
// of each 32-bit lane by `shift` bits, returned as a new value.
121 return _mm_srli_epi32(reg, static_cast<int>(shift));
124 SIMD_SSE2 operator~()
const
126 return _mm_xor_si128(reg, _mm_set1_epi32(0xFFFFFFFF));
130 SIMD_SSE2 andc(
const SIMD_SSE2& other)
132 return _mm_andnot_si128(reg, other.reg);
// Returns a copy with the byte order of each 32-bit lane reversed.
// NOTE(review): the declaration/initialisation of T is not visible in this
// listing; presumably T starts out as a copy of reg -- confirm.
135 SIMD_SSE2 bswap()
const
// Step 1: swap the two 16-bit halves of every 32-bit lane. The shuffle
// selector (2, 3, 0, 1) exchanges adjacent 16-bit words; it is applied
// to the high and the low 64-bit half separately.
139 T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
140 T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
// Step 2: swap the two bytes inside every 16-bit word by OR-ing the word
// shifted right by 8 with the word shifted left by 8.
142 return _mm_or_si128(_mm_srli_epi16(T, 8),
143 _mm_slli_epi16(T, 8));
146 static void transpose(SIMD_SSE2& B0, SIMD_SSE2& B1,
147 SIMD_SSE2& B2, SIMD_SSE2& B3)
149 __m128i T0 = _mm_unpacklo_epi32(B0.reg, B1.reg);
150 __m128i T1 = _mm_unpacklo_epi32(B2.reg, B3.reg);
151 __m128i T2 = _mm_unpackhi_epi32(B0.reg, B1.reg);
152 __m128i T3 = _mm_unpackhi_epi32(B2.reg, B3.reg);
153 B0.reg = _mm_unpacklo_epi64(T0, T1);
154 B1.reg = _mm_unpackhi_epi64(T0, T1);
155 B2.reg = _mm_unpacklo_epi64(T2, T3);
156 B3.reg = _mm_unpackhi_epi64(T2, T3);
160 SIMD_SSE2(__m128i in) { reg = in; }
T load_be(const byte in[], size_t off)
T load_le(const byte in[], size_t off)
void store_le(u16bit in, byte out[2])
T rotate_left(T input, size_t rot)
int operator>>(int fd, Pipe &pipe)
OctetString operator^(const OctetString &k1, const OctetString &k2)
int operator<<(int fd, Pipe &pipe)
OctetString operator+(const OctetString &k1, const OctetString &k2)
T rotate_right(T input, size_t rot)
MemoryRegion< T > & operator+=(MemoryRegion< T > &out, const MemoryRegion< T > &in)
void store_be(u16bit in, byte out[2])
BigInt operator-(const BigInt &x, const BigInt &y)