Botan  1.10.9
turing.cpp
Go to the documentation of this file.
1 /*
2 * Turing
3 * (C) 1999-2008 Jack Lloyd
4 *
5 * Distributed under the terms of the Botan license
6 */
7 
8 #include <botan/turing.h>
9 #include <botan/loadstor.h>
10 #include <botan/rotate.h>
11 #include <botan/internal/xor_buf.h>
12 
13 namespace Botan {
14 
15 namespace {
16 
17 /*
18 * Perform an N-way PHT
19 */
20 inline void PHT(MemoryRegion<u32bit>& B)
21  {
22  u32bit sum = 0;
23  for(size_t i = 0; i < B.size() - 1; ++i)
24  sum += B[i];
25 
26  B[B.size()-1] += sum;
27 
28  sum = B[B.size()-1];
29  for(size_t i = 0; i < B.size() - 1; ++i)
30  B[i] += sum;
31  }
32 
33 }
34 
35 /*
36 * Combine cipher stream with message
37 */
38 void Turing::cipher(const byte in[], byte out[], size_t length)
39  {
40  while(length >= buffer.size() - position)
41  {
42  xor_buf(out, in, &buffer[position], buffer.size() - position);
43  length -= (buffer.size() - position);
44  in += (buffer.size() - position);
45  out += (buffer.size() - position);
46  generate();
47  }
48  xor_buf(out, in, &buffer[position], length);
49  position += length;
50  }
51 
52 /*
53 * Generate cipher stream
54 */
55 void Turing::generate()
56  {
57  // Table for Turing's polynomial multiplication
58  static const u32bit MULT_TAB[256] = {
59  0x00000000, 0xD02B4367, 0xED5686CE, 0x3D7DC5A9, 0x97AC41D1, 0x478702B6,
60  0x7AFAC71F, 0xAAD18478, 0x631582EF, 0xB33EC188, 0x8E430421, 0x5E684746,
61  0xF4B9C33E, 0x24928059, 0x19EF45F0, 0xC9C40697, 0xC62A4993, 0x16010AF4,
62  0x2B7CCF5D, 0xFB578C3A, 0x51860842, 0x81AD4B25, 0xBCD08E8C, 0x6CFBCDEB,
63  0xA53FCB7C, 0x7514881B, 0x48694DB2, 0x98420ED5, 0x32938AAD, 0xE2B8C9CA,
64  0xDFC50C63, 0x0FEE4F04, 0xC154926B, 0x117FD10C, 0x2C0214A5, 0xFC2957C2,
65  0x56F8D3BA, 0x86D390DD, 0xBBAE5574, 0x6B851613, 0xA2411084, 0x726A53E3,
66  0x4F17964A, 0x9F3CD52D, 0x35ED5155, 0xE5C61232, 0xD8BBD79B, 0x089094FC,
67  0x077EDBF8, 0xD755989F, 0xEA285D36, 0x3A031E51, 0x90D29A29, 0x40F9D94E,
68  0x7D841CE7, 0xADAF5F80, 0x646B5917, 0xB4401A70, 0x893DDFD9, 0x59169CBE,
69  0xF3C718C6, 0x23EC5BA1, 0x1E919E08, 0xCEBADD6F, 0xCFA869D6, 0x1F832AB1,
70  0x22FEEF18, 0xF2D5AC7F, 0x58042807, 0x882F6B60, 0xB552AEC9, 0x6579EDAE,
71  0xACBDEB39, 0x7C96A85E, 0x41EB6DF7, 0x91C02E90, 0x3B11AAE8, 0xEB3AE98F,
72  0xD6472C26, 0x066C6F41, 0x09822045, 0xD9A96322, 0xE4D4A68B, 0x34FFE5EC,
73  0x9E2E6194, 0x4E0522F3, 0x7378E75A, 0xA353A43D, 0x6A97A2AA, 0xBABCE1CD,
74  0x87C12464, 0x57EA6703, 0xFD3BE37B, 0x2D10A01C, 0x106D65B5, 0xC04626D2,
75  0x0EFCFBBD, 0xDED7B8DA, 0xE3AA7D73, 0x33813E14, 0x9950BA6C, 0x497BF90B,
76  0x74063CA2, 0xA42D7FC5, 0x6DE97952, 0xBDC23A35, 0x80BFFF9C, 0x5094BCFB,
77  0xFA453883, 0x2A6E7BE4, 0x1713BE4D, 0xC738FD2A, 0xC8D6B22E, 0x18FDF149,
78  0x258034E0, 0xF5AB7787, 0x5F7AF3FF, 0x8F51B098, 0xB22C7531, 0x62073656,
79  0xABC330C1, 0x7BE873A6, 0x4695B60F, 0x96BEF568, 0x3C6F7110, 0xEC443277,
80  0xD139F7DE, 0x0112B4B9, 0xD31DD2E1, 0x03369186, 0x3E4B542F, 0xEE601748,
81  0x44B19330, 0x949AD057, 0xA9E715FE, 0x79CC5699, 0xB008500E, 0x60231369,
82  0x5D5ED6C0, 0x8D7595A7, 0x27A411DF, 0xF78F52B8, 0xCAF29711, 0x1AD9D476,
83  0x15379B72, 0xC51CD815, 0xF8611DBC, 0x284A5EDB, 0x829BDAA3, 0x52B099C4,
84  0x6FCD5C6D, 0xBFE61F0A, 0x7622199D, 0xA6095AFA, 0x9B749F53, 0x4B5FDC34,
85  0xE18E584C, 0x31A51B2B, 0x0CD8DE82, 0xDCF39DE5, 0x1249408A, 0xC26203ED,
86  0xFF1FC644, 0x2F348523, 0x85E5015B, 0x55CE423C, 0x68B38795, 0xB898C4F2,
87  0x715CC265, 0xA1778102, 0x9C0A44AB, 0x4C2107CC, 0xE6F083B4, 0x36DBC0D3,
88  0x0BA6057A, 0xDB8D461D, 0xD4630919, 0x04484A7E, 0x39358FD7, 0xE91ECCB0,
89  0x43CF48C8, 0x93E40BAF, 0xAE99CE06, 0x7EB28D61, 0xB7768BF6, 0x675DC891,
90  0x5A200D38, 0x8A0B4E5F, 0x20DACA27, 0xF0F18940, 0xCD8C4CE9, 0x1DA70F8E,
91  0x1CB5BB37, 0xCC9EF850, 0xF1E33DF9, 0x21C87E9E, 0x8B19FAE6, 0x5B32B981,
92  0x664F7C28, 0xB6643F4F, 0x7FA039D8, 0xAF8B7ABF, 0x92F6BF16, 0x42DDFC71,
93  0xE80C7809, 0x38273B6E, 0x055AFEC7, 0xD571BDA0, 0xDA9FF2A4, 0x0AB4B1C3,
94  0x37C9746A, 0xE7E2370D, 0x4D33B375, 0x9D18F012, 0xA06535BB, 0x704E76DC,
95  0xB98A704B, 0x69A1332C, 0x54DCF685, 0x84F7B5E2, 0x2E26319A, 0xFE0D72FD,
96  0xC370B754, 0x135BF433, 0xDDE1295C, 0x0DCA6A3B, 0x30B7AF92, 0xE09CECF5,
97  0x4A4D688D, 0x9A662BEA, 0xA71BEE43, 0x7730AD24, 0xBEF4ABB3, 0x6EDFE8D4,
98  0x53A22D7D, 0x83896E1A, 0x2958EA62, 0xF973A905, 0xC40E6CAC, 0x14252FCB,
99  0x1BCB60CF, 0xCBE023A8, 0xF69DE601, 0x26B6A566, 0x8C67211E, 0x5C4C6279,
100  0x6131A7D0, 0xB11AE4B7, 0x78DEE220, 0xA8F5A147, 0x958864EE, 0x45A32789,
101  0xEF72A3F1, 0x3F59E096, 0x0224253F, 0xD20F6658 };
102 
103  /*
104  I tried an implementation without precomputed LFSR offsets, since
105  I thought that might allow (especially on x86-64) the use of leal to
106  compute all the offsets.. However on my Core2 with GCC 4.3 it
107  turned out significantly slower (238 Mib/s, versus 300 Mib/s
108  with precomputed offsets)
109 
110  I also tried using byte vs u32bit for the offset variable (since
111  x86 memory addressing modes can be odd), but it made things even
112  slower (186 Mib/s)
113  */
114  static const byte OFFSETS[221] = {
115  0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 15, 16,
116  5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 2, 3, 4,
117  10, 11, 12, 13, 14, 15, 16, 0, 1, 5, 7, 8, 9,
118  15, 16, 0, 1, 2, 3, 4, 5, 6, 10, 12, 13, 14,
119  3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 0, 1, 2,
120  8, 9, 10, 11, 12, 13, 14, 15, 16, 3, 5, 6, 7,
121  13, 14, 15, 16, 0, 1, 2, 3, 4, 8, 10, 11, 12,
122  1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 15, 16, 0,
123  6, 7, 8, 9, 10, 11, 12, 13, 14, 1, 3, 4, 5,
124  11, 12, 13, 14, 15, 16, 0, 1, 2, 6, 8, 9, 10,
125  16, 0, 1, 2, 3, 4, 5, 6, 7, 11, 13, 14, 15,
126  4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 1, 2, 3,
127  9, 10, 11, 12, 13, 14, 15, 16, 0, 4, 6, 7, 8,
128  14, 15, 16, 0, 1, 2, 3, 4, 5, 9, 11, 12, 13,
129  2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 16, 0, 1,
130  7, 8, 9, 10, 11, 12, 13, 14, 15, 2, 4, 5, 6,
131  12, 13, 14, 15, 16, 0, 1, 2, 3, 7, 9, 10, 11 };
132 
133  for(size_t i = 0; i != 17; ++i)
134  {
135  const byte* R_off = OFFSETS + 13*i;
136 
137  u32bit R0 = R[R_off[0]];
138  u32bit R1 = R[R_off[1]];
139  u32bit R2 = R[R_off[2]];
140  u32bit R3 = R[R_off[3]];
141  u32bit R4 = R[R_off[4]];
142 
143  const u32bit R5 = R[R_off[5]];
144  const u32bit R6 = R[R_off[6]];
145  const u32bit R7 = R[R_off[7]];
146  const u32bit R8 = R[R_off[8]];
147  const u32bit R9 = R[R_off[9]];
148  const u32bit R10 = R[R_off[10]];
149  const u32bit R11 = R[R_off[11]];
150  const u32bit R12 = R[R_off[12]];
151 
152  R[R_off[0]] = R0 = ((R0 << 8) ^ MULT_TAB[(R0 >> 24) & 0xFF]) ^ R11 ^ R4;
153 
154  u32bit A = R0;
155  u32bit B = R10;
156  u32bit C = R7;
157  u32bit D = R2;
158  u32bit E = R1;
159 
160  E += A + B + C + D;
161 
162  A += E;
163  B += E;
164  C += E;
165  D += E;
166 
167  A = S0[get_byte(0, A)] ^ S1[get_byte(1, A)] ^
168  S2[get_byte(2, A)] ^ S3[get_byte(3, A)];
169  B = S0[get_byte(1, B)] ^ S1[get_byte(2, B)] ^
170  S2[get_byte(3, B)] ^ S3[get_byte(0, B)];
171  C = S0[get_byte(2, C)] ^ S1[get_byte(3, C)] ^
172  S2[get_byte(0, C)] ^ S3[get_byte(1, C)];
173  D = S0[get_byte(3, D)] ^ S1[get_byte(0, D)] ^
174  S2[get_byte(1, D)] ^ S3[get_byte(2, D)];
175  E = S0[get_byte(0, E)] ^ S1[get_byte(1, E)] ^
176  S2[get_byte(2, E)] ^ S3[get_byte(3, E)];
177 
178  E += A + B + C + D;
179 
180  A += E;
181  B += E;
182  C += E;
183  D += E;
184 
185  R[R_off[1]] = R1 = ((R1 << 8) ^ MULT_TAB[(R1 >> 24) & 0xFF]) ^ R12 ^ R5;
186  R[R_off[2]] = R2 = ((R2 << 8) ^ MULT_TAB[(R2 >> 24) & 0xFF]) ^ R0 ^ R6;
187  R[R_off[3]] = ((R3 << 8) ^ MULT_TAB[(R3 >> 24) & 0xFF]) ^ R1 ^ R7;
188 
189  E += R4;
190 
191  R[R_off[4]] = ((R4 << 8) ^ MULT_TAB[(R4 >> 24) & 0xFF]) ^ R2 ^ R8;
192 
193  A += R1;
194  B += R12;
195  C += R9;
196  D += R5;
197 
198  store_be(A, &buffer[20*i + 0]);
199  store_be(B, &buffer[20*i + 4]);
200  store_be(C, &buffer[20*i + 8]);
201  store_be(D, &buffer[20*i + 12]);
202  store_be(E, &buffer[20*i + 16]);
203  }
204 
205  position = 0;
206  }
207 
208 /*
209 * Turing's byte mixing step
210 */
211 u32bit Turing::fixedS(u32bit W)
212  {
213  byte B = SBOX[get_byte(0, W)];
214  W ^= Q_BOX[B];
215  W &= 0x00FFFFFF;
216  W |= B << 24;
217 
218  B = SBOX[get_byte(1, W)];
219  W ^= rotate_left(Q_BOX[B], 8);
220  W &= 0xFF00FFFF;
221  W |= B << 16;
222 
223  B = SBOX[get_byte(2, W)];
224  W ^= rotate_left(Q_BOX[B], 16);
225  W &= 0xFFFF00FF;
226  W |= B << 8;
227 
228  B = SBOX[get_byte(3, W)];
229  W ^= rotate_left(Q_BOX[B], 24);
230  W &= 0xFFFFFF00;
231  W |= B;
232 
233  return W;
234  }
235 
236 /*
237 * Turing Key Schedule
238 */
239 void Turing::key_schedule(const byte key[], size_t length)
240  {
241  K.resize(length / 4);
242  for(size_t i = 0; i != length; ++i)
243  K[i/4] = (K[i/4] << 8) + key[i];
244 
245  for(size_t i = 0; i != K.size(); ++i)
246  K[i] = fixedS(K[i]);
247 
248  PHT(K);
249 
250  for(u32bit i = 0; i != 256; ++i)
251  {
252  u32bit W0 = 0, C0 = i;
253  u32bit W1 = 0, C1 = i;
254  u32bit W2 = 0, C2 = i;
255  u32bit W3 = 0, C3 = i;
256 
257  for(size_t j = 0; j < K.size(); ++j)
258  {
259  C0 = SBOX[get_byte(0, K[j]) ^ C0];
260  C1 = SBOX[get_byte(1, K[j]) ^ C1];
261  C2 = SBOX[get_byte(2, K[j]) ^ C2];
262  C3 = SBOX[get_byte(3, K[j]) ^ C3];
263 
264  W0 ^= rotate_left(Q_BOX[C0], j);
265  W1 ^= rotate_left(Q_BOX[C1], j + 8);
266  W2 ^= rotate_left(Q_BOX[C2], j + 16);
267  W3 ^= rotate_left(Q_BOX[C3], j + 24);
268  }
269 
270  S0[i] = (W0 & 0x00FFFFFF) | (C0 << 24);
271  S1[i] = (W1 & 0xFF00FFFF) | (C1 << 16);
272  S2[i] = (W2 & 0xFFFF00FF) | (C2 << 8);
273  S3[i] = (W3 & 0xFFFFFF00) | C3;
274  }
275 
276  set_iv(0, 0);
277  }
278 
279 /*
280 * Resynchronization
281 */
282 void Turing::set_iv(const byte iv[], size_t length)
283  {
284  if(!valid_iv_length(length))
285  throw Invalid_IV_Length(name(), length);
286 
287  SecureVector<u32bit> IV(length / 4);
288  for(size_t i = 0; i != length; ++i)
289  IV[i/4] = (IV[i/4] << 8) + iv[i];
290 
291  for(size_t i = 0; i != IV.size(); ++i)
292  R[i] = IV[i] = fixedS(IV[i]);
293 
294  for(size_t i = 0; i != K.size(); ++i)
295  R[i+IV.size()] = K[i];
296 
297  R[K.size() + IV.size()] = (0x010203 << 8) | (K.size() << 4) | IV.size();
298 
299  for(size_t i = K.size() + IV.size() + 1; i != 17; ++i)
300  {
301  const u32bit W = R[i-K.size()-IV.size()-1] + R[i-1];
302  R[i] = S0[get_byte(0, W)] ^ S1[get_byte(1, W)] ^
303  S2[get_byte(2, W)] ^ S3[get_byte(3, W)];
304  }
305 
306  PHT(R);
307 
308  generate();
309  }
310 
311 /*
312 * Clear memory of sensitive data
313 */
315  {
316  zeroise(S0);
317  zeroise(S1);
318  zeroise(S2);
319  zeroise(S3);
320 
321  zeroise(buffer);
322  position = 0;
323  }
324 
325 }
void resize(size_t n)
Definition: secmem.h:211
void set_iv(const byte iv[], size_t iv_length)
Definition: turing.cpp:282
#define R9
Definition: asm_x86_64.h:63
#define R11
Definition: asm_x86_64.h:66
#define R6
Definition: asm_x86_64.h:59
T rotate_left(T input, size_t rot)
Definition: rotate.h:21
byte get_byte(size_t byte_num, T input)
Definition: get_byte.h:21
void cipher(const byte in[], byte out[], size_t length)
Definition: turing.cpp:38
#define R5
Definition: asm_x86_64.h:58
#define R0
Definition: asm_x86_64.h:51
unsigned char byte
Definition: types.h:22
#define R10
Definition: asm_x86_64.h:65
#define R2
Definition: asm_x86_64.h:53
void clear()
Definition: turing.cpp:314
size_t size() const
Definition: secmem.h:29
#define R1
Definition: asm_x86_64.h:52
#define R7
Definition: asm_x86_64.h:61
bool valid_iv_length(size_t iv_len) const
Definition: turing.h:24
#define R8
Definition: asm_x86_64.h:62
void store_be(u16bit in, byte out[2])
Definition: loadstor.h:412
#define R4
Definition: asm_x86_64.h:57
#define R12
Definition: asm_x86_64.h:67
#define R3
Definition: asm_x86_64.h:55
std::string name() const
Definition: turing.h:33
void xor_buf(byte out[], const byte in[], size_t length)
Definition: xor_buf.h:21
void zeroise(MemoryRegion< T > &vec)
Definition: secmem.h:415
unsigned int u32bit
Definition: types.h:32