2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "types_ocl.c"
22 #define COMPARE_S "check_single_vect1_comp4_bs.c"
23 #define COMPARE_M "check_multi_vect1_comp4_bs.c"
27 #define COMPARE_S "check_single_vect2_comp4_bs.c"
28 #define COMPARE_M "check_multi_vect2_comp4_bs.c"
32 #define COMPARE_S "check_single_vect4_comp4_bs.c"
33 #define COMPARE_M "check_multi_vect4_comp4_bs.c"
39 * Bitslice DES S-boxes making use of a vector conditional select operation
40 * (e.g., vsel on PowerPC with AltiVec).
42 * Gate counts: 36 33 33 26 35 34 34 32
45 * Several same-gate-count expressions for each S-box are included (for use on
46 * different CPUs/GPUs).
48 * These Boolean expressions corresponding to DES S-boxes have been generated
49 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
50 * John the Ripper password cracker: http://www.openwall.com/john/
51 * Being mathematical formulas, they are not copyrighted and are free for reuse
54 * This file (a specific representation of the S-box expressions, surrounding
55 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
56 * Redistribution and use in source and binary forms, with or without
57 * modification, are permitted. (This is a heavily cut-down "BSD license".)
59 * The effort has been sponsored by Rapid7: http://www.rapid7.com
62 #define vnot(dst, a) (dst) = ~(a)
63 #define vand(dst, a, b) (dst) = (a) & (b)
64 #define vor(dst, a, b) (dst) = (a) | (b)
65 #define vandn(dst, a, b) (dst) = (a) & ~(b)
66 #define vxor(dst, a, b) (dst) = (a) ^ (b)
67 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
70 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
71 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
73 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
75 u32 x55AFD1B7, x3C3C69C3, x6993B874;
76 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
77 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
78 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
79 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
80 u32 x0DBCE883, x3A25A215, x37994A96;
81 u32 x8A487EA7, x8B480F07, xB96C2D16;
84 vsel(x0F0F3333, a3, a2, a5);
85 vxor(x3C3C3C3C, a2, a3);
86 vor(x55FF55FF, a1, a4);
87 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
88 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
89 vxor(x09FCB7C0, a4, x0903B73F);
90 vxor(x5CA9E295, a1, x09FCB7C0);
92 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
93 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
94 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
96 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
97 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
98 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
99 vxor(x529E962D, x0F0F3333, x5D91A51E);
101 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
102 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
103 vsel(x428679F3, a5, x4B8771A3, x529E962D);
104 vxor(x6B68D433, x29EEADC0, x428679F3);
106 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
107 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
108 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
109 vnot(x94D83B6C, x6B27C493);
110 vsel(x0, x94D83B6C, x6B68D433, a6);
111 vxor(*out1, *out1, x0);
113 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
114 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
115 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
116 vxor(xD6E19C32, x529E962D, x847F0A1F);
117 vsel(x1, xD6E19C32, x5CA9E295, a6);
118 vxor(*out2, *out2, x1);
120 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
121 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
122 vxor(x37994A96, x0DBCE883, x3A25A215);
123 vsel(x3, x37994A96, x529E962D, a6);
124 vxor(*out4, *out4, x3);
126 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
127 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
128 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
129 vsel(x2, xB96C2D16, x6993B874, a6);
130 vxor(*out3, *out3, x2);
134 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
135 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
137 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
138 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
139 u32 x0F5AF03C, x6600FF56, x87A5F09C;
140 u32 xA55A963C, x3C69C30F, xB44BC32D;
141 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
142 u32 xB46C662D, x278DB412, xB66CB43B;
143 u32 xD2DC4E52, x27993333, xD2994E33;
144 u32 x278D0F2D, x2E0E547B, x09976748;
147 vsel(x55553333, a1, a3, a6);
148 vsel(x0055FF33, a6, x55553333, a5);
149 vsel(x33270F03, a3, a4, x0055FF33);
150 vxor(x66725A56, a1, x33270F03);
151 vxor(x00FFFF00, a5, a6);
152 vxor(x668DA556, x66725A56, x00FFFF00);
154 vsel(x0F0F5A56, a4, x66725A56, a6);
155 vnot(xF0F0A5A9, x0F0F5A56);
156 vxor(xA5A5969A, x55553333, xF0F0A5A9);
157 vxor(xA55A699A, x00FFFF00, xA5A5969A);
158 vsel(x1, xA55A699A, x668DA556, a2);
159 vxor(*out2, *out2, x1);
161 vxor(x0F5AF03C, a4, x0055FF33);
162 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
163 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
165 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
166 vxor(x3C69C30F, a3, x0F5AF03C);
167 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
169 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
170 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
171 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
172 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
173 vsel(x0, x996C66D2, xB44BC32D, a2);
174 vxor(*out1, *out1, x0);
176 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
177 vsel(x278DB412, x668DA556, xA5A5969A, a1);
178 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
180 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
181 vsel(x27993333, x278DB412, a3, x0055FF33);
182 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
183 vsel(x3, x87A5F09C, xD2994E33, a2);
184 vxor(*out4, *out4, x3);
186 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
187 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
188 vxor(x09976748, x27993333, x2E0E547B);
189 vsel(x2, xB66CB43B, x09976748, a2);
190 vxor(*out3, *out3, x2);
194 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
195 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
197 u32 x0F330F33, x0F33F0CC, x5A66A599;
198 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
199 u32 x556BA09E, x665A93AC, x99A56C53;
200 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
201 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
202 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
203 u32 xD6599874, x05330022, xD2699876;
204 u32 x665F9364, xD573F0F2, xB32C6396;
207 vsel(x0F330F33, a4, a3, a5);
208 vxor(x0F33F0CC, a6, x0F330F33);
209 vxor(x5A66A599, a2, x0F33F0CC);
211 vsel(x2111B7BB, a3, a6, x5A66A599);
212 vsel(x03FF3033, a5, a3, x0F33F0CC);
213 vsel(x05BB50EE, a5, x0F33F0CC, a2);
214 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
215 vxor(x265E97A4, x2111B7BB, x074F201F);
217 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
218 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
219 vnot(x99A56C53, x665A93AC);
220 vsel(x1, x265E97A4, x99A56C53, a1);
221 vxor(*out2, *out2, x1);
223 vxor(x25A1A797, x03FF3033, x265E97A4);
224 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
225 vsel(x66559355, x665A93AC, a2, a5);
226 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
228 vxor(x9A5A5C60, x03FF3033, x99A56C53);
229 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
230 vxor(x87698DB4, x5713754C, xD07AF8F8);
231 vxor(xE13C1EE1, x66559355, x87698DB4);
233 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
234 vsel(x655B905E, x66559355, x05BB50EE, a4);
235 vsel(x00A55CFF, a5, a6, x9A5A5C60);
236 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
237 vsel(x0, x9E49915E, xE13C1EE1, a1);
238 vxor(*out1, *out1, x0);
240 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
241 vand(x05330022, x0F330F33, x05BB50EE);
242 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
243 vsel(x3, x5A66A599, xD2699876, a1);
244 vxor(*out4, *out4, x3);
246 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
247 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
248 vxor(xB32C6396, x665F9364, xD573F0F2);
249 vsel(x2, xB32C6396, x47B135C6, a1);
250 vxor(*out3, *out3, x2);
254 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
255 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
257 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
258 x0AF50F0F, x4CA36B59;
260 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
262 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
263 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
266 vsel(x0505AFAF, a5, a3, a1);
267 vsel(x0555AF55, x0505AFAF, a1, a4);
268 vxor(x0A5AA05A, a3, x0555AF55);
269 vsel(x46566456, a1, x0A5AA05A, a2);
270 vsel(x0A0A5F5F, a3, a5, a1);
271 vxor(x0AF55FA0, a4, x0A0A5F5F);
272 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
273 vxor(x4CA36B59, x46566456, x0AF50F0F);
275 vnot(xB35C94A6, x4CA36B59);
277 vsel(x01BB23BB, a4, a2, x0555AF55);
278 vxor(x5050FAFA, a1, x0505AFAF);
279 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
280 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
282 vnot(x56E9861E, xA91679E1);
284 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
285 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
286 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
287 vxor(xD2946D9A, x50E9FA1E, x827D9784);
288 vsel(x2, xD2946D9A, x4CA36B59, a6);
289 vxor(*out3, *out3, x2);
290 vsel(x3, xB35C94A6, xD2946D9A, a6);
291 vxor(*out4, *out4, x3);
293 vsel(x31F720B3, a2, a4, x0AF55FA0);
294 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
295 vxor(x4712A7AD, x56E9861E, x11FB21B3);
296 vxor(x9586CA37, xD2946D9A, x4712A7AD);
297 vsel(x0, x56E9861E, x9586CA37, a6);
298 vxor(*out1, *out1, x0);
299 vsel(x1, x9586CA37, xA91679E1, a6);
300 vxor(*out2, *out2, x1);
304 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
305 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
307 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
308 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
309 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
310 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
311 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
312 u32 x12E6283D, x9E47D3D4, x1A676AB4;
313 u32 x891556DF, xE5E77F82, x6CF2295D;
314 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
317 vsel(x550F550F, a1, a3, a5);
318 vnot(xAAF0AAF0, x550F550F);
319 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
320 vxor(x96C696C6, a2, xA5F5A5F5);
321 vxor(x00FFFF00, a5, a6);
322 vxor(x963969C6, x96C696C6, x00FFFF00);
324 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
325 vsel(xB73121F7, a2, x963969C6, x96C696C6);
326 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
327 vsel(x00558A5F, x1501DF0F, a5, a1);
328 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
330 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
331 vsel(x045157FD, a6, a1, x0679ED42);
332 vsel(xB32077FF, xB73121F7, a6, x045157FD);
333 vxor(x9D49D39C, x2E69A463, xB32077FF);
334 vsel(x2, x9D49D39C, x2E69A463, a4);
335 vxor(*out3, *out3, x2);
337 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
338 vsel(xF72577AF, xB32077FF, x550F550F, a1);
339 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
340 vsel(x1, x5BA4B81D, x963969C6, a4);
341 vxor(*out2, *out2, x1);
343 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
344 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
345 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
346 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
348 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
349 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
350 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
352 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
353 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
354 vxor(x6CF2295D, x891556DF, xE5E77F82);
355 vsel(x3, x1A35669A, x6CF2295D, a4);
356 vxor(*out4, *out4, x3);
358 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
359 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
360 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
361 vsel(x0, x369CC1D6, x1A676AB4, a4);
362 vxor(*out1, *out1, x0);
366 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
367 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
369 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
370 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
371 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
372 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
373 u32 x142956AB, x455D45DF, x1C3EE619;
374 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
375 u32 x840DBB67, x6DA19C1E, x925E63E1;
376 u32 x9C3CA761, x257A75D5, xB946D2B4;
379 vsel(x555500FF, a1, a4, a5);
380 vxor(x666633CC, a2, x555500FF);
381 vsel(x606F30CF, x666633CC, a4, a3);
382 vxor(x353A659A, a1, x606F30CF);
383 vxor(x353A9A65, a5, x353A659A);
384 vnot(xCAC5659A, x353A9A65);
386 vsel(x353A6565, x353A659A, x353A9A65, a4);
387 vsel(x0A3F0A6F, a3, a4, x353A6565);
388 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
389 vxor(x5963A3C6, x353A9A65, x6C5939A3);
391 vsel(x35FF659A, a4, x353A659A, x353A6565);
392 vxor(x3AF06A95, a3, x35FF659A);
393 vsel(x05CF0A9F, a4, a3, x353A9A65);
394 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
396 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
397 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
398 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
399 vsel(x0, xCAC5659A, x942D9A67, a6);
400 vxor(*out1, *out1, x0);
402 vsel(x142956AB, x353A659A, x942D9A67, a2);
403 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
404 vxor(x1C3EE619, x5963A3C6, x455D45DF);
405 vsel(x3, x5963A3C6, x1C3EE619, a6);
406 vxor(*out4, *out4, x3);
408 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
409 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
410 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
411 vxor(x69A49C79, x555500FF, x3CF19C86);
413 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
414 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
415 vnot(x925E63E1, x6DA19C1E);
416 vsel(x1, x925E63E1, x69A49C79, a6);
417 vxor(*out2, *out2, x1);
419 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
420 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
421 vxor(xB946D2B4, x9C3CA761, x257A75D5);
422 vsel(x2, x16E94A97, xB946D2B4, a6);
423 vxor(*out3, *out3, x2);
427 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
428 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
430 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
431 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
432 u32 x738F9C63, x11EF9867, x26DA9867;
433 u32 x4B4B9C63, x4B666663, x4E639396;
434 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
435 u32 xD728827B, x6698807B, x699C585B;
436 u32 x738C847B, xA4A71E18, x74878E78;
437 u32 x333D9639, x74879639, x8B7869C6;
440 vsel(x44447777, a2, a6, a3);
441 vxor(x4B4B7878, a4, x44447777);
442 vsel(x22772277, a3, a5, a2);
443 vsel(x0505F5F5, a6, a2, a4);
444 vsel(x220522F5, x22772277, x0505F5F5, a5);
445 vxor(x694E5A8D, x4B4B7878, x220522F5);
447 vxor(x00FFFF00, a5, a6);
448 vxor(x66666666, a2, a3);
449 vsel(x32353235, a3, x220522F5, a4);
450 vsel(x26253636, x66666666, x32353235, x4B4B7878);
451 vxor(x26DAC936, x00FFFF00, x26253636);
452 vsel(x0, x26DAC936, x694E5A8D, a1);
453 vxor(*out1, *out1, x0);
455 vxor(x738F9C63, a2, x26DAC936);
456 vsel(x11EF9867, x738F9C63, a5, x66666666);
457 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
459 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
460 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
461 vxor(x4E639396, x0505F5F5, x4B666663);
463 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
465 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
466 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
467 vsel(x1, xB14EE41D, x26DA9867, a1);
468 vxor(*out2, *out2, x1);
470 vxor(xD728827B, x66666666, xB14EE41D);
471 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
472 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
473 vsel(x2, x699C585B, x4E639396, a1);
474 vxor(*out3, *out3, x2);
476 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
477 vxor(xA4A71E18, x738F9C63, xD728827B);
478 vsel(x74878E78, x738C847B, xA4A71E18, a4);
480 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
481 vsel(x74879639, x74878E78, x333D9639, a6);
482 vnot(x8B7869C6, x74879639);
483 vsel(x3, x74878E78, x8B7869C6, a1);
484 vxor(*out4, *out4, x3);
488 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
489 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
491 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
492 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
493 u32 x3001F74E, x30555745, x693CD926;
494 u32 x0C0CD926, x0C3F25E9, x38D696A5;
496 u32 x03D2117B, xC778395B, xCB471CB2;
497 u32 x5425B13F, x56B3803F, x919AE965;
498 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
501 vsel(x0505F5F5, a5, a1, a3);
502 vxor(x05FAF50A, a4, x0505F5F5);
503 vsel(x0F0F00FF, a3, a4, a5);
504 vsel(x22227777, a2, a5, a1);
505 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
506 vxor(x34E9B34C, a2, x07DA807F);
508 vsel(x00FFF00F, x05FAF50A, a4, a3);
509 vsel(x0033FCCF, a5, x00FFF00F, a2);
510 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
511 vsel(x0C0C3F3F, a3, a5, a2);
512 vxor(x59698E63, x5565B15C, x0C0C3F3F);
514 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
515 vsel(x30555745, x3001F74E, a1, x00FFF00F);
516 vxor(x693CD926, x59698E63, x30555745);
517 vsel(x2, x693CD926, x59698E63, a6);
518 vxor(*out3, *out3, x2);
520 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
521 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
522 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
524 vnot(xC729695A, x38D696A5);
526 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
527 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
528 vxor(xCB471CB2, x0C3F25E9, xC778395B);
529 vsel(x1, xCB471CB2, x34E9B34C, a6);
530 vxor(*out2, *out2, x1);
532 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
533 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
534 vxor(x919AE965, xC729695A, x56B3803F);
535 vsel(x3, xC729695A, x919AE965, a6);
536 vxor(*out4, *out4, x3);
538 vsel(x17B3023F, x07DA807F, a2, x59698E63);
539 vor(x75555755, a1, x30555745);
540 vxor(x62E6556A, x17B3023F, x75555755);
541 vxor(xA59E6C31, xC778395B, x62E6556A);
542 vsel(x0, xA59E6C31, x38D696A5, a6);
543 vxor(*out1, *out1, x0);
546 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
582 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
583 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
584 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
585 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
586 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
587 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
588 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
589 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
590 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
591 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
592 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
593 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
594 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
595 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
596 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
597 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
599 static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
601 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
602 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
603 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
604 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
605 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
606 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
607 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
608 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
611 for (u32 i = 0; i < 16; i++)
615 case 0: KEYSET00; break;
616 case 1: KEYSET01; break;
617 case 2: KEYSET02; break;
618 case 3: KEYSET03; break;
619 case 4: KEYSET04; break;
620 case 5: KEYSET05; break;
621 case 6: KEYSET06; break;
622 case 7: KEYSET07; break;
623 case 8: KEYSET10; break;
624 case 9: KEYSET11; break;
625 case 10: KEYSET12; break;
626 case 11: KEYSET13; break;
627 case 12: KEYSET14; break;
628 case 13: KEYSET15; break;
629 case 14: KEYSET16; break;
630 case 15: KEYSET17; break;
633 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
634 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
635 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
636 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
637 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
638 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
639 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
640 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
646 static void transpose32c (u32 data[32])
648 #define swap(x,y,j,m) \
649 t = ((x) ^ ((y) >> (j))) & (m); \
651 (y) = (y) ^ (t << (j));
655 swap (data[ 0], data[16], 16, 0x0000ffff);
656 swap (data[ 1], data[17], 16, 0x0000ffff);
657 swap (data[ 2], data[18], 16, 0x0000ffff);
658 swap (data[ 3], data[19], 16, 0x0000ffff);
659 swap (data[ 4], data[20], 16, 0x0000ffff);
660 swap (data[ 5], data[21], 16, 0x0000ffff);
661 swap (data[ 6], data[22], 16, 0x0000ffff);
662 swap (data[ 7], data[23], 16, 0x0000ffff);
663 swap (data[ 8], data[24], 16, 0x0000ffff);
664 swap (data[ 9], data[25], 16, 0x0000ffff);
665 swap (data[10], data[26], 16, 0x0000ffff);
666 swap (data[11], data[27], 16, 0x0000ffff);
667 swap (data[12], data[28], 16, 0x0000ffff);
668 swap (data[13], data[29], 16, 0x0000ffff);
669 swap (data[14], data[30], 16, 0x0000ffff);
670 swap (data[15], data[31], 16, 0x0000ffff);
671 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
672 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
673 swap (data[ 2], data[10], 8, 0x00ff00ff);
674 swap (data[ 3], data[11], 8, 0x00ff00ff);
675 swap (data[ 4], data[12], 8, 0x00ff00ff);
676 swap (data[ 5], data[13], 8, 0x00ff00ff);
677 swap (data[ 6], data[14], 8, 0x00ff00ff);
678 swap (data[ 7], data[15], 8, 0x00ff00ff);
679 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
680 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
681 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
682 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
683 swap (data[ 0], data[ 2], 2, 0x33333333);
684 swap (data[ 1], data[ 3], 2, 0x33333333);
685 swap (data[ 0], data[ 1], 1, 0x55555555);
686 swap (data[ 2], data[ 3], 1, 0x55555555);
687 swap (data[ 4], data[ 6], 2, 0x33333333);
688 swap (data[ 5], data[ 7], 2, 0x33333333);
689 swap (data[ 4], data[ 5], 1, 0x55555555);
690 swap (data[ 6], data[ 7], 1, 0x55555555);
691 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
692 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
693 swap (data[10], data[14], 4, 0x0f0f0f0f);
694 swap (data[11], data[15], 4, 0x0f0f0f0f);
695 swap (data[ 8], data[10], 2, 0x33333333);
696 swap (data[ 9], data[11], 2, 0x33333333);
697 swap (data[ 8], data[ 9], 1, 0x55555555);
698 swap (data[10], data[11], 1, 0x55555555);
699 swap (data[12], data[14], 2, 0x33333333);
700 swap (data[13], data[15], 2, 0x33333333);
701 swap (data[12], data[13], 1, 0x55555555);
702 swap (data[14], data[15], 1, 0x55555555);
703 swap (data[16], data[24], 8, 0x00ff00ff);
704 swap (data[17], data[25], 8, 0x00ff00ff);
705 swap (data[18], data[26], 8, 0x00ff00ff);
706 swap (data[19], data[27], 8, 0x00ff00ff);
707 swap (data[20], data[28], 8, 0x00ff00ff);
708 swap (data[21], data[29], 8, 0x00ff00ff);
709 swap (data[22], data[30], 8, 0x00ff00ff);
710 swap (data[23], data[31], 8, 0x00ff00ff);
711 swap (data[16], data[20], 4, 0x0f0f0f0f);
712 swap (data[17], data[21], 4, 0x0f0f0f0f);
713 swap (data[18], data[22], 4, 0x0f0f0f0f);
714 swap (data[19], data[23], 4, 0x0f0f0f0f);
715 swap (data[16], data[18], 2, 0x33333333);
716 swap (data[17], data[19], 2, 0x33333333);
717 swap (data[16], data[17], 1, 0x55555555);
718 swap (data[18], data[19], 1, 0x55555555);
719 swap (data[20], data[22], 2, 0x33333333);
720 swap (data[21], data[23], 2, 0x33333333);
721 swap (data[20], data[21], 1, 0x55555555);
722 swap (data[22], data[23], 1, 0x55555555);
723 swap (data[24], data[28], 4, 0x0f0f0f0f);
724 swap (data[25], data[29], 4, 0x0f0f0f0f);
725 swap (data[26], data[30], 4, 0x0f0f0f0f);
726 swap (data[27], data[31], 4, 0x0f0f0f0f);
727 swap (data[24], data[26], 2, 0x33333333);
728 swap (data[25], data[27], 2, 0x33333333);
729 swap (data[24], data[25], 1, 0x55555555);
730 swap (data[26], data[27], 1, 0x55555555);
731 swap (data[28], data[30], 2, 0x33333333);
732 swap (data[29], data[31], 2, 0x33333333);
733 swap (data[28], data[29], 1, 0x55555555);
734 swap (data[30], data[31], 1, 0x55555555);
737 static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
743 const u32 gid = get_global_id (0);
744 const u32 lid = get_local_id (0);
750 const u32 K00 = pws[gid].i[ 0];
751 const u32 K01 = pws[gid].i[ 1];
752 const u32 K02 = pws[gid].i[ 2];
753 const u32 K03 = pws[gid].i[ 3];
754 const u32 K04 = pws[gid].i[ 4];
755 const u32 K05 = pws[gid].i[ 5];
756 const u32 K06 = pws[gid].i[ 6];
757 const u32 K07 = pws[gid].i[ 7];
758 const u32 K08 = pws[gid].i[ 8];
759 const u32 K09 = pws[gid].i[ 9];
760 const u32 K10 = pws[gid].i[10];
761 const u32 K11 = pws[gid].i[11];
762 const u32 K12 = pws[gid].i[12];
763 const u32 K13 = pws[gid].i[13];
764 const u32 K14 = pws[gid].i[14];
765 const u32 K15 = pws[gid].i[15];
766 const u32 K16 = pws[gid].i[16];
767 const u32 K17 = pws[gid].i[17];
768 const u32 K18 = pws[gid].i[18];
769 const u32 K19 = pws[gid].i[19];
770 const u32 K20 = pws[gid].i[20];
771 const u32 K21 = pws[gid].i[21];
772 const u32 K22 = pws[gid].i[22];
773 const u32 K23 = pws[gid].i[23];
774 const u32 K24 = pws[gid].i[24];
775 const u32 K25 = pws[gid].i[25];
776 const u32 K26 = pws[gid].i[26];
777 const u32 K27 = pws[gid].i[27];
778 const u32 K28 = pws[gid].i[28];
779 const u32 K29 = pws[gid].i[29];
780 const u32 K30 = pws[gid].i[30];
781 const u32 K31 = pws[gid].i[31];
782 const u32 K32 = pws[gid].i[32];
783 const u32 K33 = pws[gid].i[33];
784 const u32 K34 = pws[gid].i[34];
785 const u32 K35 = pws[gid].i[35];
786 const u32 K36 = pws[gid].i[36];
787 const u32 K37 = pws[gid].i[37];
788 const u32 K38 = pws[gid].i[38];
789 const u32 K39 = pws[gid].i[39];
790 const u32 K40 = pws[gid].i[40];
791 const u32 K41 = pws[gid].i[41];
792 const u32 K42 = pws[gid].i[42];
793 const u32 K43 = pws[gid].i[43];
794 const u32 K44 = pws[gid].i[44];
795 const u32 K45 = pws[gid].i[45];
796 const u32 K46 = pws[gid].i[46];
797 const u32 K47 = pws[gid].i[47];
798 const u32 K48 = pws[gid].i[48];
799 const u32 K49 = pws[gid].i[49];
800 const u32 K50 = pws[gid].i[50];
801 const u32 K51 = pws[gid].i[51];
802 const u32 K52 = pws[gid].i[52];
803 const u32 K53 = pws[gid].i[53];
804 const u32 K54 = pws[gid].i[54];
805 const u32 K55 = pws[gid].i[55];
807 const u32 bfs_cnt = bfs_cnt;
809 const u32 pc_pos = get_local_id (1);
811 const u32 il_pos = pc_pos * 32;
846 k00 |= words_buf_r[pc_pos].b[ 0];
847 k01 |= words_buf_r[pc_pos].b[ 1];
848 k02 |= words_buf_r[pc_pos].b[ 2];
849 k03 |= words_buf_r[pc_pos].b[ 3];
850 k04 |= words_buf_r[pc_pos].b[ 4];
851 k05 |= words_buf_r[pc_pos].b[ 5];
852 k06 |= words_buf_r[pc_pos].b[ 6];
853 k07 |= words_buf_r[pc_pos].b[ 7];
854 k08 |= words_buf_r[pc_pos].b[ 8];
855 k09 |= words_buf_r[pc_pos].b[ 9];
856 k10 |= words_buf_r[pc_pos].b[10];
857 k11 |= words_buf_r[pc_pos].b[11];
858 k12 |= words_buf_r[pc_pos].b[12];
859 k13 |= words_buf_r[pc_pos].b[13];
860 k14 |= words_buf_r[pc_pos].b[14];
861 k15 |= words_buf_r[pc_pos].b[15];
862 k16 |= words_buf_r[pc_pos].b[16];
863 k17 |= words_buf_r[pc_pos].b[17];
864 k18 |= words_buf_r[pc_pos].b[18];
865 k19 |= words_buf_r[pc_pos].b[19];
866 k20 |= words_buf_r[pc_pos].b[20];
867 k21 |= words_buf_r[pc_pos].b[21];
868 k22 |= words_buf_r[pc_pos].b[22];
869 k23 |= words_buf_r[pc_pos].b[23];
870 k24 |= words_buf_r[pc_pos].b[24];
871 k25 |= words_buf_r[pc_pos].b[25];
872 k26 |= words_buf_r[pc_pos].b[26];
873 k27 |= words_buf_r[pc_pos].b[27];
874 k28 |= words_buf_r[pc_pos].b[28];
875 k29 |= words_buf_r[pc_pos].b[29];
876 k30 |= words_buf_r[pc_pos].b[30];
877 k31 |= words_buf_r[pc_pos].b[31];
879 // KGS!@#$% including IP
884 u32 D03 = 0xffffffff;
886 u32 D05 = 0xffffffff;
887 u32 D06 = 0xffffffff;
888 u32 D07 = 0xffffffff;
894 u32 D13 = 0xffffffff;
897 u32 D16 = 0xffffffff;
898 u32 D17 = 0xffffffff;
903 u32 D22 = 0xffffffff;
905 u32 D24 = 0xffffffff;
907 u32 D26 = 0xffffffff;
909 u32 D28 = 0xffffffff;
910 u32 D29 = 0xffffffff;
911 u32 D30 = 0xffffffff;
912 u32 D31 = 0xffffffff;
921 u32 D40 = 0xffffffff;
922 u32 D41 = 0xffffffff;
923 u32 D42 = 0xffffffff;
925 u32 D44 = 0xffffffff;
936 u32 D55 = 0xffffffff;
939 u32 D58 = 0xffffffff;
942 u32 D61 = 0xffffffff;
943 u32 D62 = 0xffffffff;
944 u32 D63 = 0xffffffff;
948 k00, k01, k02, k03, k04, k05, k06,
949 k07, k08, k09, k10, k11, k12, k13,
950 k14, k15, k16, k17, k18, k19, k20,
951 k21, k22, k23, k24, k25, k26, k27,
952 k28, k29, k30, k31, K32, K33, K34,
953 K35, K36, K37, K38, K39, K40, K41,
954 K42, K43, K44, K45, K46, K47, K48,
955 K49, K50, K51, K52, K53, K54, K55,
956 D00, D01, D02, D03, D04, D05, D06, D07,
957 D08, D09, D10, D11, D12, D13, D14, D15,
958 D16, D17, D18, D19, D20, D21, D22, D23,
959 D24, D25, D26, D27, D28, D29, D30, D31,
960 D32, D33, D34, D35, D36, D37, D38, D39,
961 D40, D41, D42, D43, D44, D45, D46, D47,
962 D48, D49, D50, D51, D52, D53, D54, D55,
963 D56, D57, D58, D59, D60, D61, D62, D63
1033 if (digests_cnt < 16)
1035 for (u32 d = 0; d < digests_cnt; d++)
1037 const u32 final_hash_pos = digests_offset + d;
1039 if (hashes_shown[final_hash_pos]) continue;
1043 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1044 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1049 for (int i = 0; i < 32; i++)
1051 const u32 b0 = -((search[0] >> i) & 1);
1052 const u32 b1 = -((search[1] >> i) & 1);
1054 tmpResult |= out[ 0 + i] ^ b0;
1055 tmpResult |= out[32 + i] ^ b1;
1058 if (tmpResult == 0xffffffff) continue;
1060 const u32 slice = 31 - clz (~tmpResult);
1062 const u32 r0 = search[0];
1063 const u32 r1 = search[1];
1076 for (int i = 0; i < 32; i++)
1078 out0[i] = out[ 0 + 31 - i];
1079 out1[i] = out[32 + 31 - i];
1082 transpose32c (out0);
1083 transpose32c (out1);
1086 for (int slice = 0; slice < 32; slice++)
1088 const u32 r0 = out0[31 - slice];
1089 const u32 r1 = out1[31 - slice];
1098 static void m03000s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1104 const u32 gid = get_global_id (0);
1105 const u32 lid = get_local_id (0);
1180 const u32 K00 = pws[gid].i[ 0];
1181 const u32 K01 = pws[gid].i[ 1];
1182 const u32 K02 = pws[gid].i[ 2];
1183 const u32 K03 = pws[gid].i[ 3];
1184 const u32 K04 = pws[gid].i[ 4];
1185 const u32 K05 = pws[gid].i[ 5];
1186 const u32 K06 = pws[gid].i[ 6];
1187 const u32 K07 = pws[gid].i[ 7];
1188 const u32 K08 = pws[gid].i[ 8];
1189 const u32 K09 = pws[gid].i[ 9];
1190 const u32 K10 = pws[gid].i[10];
1191 const u32 K11 = pws[gid].i[11];
1192 const u32 K12 = pws[gid].i[12];
1193 const u32 K13 = pws[gid].i[13];
1194 const u32 K14 = pws[gid].i[14];
1195 const u32 K15 = pws[gid].i[15];
1196 const u32 K16 = pws[gid].i[16];
1197 const u32 K17 = pws[gid].i[17];
1198 const u32 K18 = pws[gid].i[18];
1199 const u32 K19 = pws[gid].i[19];
1200 const u32 K20 = pws[gid].i[20];
1201 const u32 K21 = pws[gid].i[21];
1202 const u32 K22 = pws[gid].i[22];
1203 const u32 K23 = pws[gid].i[23];
1204 const u32 K24 = pws[gid].i[24];
1205 const u32 K25 = pws[gid].i[25];
1206 const u32 K26 = pws[gid].i[26];
1207 const u32 K27 = pws[gid].i[27];
1208 const u32 K28 = pws[gid].i[28];
1209 const u32 K29 = pws[gid].i[29];
1210 const u32 K30 = pws[gid].i[30];
1211 const u32 K31 = pws[gid].i[31];
1212 const u32 K32 = pws[gid].i[32];
1213 const u32 K33 = pws[gid].i[33];
1214 const u32 K34 = pws[gid].i[34];
1215 const u32 K35 = pws[gid].i[35];
1216 const u32 K36 = pws[gid].i[36];
1217 const u32 K37 = pws[gid].i[37];
1218 const u32 K38 = pws[gid].i[38];
1219 const u32 K39 = pws[gid].i[39];
1220 const u32 K40 = pws[gid].i[40];
1221 const u32 K41 = pws[gid].i[41];
1222 const u32 K42 = pws[gid].i[42];
1223 const u32 K43 = pws[gid].i[43];
1224 const u32 K44 = pws[gid].i[44];
1225 const u32 K45 = pws[gid].i[45];
1226 const u32 K46 = pws[gid].i[46];
1227 const u32 K47 = pws[gid].i[47];
1228 const u32 K48 = pws[gid].i[48];
1229 const u32 K49 = pws[gid].i[49];
1230 const u32 K50 = pws[gid].i[50];
1231 const u32 K51 = pws[gid].i[51];
1232 const u32 K52 = pws[gid].i[52];
1233 const u32 K53 = pws[gid].i[53];
1234 const u32 K54 = pws[gid].i[54];
1235 const u32 K55 = pws[gid].i[55];
1237 const u32 pc_pos = get_local_id (1);
1239 const u32 il_pos = pc_pos * 32;
1274 k00 |= words_buf_r[pc_pos].b[ 0];
1275 k01 |= words_buf_r[pc_pos].b[ 1];
1276 k02 |= words_buf_r[pc_pos].b[ 2];
1277 k03 |= words_buf_r[pc_pos].b[ 3];
1278 k04 |= words_buf_r[pc_pos].b[ 4];
1279 k05 |= words_buf_r[pc_pos].b[ 5];
1280 k06 |= words_buf_r[pc_pos].b[ 6];
1281 k07 |= words_buf_r[pc_pos].b[ 7];
1282 k08 |= words_buf_r[pc_pos].b[ 8];
1283 k09 |= words_buf_r[pc_pos].b[ 9];
1284 k10 |= words_buf_r[pc_pos].b[10];
1285 k11 |= words_buf_r[pc_pos].b[11];
1286 k12 |= words_buf_r[pc_pos].b[12];
1287 k13 |= words_buf_r[pc_pos].b[13];
1288 k14 |= words_buf_r[pc_pos].b[14];
1289 k15 |= words_buf_r[pc_pos].b[15];
1290 k16 |= words_buf_r[pc_pos].b[16];
1291 k17 |= words_buf_r[pc_pos].b[17];
1292 k18 |= words_buf_r[pc_pos].b[18];
1293 k19 |= words_buf_r[pc_pos].b[19];
1294 k20 |= words_buf_r[pc_pos].b[20];
1295 k21 |= words_buf_r[pc_pos].b[21];
1296 k22 |= words_buf_r[pc_pos].b[22];
1297 k23 |= words_buf_r[pc_pos].b[23];
1298 k24 |= words_buf_r[pc_pos].b[24];
1299 k25 |= words_buf_r[pc_pos].b[25];
1300 k26 |= words_buf_r[pc_pos].b[26];
1301 k27 |= words_buf_r[pc_pos].b[27];
1302 k28 |= words_buf_r[pc_pos].b[28];
1303 k29 |= words_buf_r[pc_pos].b[29];
1304 k30 |= words_buf_r[pc_pos].b[30];
1305 k31 |= words_buf_r[pc_pos].b[31];
1307 // KGS!@#$% including IP
1312 u32 D03 = 0xffffffff;
1314 u32 D05 = 0xffffffff;
1315 u32 D06 = 0xffffffff;
1316 u32 D07 = 0xffffffff;
1322 u32 D13 = 0xffffffff;
1325 u32 D16 = 0xffffffff;
1326 u32 D17 = 0xffffffff;
1331 u32 D22 = 0xffffffff;
1333 u32 D24 = 0xffffffff;
1335 u32 D26 = 0xffffffff;
1337 u32 D28 = 0xffffffff;
1338 u32 D29 = 0xffffffff;
1339 u32 D30 = 0xffffffff;
1340 u32 D31 = 0xffffffff;
1349 u32 D40 = 0xffffffff;
1350 u32 D41 = 0xffffffff;
1351 u32 D42 = 0xffffffff;
1353 u32 D44 = 0xffffffff;
1364 u32 D55 = 0xffffffff;
1367 u32 D58 = 0xffffffff;
1370 u32 D61 = 0xffffffff;
1371 u32 D62 = 0xffffffff;
1372 u32 D63 = 0xffffffff;
1376 k00, k01, k02, k03, k04, k05, k06,
1377 k07, k08, k09, k10, k11, k12, k13,
1378 k14, k15, k16, k17, k18, k19, k20,
1379 k21, k22, k23, k24, k25, k26, k27,
1380 k28, k29, k30, k31, K32, K33, K34,
1381 K35, K36, K37, K38, K39, K40, K41,
1382 K42, K43, K44, K45, K46, K47, K48,
1383 K49, K50, K51, K52, K53, K54, K55,
1384 D00, D01, D02, D03, D04, D05, D06, D07,
1385 D08, D09, D10, D11, D12, D13, D14, D15,
1386 D16, D17, D18, D19, D20, D21, D22, D23,
1387 D24, D25, D26, D27, D28, D29, D30, D31,
1388 D32, D33, D34, D35, D36, D37, D38, D39,
1389 D40, D41, D42, D43, D44, D45, D46, D47,
1390 D48, D49, D50, D51, D52, D53, D54, D55,
1391 D56, D57, D58, D59, D60, D61, D62, D63
1396 tmpResult |= D00 ^ S00;
1397 tmpResult |= D01 ^ S01;
1398 tmpResult |= D02 ^ S02;
1399 tmpResult |= D03 ^ S03;
1400 tmpResult |= D04 ^ S04;
1401 tmpResult |= D05 ^ S05;
1402 tmpResult |= D06 ^ S06;
1403 tmpResult |= D07 ^ S07;
1404 tmpResult |= D08 ^ S08;
1405 tmpResult |= D09 ^ S09;
1406 tmpResult |= D10 ^ S10;
1407 tmpResult |= D11 ^ S11;
1408 tmpResult |= D12 ^ S12;
1409 tmpResult |= D13 ^ S13;
1410 tmpResult |= D14 ^ S14;
1411 tmpResult |= D15 ^ S15;
1413 if (tmpResult == 0xffffffff) return;
1415 tmpResult |= D16 ^ S16;
1416 tmpResult |= D17 ^ S17;
1417 tmpResult |= D18 ^ S18;
1418 tmpResult |= D19 ^ S19;
1419 tmpResult |= D20 ^ S20;
1420 tmpResult |= D21 ^ S21;
1421 tmpResult |= D22 ^ S22;
1422 tmpResult |= D23 ^ S23;
1423 tmpResult |= D24 ^ S24;
1424 tmpResult |= D25 ^ S25;
1425 tmpResult |= D26 ^ S26;
1426 tmpResult |= D27 ^ S27;
1427 tmpResult |= D28 ^ S28;
1428 tmpResult |= D29 ^ S29;
1429 tmpResult |= D30 ^ S30;
1430 tmpResult |= D31 ^ S31;
1432 if (tmpResult == 0xffffffff) return;
1434 tmpResult |= D32 ^ S32;
1435 tmpResult |= D33 ^ S33;
1436 tmpResult |= D34 ^ S34;
1437 tmpResult |= D35 ^ S35;
1438 tmpResult |= D36 ^ S36;
1439 tmpResult |= D37 ^ S37;
1440 tmpResult |= D38 ^ S38;
1441 tmpResult |= D39 ^ S39;
1442 tmpResult |= D40 ^ S40;
1443 tmpResult |= D41 ^ S41;
1444 tmpResult |= D42 ^ S42;
1445 tmpResult |= D43 ^ S43;
1446 tmpResult |= D44 ^ S44;
1447 tmpResult |= D45 ^ S45;
1448 tmpResult |= D46 ^ S46;
1449 tmpResult |= D47 ^ S47;
1451 if (tmpResult == 0xffffffff) return;
1453 tmpResult |= D48 ^ S48;
1454 tmpResult |= D49 ^ S49;
1455 tmpResult |= D50 ^ S50;
1456 tmpResult |= D51 ^ S51;
1457 tmpResult |= D52 ^ S52;
1458 tmpResult |= D53 ^ S53;
1459 tmpResult |= D54 ^ S54;
1460 tmpResult |= D55 ^ S55;
1461 tmpResult |= D56 ^ S56;
1462 tmpResult |= D57 ^ S57;
1463 tmpResult |= D58 ^ S58;
1464 tmpResult |= D59 ^ S59;
1465 tmpResult |= D60 ^ S60;
1466 tmpResult |= D61 ^ S61;
1467 tmpResult |= D62 ^ S62;
1468 tmpResult |= D63 ^ S63;
1470 if (tmpResult == 0xffffffff) return;
1472 const u32 slice = 31 - clz (~tmpResult);
1478 // transpose bitslice base : easy because no overlapping buffers
1479 // mod : attention race conditions, need different buffers for *in and *out
1482 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_tb (__global pw_t *pws)
1484 const u32 gid = get_global_id (0);
1486 const u32 w0s = pws[gid].i[0];
1487 const u32 w1s = pws[gid].i[1];
1490 for (int i = 0; i < 32; i += 8)
1492 pws[gid].i[i + 0 + 0] = -((w0s >> (i + 7)) & 1);
1493 pws[gid].i[i + 0 + 1] = -((w0s >> (i + 6)) & 1);
1494 pws[gid].i[i + 0 + 2] = -((w0s >> (i + 5)) & 1);
1495 pws[gid].i[i + 0 + 3] = -((w0s >> (i + 4)) & 1);
1496 pws[gid].i[i + 0 + 4] = -((w0s >> (i + 3)) & 1);
1497 pws[gid].i[i + 0 + 5] = -((w0s >> (i + 2)) & 1);
1498 pws[gid].i[i + 0 + 6] = -((w0s >> (i + 1)) & 1);
1499 pws[gid].i[i + 0 + 7] = -((w0s >> (i + 0)) & 1);
1503 for (int i = 0; i < 24; i += 8)
1505 pws[gid].i[i + 32 + 0] = -((w1s >> (i + 7)) & 1);
1506 pws[gid].i[i + 32 + 1] = -((w1s >> (i + 6)) & 1);
1507 pws[gid].i[i + 32 + 2] = -((w1s >> (i + 5)) & 1);
1508 pws[gid].i[i + 32 + 3] = -((w1s >> (i + 4)) & 1);
1509 pws[gid].i[i + 32 + 4] = -((w1s >> (i + 3)) & 1);
1510 pws[gid].i[i + 32 + 5] = -((w1s >> (i + 2)) & 1);
1511 pws[gid].i[i + 32 + 6] = -((w1s >> (i + 1)) & 1);
1512 pws[gid].i[i + 32 + 7] = -((w1s >> (i + 0)) & 1);
1516 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
1518 const u32 gid = get_global_id (0);
1520 const u32 block = gid / 32;
1521 const u32 slice = gid % 32;
1523 const u32 w0 = mod[gid];
1526 for (int i = 0; i < 32; i += 8)
1528 atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
1529 atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
1530 atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
1531 atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
1532 atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
1533 atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
1534 atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
1535 atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
1539 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1545 const u32 gid = get_global_id (0);
1546 const u32 lid = get_local_id (0);
1547 const u32 vid = get_local_id (1);
1549 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1550 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1552 __local u32 s_S[64];
1556 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1560 s_S[32 + vid] = -((s1 >> vid) & 1);
1563 barrier (CLK_LOCAL_MEM_FENCE);
1565 if (gid >= gid_max) return;
1571 m03000m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1574 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1578 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1582 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1588 const u32 gid = get_global_id (0);
1589 const u32 lid = get_local_id (0);
1590 const u32 vid = get_local_id (1);
1592 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1593 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1595 __local u32 s_S[64];
1599 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1603 s_S[32 + vid] = -((s1 >> vid) & 1);
1606 barrier (CLK_LOCAL_MEM_FENCE);
1608 if (gid >= gid_max) return;
1614 m03000s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1617 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1621 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)