2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "types_ocl.c"
22 #define COMPARE_S "check_single_vect1_comp4_bs.c"
23 #define COMPARE_M "check_multi_vect1_comp4_bs.c"
27 #define COMPARE_S "check_single_vect2_comp4_bs.c"
28 #define COMPARE_M "check_multi_vect2_comp4_bs.c"
32 #define COMPARE_S "check_single_vect4_comp4_bs.c"
33 #define COMPARE_M "check_multi_vect4_comp4_bs.c"
36 #define KXX_DECL volatile
37 #define sXXX_DECL volatile
39 //#define myselx(a,b,c) ((c) ? (b) : (a))
40 //#define myselx(a,b,c) ((b & c) | (a & ~c))
41 #define myselx(a,b,c) bitselect ((a), (b), (c))
44 * Bitslice DES S-boxes making use of a vector conditional select operation
45 * (e.g., vsel on PowerPC with AltiVec).
47 * Gate counts: 36 33 33 26 35 34 34 32
50 * Several same-gate-count expressions for each S-box are included (for use on
51 * different CPUs/GPUs).
53 * These Boolean expressions corresponding to DES S-boxes have been generated
54 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
55 * John the Ripper password cracker: http://www.openwall.com/john/
56 * Being mathematical formulas, they are not copyrighted and are free for reuse
59 * This file (a specific representation of the S-box expressions, surrounding
60 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted. (This is a heavily cut-down "BSD license".)
64 * The effort has been sponsored by Rapid7: http://www.rapid7.com
67 #define vnot(dst, a) (dst) = ~(a)
68 #define vand(dst, a, b) (dst) = (a) & (b)
69 #define vor(dst, a, b) (dst) = (a) | (b)
70 #define vandn(dst, a, b) (dst) = (a) & ~(b)
71 #define vxor(dst, a, b) (dst) = (a) ^ (b)
72 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
75 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
76 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
78 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
80 u32 x55AFD1B7, x3C3C69C3, x6993B874;
81 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
82 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
83 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
84 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
85 u32 x0DBCE883, x3A25A215, x37994A96;
86 u32 x8A487EA7, x8B480F07, xB96C2D16;
89 vsel(x0F0F3333, a3, a2, a5);
90 vxor(x3C3C3C3C, a2, a3);
91 vor(x55FF55FF, a1, a4);
92 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
93 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
94 vxor(x09FCB7C0, a4, x0903B73F);
95 vxor(x5CA9E295, a1, x09FCB7C0);
97 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
98 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
99 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
101 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
102 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
103 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
104 vxor(x529E962D, x0F0F3333, x5D91A51E);
106 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
107 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
108 vsel(x428679F3, a5, x4B8771A3, x529E962D);
109 vxor(x6B68D433, x29EEADC0, x428679F3);
111 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
112 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
113 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
114 vnot(x94D83B6C, x6B27C493);
115 vsel(x0, x94D83B6C, x6B68D433, a6);
116 vxor(*out1, *out1, x0);
118 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
119 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
120 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
121 vxor(xD6E19C32, x529E962D, x847F0A1F);
122 vsel(x1, xD6E19C32, x5CA9E295, a6);
123 vxor(*out2, *out2, x1);
125 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
126 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
127 vxor(x37994A96, x0DBCE883, x3A25A215);
128 vsel(x3, x37994A96, x529E962D, a6);
129 vxor(*out4, *out4, x3);
131 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
132 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
133 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
134 vsel(x2, xB96C2D16, x6993B874, a6);
135 vxor(*out3, *out3, x2);
139 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
140 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
142 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
143 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
144 u32 x0F5AF03C, x6600FF56, x87A5F09C;
145 u32 xA55A963C, x3C69C30F, xB44BC32D;
146 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
147 u32 xB46C662D, x278DB412, xB66CB43B;
148 u32 xD2DC4E52, x27993333, xD2994E33;
149 u32 x278D0F2D, x2E0E547B, x09976748;
152 vsel(x55553333, a1, a3, a6);
153 vsel(x0055FF33, a6, x55553333, a5);
154 vsel(x33270F03, a3, a4, x0055FF33);
155 vxor(x66725A56, a1, x33270F03);
156 vxor(x00FFFF00, a5, a6);
157 vxor(x668DA556, x66725A56, x00FFFF00);
159 vsel(x0F0F5A56, a4, x66725A56, a6);
160 vnot(xF0F0A5A9, x0F0F5A56);
161 vxor(xA5A5969A, x55553333, xF0F0A5A9);
162 vxor(xA55A699A, x00FFFF00, xA5A5969A);
163 vsel(x1, xA55A699A, x668DA556, a2);
164 vxor(*out2, *out2, x1);
166 vxor(x0F5AF03C, a4, x0055FF33);
167 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
168 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
170 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
171 vxor(x3C69C30F, a3, x0F5AF03C);
172 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
174 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
175 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
176 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
177 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
178 vsel(x0, x996C66D2, xB44BC32D, a2);
179 vxor(*out1, *out1, x0);
181 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
182 vsel(x278DB412, x668DA556, xA5A5969A, a1);
183 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
185 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
186 vsel(x27993333, x278DB412, a3, x0055FF33);
187 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
188 vsel(x3, x87A5F09C, xD2994E33, a2);
189 vxor(*out4, *out4, x3);
191 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
192 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
193 vxor(x09976748, x27993333, x2E0E547B);
194 vsel(x2, xB66CB43B, x09976748, a2);
195 vxor(*out3, *out3, x2);
199 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
200 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
202 u32 x0F330F33, x0F33F0CC, x5A66A599;
203 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
204 u32 x556BA09E, x665A93AC, x99A56C53;
205 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
206 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
207 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
208 u32 xD6599874, x05330022, xD2699876;
209 u32 x665F9364, xD573F0F2, xB32C6396;
212 vsel(x0F330F33, a4, a3, a5);
213 vxor(x0F33F0CC, a6, x0F330F33);
214 vxor(x5A66A599, a2, x0F33F0CC);
216 vsel(x2111B7BB, a3, a6, x5A66A599);
217 vsel(x03FF3033, a5, a3, x0F33F0CC);
218 vsel(x05BB50EE, a5, x0F33F0CC, a2);
219 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
220 vxor(x265E97A4, x2111B7BB, x074F201F);
222 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
223 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
224 vnot(x99A56C53, x665A93AC);
225 vsel(x1, x265E97A4, x99A56C53, a1);
226 vxor(*out2, *out2, x1);
228 vxor(x25A1A797, x03FF3033, x265E97A4);
229 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
230 vsel(x66559355, x665A93AC, a2, a5);
231 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
233 vxor(x9A5A5C60, x03FF3033, x99A56C53);
234 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
235 vxor(x87698DB4, x5713754C, xD07AF8F8);
236 vxor(xE13C1EE1, x66559355, x87698DB4);
238 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
239 vsel(x655B905E, x66559355, x05BB50EE, a4);
240 vsel(x00A55CFF, a5, a6, x9A5A5C60);
241 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
242 vsel(x0, x9E49915E, xE13C1EE1, a1);
243 vxor(*out1, *out1, x0);
245 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
246 vand(x05330022, x0F330F33, x05BB50EE);
247 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
248 vsel(x3, x5A66A599, xD2699876, a1);
249 vxor(*out4, *out4, x3);
251 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
252 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
253 vxor(xB32C6396, x665F9364, xD573F0F2);
254 vsel(x2, xB32C6396, x47B135C6, a1);
255 vxor(*out3, *out3, x2);
259 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
260 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
262 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
263 x0AF50F0F, x4CA36B59;
265 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
267 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
268 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
271 vsel(x0505AFAF, a5, a3, a1);
272 vsel(x0555AF55, x0505AFAF, a1, a4);
273 vxor(x0A5AA05A, a3, x0555AF55);
274 vsel(x46566456, a1, x0A5AA05A, a2);
275 vsel(x0A0A5F5F, a3, a5, a1);
276 vxor(x0AF55FA0, a4, x0A0A5F5F);
277 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
278 vxor(x4CA36B59, x46566456, x0AF50F0F);
280 vnot(xB35C94A6, x4CA36B59);
282 vsel(x01BB23BB, a4, a2, x0555AF55);
283 vxor(x5050FAFA, a1, x0505AFAF);
284 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
285 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
287 vnot(x56E9861E, xA91679E1);
289 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
290 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
291 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
292 vxor(xD2946D9A, x50E9FA1E, x827D9784);
293 vsel(x2, xD2946D9A, x4CA36B59, a6);
294 vxor(*out3, *out3, x2);
295 vsel(x3, xB35C94A6, xD2946D9A, a6);
296 vxor(*out4, *out4, x3);
298 vsel(x31F720B3, a2, a4, x0AF55FA0);
299 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
300 vxor(x4712A7AD, x56E9861E, x11FB21B3);
301 vxor(x9586CA37, xD2946D9A, x4712A7AD);
302 vsel(x0, x56E9861E, x9586CA37, a6);
303 vxor(*out1, *out1, x0);
304 vsel(x1, x9586CA37, xA91679E1, a6);
305 vxor(*out2, *out2, x1);
309 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
310 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
312 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
313 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
314 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
315 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
316 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
317 u32 x12E6283D, x9E47D3D4, x1A676AB4;
318 u32 x891556DF, xE5E77F82, x6CF2295D;
319 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
322 vsel(x550F550F, a1, a3, a5);
323 vnot(xAAF0AAF0, x550F550F);
324 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
325 vxor(x96C696C6, a2, xA5F5A5F5);
326 vxor(x00FFFF00, a5, a6);
327 vxor(x963969C6, x96C696C6, x00FFFF00);
329 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
330 vsel(xB73121F7, a2, x963969C6, x96C696C6);
331 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
332 vsel(x00558A5F, x1501DF0F, a5, a1);
333 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
335 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
336 vsel(x045157FD, a6, a1, x0679ED42);
337 vsel(xB32077FF, xB73121F7, a6, x045157FD);
338 vxor(x9D49D39C, x2E69A463, xB32077FF);
339 vsel(x2, x9D49D39C, x2E69A463, a4);
340 vxor(*out3, *out3, x2);
342 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
343 vsel(xF72577AF, xB32077FF, x550F550F, a1);
344 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
345 vsel(x1, x5BA4B81D, x963969C6, a4);
346 vxor(*out2, *out2, x1);
348 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
349 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
350 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
351 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
353 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
354 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
355 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
357 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
358 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
359 vxor(x6CF2295D, x891556DF, xE5E77F82);
360 vsel(x3, x1A35669A, x6CF2295D, a4);
361 vxor(*out4, *out4, x3);
363 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
364 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
365 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
366 vsel(x0, x369CC1D6, x1A676AB4, a4);
367 vxor(*out1, *out1, x0);
371 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
372 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
374 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
375 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
376 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
377 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
378 u32 x142956AB, x455D45DF, x1C3EE619;
379 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
380 u32 x840DBB67, x6DA19C1E, x925E63E1;
381 u32 x9C3CA761, x257A75D5, xB946D2B4;
384 vsel(x555500FF, a1, a4, a5);
385 vxor(x666633CC, a2, x555500FF);
386 vsel(x606F30CF, x666633CC, a4, a3);
387 vxor(x353A659A, a1, x606F30CF);
388 vxor(x353A9A65, a5, x353A659A);
389 vnot(xCAC5659A, x353A9A65);
391 vsel(x353A6565, x353A659A, x353A9A65, a4);
392 vsel(x0A3F0A6F, a3, a4, x353A6565);
393 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
394 vxor(x5963A3C6, x353A9A65, x6C5939A3);
396 vsel(x35FF659A, a4, x353A659A, x353A6565);
397 vxor(x3AF06A95, a3, x35FF659A);
398 vsel(x05CF0A9F, a4, a3, x353A9A65);
399 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
401 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
402 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
403 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
404 vsel(x0, xCAC5659A, x942D9A67, a6);
405 vxor(*out1, *out1, x0);
407 vsel(x142956AB, x353A659A, x942D9A67, a2);
408 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
409 vxor(x1C3EE619, x5963A3C6, x455D45DF);
410 vsel(x3, x5963A3C6, x1C3EE619, a6);
411 vxor(*out4, *out4, x3);
413 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
414 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
415 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
416 vxor(x69A49C79, x555500FF, x3CF19C86);
418 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
419 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
420 vnot(x925E63E1, x6DA19C1E);
421 vsel(x1, x925E63E1, x69A49C79, a6);
422 vxor(*out2, *out2, x1);
424 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
425 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
426 vxor(xB946D2B4, x9C3CA761, x257A75D5);
427 vsel(x2, x16E94A97, xB946D2B4, a6);
428 vxor(*out3, *out3, x2);
432 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
433 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
435 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
436 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
437 u32 x738F9C63, x11EF9867, x26DA9867;
438 u32 x4B4B9C63, x4B666663, x4E639396;
439 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
440 u32 xD728827B, x6698807B, x699C585B;
441 u32 x738C847B, xA4A71E18, x74878E78;
442 u32 x333D9639, x74879639, x8B7869C6;
445 vsel(x44447777, a2, a6, a3);
446 vxor(x4B4B7878, a4, x44447777);
447 vsel(x22772277, a3, a5, a2);
448 vsel(x0505F5F5, a6, a2, a4);
449 vsel(x220522F5, x22772277, x0505F5F5, a5);
450 vxor(x694E5A8D, x4B4B7878, x220522F5);
452 vxor(x00FFFF00, a5, a6);
453 vxor(x66666666, a2, a3);
454 vsel(x32353235, a3, x220522F5, a4);
455 vsel(x26253636, x66666666, x32353235, x4B4B7878);
456 vxor(x26DAC936, x00FFFF00, x26253636);
457 vsel(x0, x26DAC936, x694E5A8D, a1);
458 vxor(*out1, *out1, x0);
460 vxor(x738F9C63, a2, x26DAC936);
461 vsel(x11EF9867, x738F9C63, a5, x66666666);
462 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
464 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
465 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
466 vxor(x4E639396, x0505F5F5, x4B666663);
468 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
470 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
471 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
472 vsel(x1, xB14EE41D, x26DA9867, a1);
473 vxor(*out2, *out2, x1);
475 vxor(xD728827B, x66666666, xB14EE41D);
476 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
477 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
478 vsel(x2, x699C585B, x4E639396, a1);
479 vxor(*out3, *out3, x2);
481 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
482 vxor(xA4A71E18, x738F9C63, xD728827B);
483 vsel(x74878E78, x738C847B, xA4A71E18, a4);
485 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
486 vsel(x74879639, x74878E78, x333D9639, a6);
487 vnot(x8B7869C6, x74879639);
488 vsel(x3, x74878E78, x8B7869C6, a1);
489 vxor(*out4, *out4, x3);
493 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
494 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
496 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
497 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
498 u32 x3001F74E, x30555745, x693CD926;
499 u32 x0C0CD926, x0C3F25E9, x38D696A5;
501 u32 x03D2117B, xC778395B, xCB471CB2;
502 u32 x5425B13F, x56B3803F, x919AE965;
503 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
506 vsel(x0505F5F5, a5, a1, a3);
507 vxor(x05FAF50A, a4, x0505F5F5);
508 vsel(x0F0F00FF, a3, a4, a5);
509 vsel(x22227777, a2, a5, a1);
510 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
511 vxor(x34E9B34C, a2, x07DA807F);
513 vsel(x00FFF00F, x05FAF50A, a4, a3);
514 vsel(x0033FCCF, a5, x00FFF00F, a2);
515 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
516 vsel(x0C0C3F3F, a3, a5, a2);
517 vxor(x59698E63, x5565B15C, x0C0C3F3F);
519 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
520 vsel(x30555745, x3001F74E, a1, x00FFF00F);
521 vxor(x693CD926, x59698E63, x30555745);
522 vsel(x2, x693CD926, x59698E63, a6);
523 vxor(*out3, *out3, x2);
525 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
526 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
527 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
529 vnot(xC729695A, x38D696A5);
531 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
532 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
533 vxor(xCB471CB2, x0C3F25E9, xC778395B);
534 vsel(x1, xCB471CB2, x34E9B34C, a6);
535 vxor(*out2, *out2, x1);
537 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
538 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
539 vxor(x919AE965, xC729695A, x56B3803F);
540 vsel(x3, xC729695A, x919AE965, a6);
541 vxor(*out4, *out4, x3);
543 vsel(x17B3023F, x07DA807F, a2, x59698E63);
544 vor(x75555755, a1, x30555745);
545 vxor(x62E6556A, x17B3023F, x75555755);
546 vxor(xA59E6C31, xC778395B, x62E6556A);
547 vsel(x0, xA59E6C31, x38D696A5, a6);
548 vxor(*out1, *out1, x0);
551 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
587 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
588 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
589 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
590 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
591 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
592 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
593 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
594 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
595 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
596 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
597 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
598 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
599 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
600 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
601 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
602 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
606 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
609 #define myselx(a,b,c) ((c) ? (b) : (a))
611 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
612 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
613 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
614 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
615 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
616 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
617 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
618 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
619 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
620 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
621 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
622 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
624 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
625 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
626 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
627 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
628 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
629 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
630 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
631 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
633 for (u32 ii = 0; ii < 25; ii++)
636 for (u32 i = 0; i < 16; i++)
640 case 0: KEYSET00; break;
641 case 1: KEYSET01; break;
642 case 2: KEYSET02; break;
643 case 3: KEYSET03; break;
644 case 4: KEYSET04; break;
645 case 5: KEYSET05; break;
646 case 6: KEYSET06; break;
647 case 7: KEYSET07; break;
648 case 8: KEYSET10; break;
649 case 9: KEYSET11; break;
650 case 10: KEYSET12; break;
651 case 11: KEYSET13; break;
652 case 12: KEYSET14; break;
653 case 13: KEYSET15; break;
654 case 14: KEYSET16; break;
655 case 15: KEYSET17; break;
658 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
659 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
660 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
661 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
662 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
663 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
664 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
665 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
678 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
680 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
681 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
682 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
683 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
684 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
685 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
686 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
687 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
688 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
689 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
690 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
691 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
693 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
694 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
695 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
696 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
697 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
698 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
699 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
700 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
703 * descrypt uses all zero data input, so we can optimize this first round of des
708 s1(k00, k01, k02, k03, k04, k05, &D08, &D16, &D22, &D30);
709 s2(k06, k07, k08, k09, k10, k11, &D12, &D27, &D01, &D17);
710 s3(k12, k13, k14, k15, k16, k17, &D23, &D15, &D29, &D05);
711 s4(k18, k19, k20, k21, k22, k23, &D25, &D19, &D09, &D00);
712 s5(k24, k25, k26, k27, k28, k29, &D07, &D13, &D24, &D02);
713 s6(k30, k31, k32, k33, k34, k35, &D03, &D28, &D10, &D18);
714 s7(k36, k37, k38, k39, k40, k41, &D31, &D11, &D21, &D06);
715 s8(k42, k43, k44, k45, k46, k47, &D04, &D26, &D14, &D20);
719 for (u32 i = 1; i < 16; i++)
723 case 0: KEYSET00; break;
724 case 1: KEYSET01; break;
725 case 2: KEYSET02; break;
726 case 3: KEYSET03; break;
727 case 4: KEYSET04; break;
728 case 5: KEYSET05; break;
729 case 6: KEYSET06; break;
730 case 7: KEYSET07; break;
731 case 8: KEYSET10; break;
732 case 9: KEYSET11; break;
733 case 10: KEYSET12; break;
734 case 11: KEYSET13; break;
735 case 12: KEYSET14; break;
736 case 13: KEYSET15; break;
737 case 14: KEYSET16; break;
738 case 15: KEYSET17; break;
741 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
742 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
743 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
744 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
745 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
746 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
747 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
748 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
755 for (u32 ii = 1; ii < 25; ii++)
757 for (u32 i = 0; i < 16; i++)
761 case 0: KEYSET00; break;
762 case 1: KEYSET01; break;
763 case 2: KEYSET02; break;
764 case 3: KEYSET03; break;
765 case 4: KEYSET04; break;
766 case 5: KEYSET05; break;
767 case 6: KEYSET06; break;
768 case 7: KEYSET07; break;
769 case 8: KEYSET10; break;
770 case 9: KEYSET11; break;
771 case 10: KEYSET12; break;
772 case 11: KEYSET13; break;
773 case 12: KEYSET14; break;
774 case 13: KEYSET15; break;
775 case 14: KEYSET16; break;
776 case 15: KEYSET17; break;
779 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
780 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
781 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
782 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
783 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
784 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
785 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
786 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
799 static void transpose32c (u32 data[32])
801 #define swap(x,y,j,m) \
802 t = ((x) ^ ((y) >> (j))) & (m); \
804 (y) = (y) ^ (t << (j));
808 swap (data[ 0], data[16], 16, 0x0000ffff);
809 swap (data[ 1], data[17], 16, 0x0000ffff);
810 swap (data[ 2], data[18], 16, 0x0000ffff);
811 swap (data[ 3], data[19], 16, 0x0000ffff);
812 swap (data[ 4], data[20], 16, 0x0000ffff);
813 swap (data[ 5], data[21], 16, 0x0000ffff);
814 swap (data[ 6], data[22], 16, 0x0000ffff);
815 swap (data[ 7], data[23], 16, 0x0000ffff);
816 swap (data[ 8], data[24], 16, 0x0000ffff);
817 swap (data[ 9], data[25], 16, 0x0000ffff);
818 swap (data[10], data[26], 16, 0x0000ffff);
819 swap (data[11], data[27], 16, 0x0000ffff);
820 swap (data[12], data[28], 16, 0x0000ffff);
821 swap (data[13], data[29], 16, 0x0000ffff);
822 swap (data[14], data[30], 16, 0x0000ffff);
823 swap (data[15], data[31], 16, 0x0000ffff);
824 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
825 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
826 swap (data[ 2], data[10], 8, 0x00ff00ff);
827 swap (data[ 3], data[11], 8, 0x00ff00ff);
828 swap (data[ 4], data[12], 8, 0x00ff00ff);
829 swap (data[ 5], data[13], 8, 0x00ff00ff);
830 swap (data[ 6], data[14], 8, 0x00ff00ff);
831 swap (data[ 7], data[15], 8, 0x00ff00ff);
832 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
833 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
834 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
835 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
836 swap (data[ 0], data[ 2], 2, 0x33333333);
837 swap (data[ 1], data[ 3], 2, 0x33333333);
838 swap (data[ 0], data[ 1], 1, 0x55555555);
839 swap (data[ 2], data[ 3], 1, 0x55555555);
840 swap (data[ 4], data[ 6], 2, 0x33333333);
841 swap (data[ 5], data[ 7], 2, 0x33333333);
842 swap (data[ 4], data[ 5], 1, 0x55555555);
843 swap (data[ 6], data[ 7], 1, 0x55555555);
844 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
845 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
846 swap (data[10], data[14], 4, 0x0f0f0f0f);
847 swap (data[11], data[15], 4, 0x0f0f0f0f);
848 swap (data[ 8], data[10], 2, 0x33333333);
849 swap (data[ 9], data[11], 2, 0x33333333);
850 swap (data[ 8], data[ 9], 1, 0x55555555);
851 swap (data[10], data[11], 1, 0x55555555);
852 swap (data[12], data[14], 2, 0x33333333);
853 swap (data[13], data[15], 2, 0x33333333);
854 swap (data[12], data[13], 1, 0x55555555);
855 swap (data[14], data[15], 1, 0x55555555);
856 swap (data[16], data[24], 8, 0x00ff00ff);
857 swap (data[17], data[25], 8, 0x00ff00ff);
858 swap (data[18], data[26], 8, 0x00ff00ff);
859 swap (data[19], data[27], 8, 0x00ff00ff);
860 swap (data[20], data[28], 8, 0x00ff00ff);
861 swap (data[21], data[29], 8, 0x00ff00ff);
862 swap (data[22], data[30], 8, 0x00ff00ff);
863 swap (data[23], data[31], 8, 0x00ff00ff);
864 swap (data[16], data[20], 4, 0x0f0f0f0f);
865 swap (data[17], data[21], 4, 0x0f0f0f0f);
866 swap (data[18], data[22], 4, 0x0f0f0f0f);
867 swap (data[19], data[23], 4, 0x0f0f0f0f);
868 swap (data[16], data[18], 2, 0x33333333);
869 swap (data[17], data[19], 2, 0x33333333);
870 swap (data[16], data[17], 1, 0x55555555);
871 swap (data[18], data[19], 1, 0x55555555);
872 swap (data[20], data[22], 2, 0x33333333);
873 swap (data[21], data[23], 2, 0x33333333);
874 swap (data[20], data[21], 1, 0x55555555);
875 swap (data[22], data[23], 1, 0x55555555);
876 swap (data[24], data[28], 4, 0x0f0f0f0f);
877 swap (data[25], data[29], 4, 0x0f0f0f0f);
878 swap (data[26], data[30], 4, 0x0f0f0f0f);
879 swap (data[27], data[31], 4, 0x0f0f0f0f);
880 swap (data[24], data[26], 2, 0x33333333);
881 swap (data[25], data[27], 2, 0x33333333);
882 swap (data[24], data[25], 1, 0x55555555);
883 swap (data[26], data[27], 1, 0x55555555);
884 swap (data[28], data[30], 2, 0x33333333);
885 swap (data[29], data[31], 2, 0x33333333);
886 swap (data[28], data[29], 1, 0x55555555);
887 swap (data[30], data[31], 1, 0x55555555);
890 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
896 const u32 gid = get_global_id (0);
897 const u32 lid = get_local_id (0);
903 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
909 const u32 K00 = pws[gid].i[ 0];
910 const u32 K01 = pws[gid].i[ 1];
911 const u32 K02 = pws[gid].i[ 2];
912 const u32 K03 = pws[gid].i[ 3];
913 const u32 K04 = pws[gid].i[ 4];
914 const u32 K05 = pws[gid].i[ 5];
915 const u32 K06 = pws[gid].i[ 6];
916 const u32 K07 = pws[gid].i[ 7];
917 const u32 K08 = pws[gid].i[ 8];
918 const u32 K09 = pws[gid].i[ 9];
919 const u32 K10 = pws[gid].i[10];
920 const u32 K11 = pws[gid].i[11];
921 const u32 K12 = pws[gid].i[12];
922 const u32 K13 = pws[gid].i[13];
923 const u32 K14 = pws[gid].i[14];
924 const u32 K15 = pws[gid].i[15];
925 const u32 K16 = pws[gid].i[16];
926 const u32 K17 = pws[gid].i[17];
927 const u32 K18 = pws[gid].i[18];
928 const u32 K19 = pws[gid].i[19];
929 const u32 K20 = pws[gid].i[20];
930 const u32 K21 = pws[gid].i[21];
931 const u32 K22 = pws[gid].i[22];
932 const u32 K23 = pws[gid].i[23];
933 const u32 K24 = pws[gid].i[24];
934 const u32 K25 = pws[gid].i[25];
935 const u32 K26 = pws[gid].i[26];
936 const u32 K27 = pws[gid].i[27];
937 const u32 K28 = pws[gid].i[28];
938 const u32 K29 = pws[gid].i[29];
939 const u32 K30 = pws[gid].i[30];
940 const u32 K31 = pws[gid].i[31];
941 const u32 K32 = pws[gid].i[32];
942 const u32 K33 = pws[gid].i[33];
943 const u32 K34 = pws[gid].i[34];
944 const u32 K35 = pws[gid].i[35];
945 const u32 K36 = pws[gid].i[36];
946 const u32 K37 = pws[gid].i[37];
947 const u32 K38 = pws[gid].i[38];
948 const u32 K39 = pws[gid].i[39];
949 const u32 K40 = pws[gid].i[40];
950 const u32 K41 = pws[gid].i[41];
951 const u32 K42 = pws[gid].i[42];
952 const u32 K43 = pws[gid].i[43];
953 const u32 K44 = pws[gid].i[44];
954 const u32 K45 = pws[gid].i[45];
955 const u32 K46 = pws[gid].i[46];
956 const u32 K47 = pws[gid].i[47];
957 const u32 K48 = pws[gid].i[48];
958 const u32 K49 = pws[gid].i[49];
959 const u32 K50 = pws[gid].i[50];
960 const u32 K51 = pws[gid].i[51];
961 const u32 K52 = pws[gid].i[52];
962 const u32 K53 = pws[gid].i[53];
963 const u32 K54 = pws[gid].i[54];
964 const u32 K55 = pws[gid].i[55];
970 const u32 bfs_cnt = bfs_cnt;
972 const u32 pc_pos = get_local_id (1);
974 const u32 il_pos = pc_pos * 32;
1005 k00 |= words_buf_r[pc_pos].b[ 0];
1006 k01 |= words_buf_r[pc_pos].b[ 1];
1007 k02 |= words_buf_r[pc_pos].b[ 2];
1008 k03 |= words_buf_r[pc_pos].b[ 3];
1009 k04 |= words_buf_r[pc_pos].b[ 4];
1010 k05 |= words_buf_r[pc_pos].b[ 5];
1011 k06 |= words_buf_r[pc_pos].b[ 6];
1012 k07 |= words_buf_r[pc_pos].b[ 7];
1013 k08 |= words_buf_r[pc_pos].b[ 8];
1014 k09 |= words_buf_r[pc_pos].b[ 9];
1015 k10 |= words_buf_r[pc_pos].b[10];
1016 k11 |= words_buf_r[pc_pos].b[11];
1017 k12 |= words_buf_r[pc_pos].b[12];
1018 k13 |= words_buf_r[pc_pos].b[13];
1019 k14 |= words_buf_r[pc_pos].b[14];
1020 k15 |= words_buf_r[pc_pos].b[15];
1021 k16 |= words_buf_r[pc_pos].b[16];
1022 k17 |= words_buf_r[pc_pos].b[17];
1023 k18 |= words_buf_r[pc_pos].b[18];
1024 k19 |= words_buf_r[pc_pos].b[19];
1025 k20 |= words_buf_r[pc_pos].b[20];
1026 k21 |= words_buf_r[pc_pos].b[21];
1027 k22 |= words_buf_r[pc_pos].b[22];
1028 k23 |= words_buf_r[pc_pos].b[23];
1029 k24 |= words_buf_r[pc_pos].b[24];
1030 k25 |= words_buf_r[pc_pos].b[25];
1031 k26 |= words_buf_r[pc_pos].b[26];
1032 k27 |= words_buf_r[pc_pos].b[27];
1102 k00, k01, k02, k03, k04, k05, k06,
1103 k07, k08, k09, k10, k11, k12, k13,
1104 k14, k15, k16, k17, k18, k19, k20,
1105 k21, k22, k23, k24, k25, k26, k27,
1106 K28, K29, K30, K31, K32, K33, K34,
1107 K35, K36, K37, K38, K39, K40, K41,
1108 K42, K43, K44, K45, K46, K47, K48,
1109 K49, K50, K51, K52, K53, K54, K55,
1110 D00, D01, D02, D03, D04, D05, D06, D07,
1111 D08, D09, D10, D11, D12, D13, D14, D15,
1112 D16, D17, D18, D19, D20, D21, D22, D23,
1113 D24, D25, D26, D27, D28, D29, D30, D31,
1114 D32, D33, D34, D35, D36, D37, D38, D39,
1115 D40, D41, D42, D43, D44, D45, D46, D47,
1116 D48, D49, D50, D51, D52, D53, D54, D55,
1117 D56, D57, D58, D59, D60, D61, D62, D63
1187 if (digests_cnt < 16)
1189 for (u32 d = 0; d < digests_cnt; d++)
1191 const u32 final_hash_pos = digests_offset + d;
1193 if (hashes_shown[final_hash_pos]) continue;
1197 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1198 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1203 for (int i = 0; i < 32; i++)
1205 const u32 b0 = -((search[0] >> i) & 1);
1206 const u32 b1 = -((search[1] >> i) & 1);
1208 tmpResult |= out[ 0 + i] ^ b0;
1209 tmpResult |= out[32 + i] ^ b1;
1212 if (tmpResult == 0xffffffff) continue;
1214 const u32 slice = 31 - clz (~tmpResult);
1216 const u32 r0 = search[0];
1217 const u32 r1 = search[1];
1230 for (int i = 0; i < 32; i++)
1232 out0[i] = out[ 0 + 31 - i];
1233 out1[i] = out[32 + 31 - i];
1236 transpose32c (out0);
1237 transpose32c (out1);
1240 for (int slice = 0; slice < 32; slice++)
1242 const u32 r0 = out0[31 - slice];
1243 const u32 r1 = out1[31 - slice];
1252 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1258 const u32 gid = get_global_id (0);
1259 const u32 lid = get_local_id (0);
1265 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1340 const u32 K00 = pws[gid].i[ 0];
1341 const u32 K01 = pws[gid].i[ 1];
1342 const u32 K02 = pws[gid].i[ 2];
1343 const u32 K03 = pws[gid].i[ 3];
1344 const u32 K04 = pws[gid].i[ 4];
1345 const u32 K05 = pws[gid].i[ 5];
1346 const u32 K06 = pws[gid].i[ 6];
1347 const u32 K07 = pws[gid].i[ 7];
1348 const u32 K08 = pws[gid].i[ 8];
1349 const u32 K09 = pws[gid].i[ 9];
1350 const u32 K10 = pws[gid].i[10];
1351 const u32 K11 = pws[gid].i[11];
1352 const u32 K12 = pws[gid].i[12];
1353 const u32 K13 = pws[gid].i[13];
1354 const u32 K14 = pws[gid].i[14];
1355 const u32 K15 = pws[gid].i[15];
1356 const u32 K16 = pws[gid].i[16];
1357 const u32 K17 = pws[gid].i[17];
1358 const u32 K18 = pws[gid].i[18];
1359 const u32 K19 = pws[gid].i[19];
1360 const u32 K20 = pws[gid].i[20];
1361 const u32 K21 = pws[gid].i[21];
1362 const u32 K22 = pws[gid].i[22];
1363 const u32 K23 = pws[gid].i[23];
1364 const u32 K24 = pws[gid].i[24];
1365 const u32 K25 = pws[gid].i[25];
1366 const u32 K26 = pws[gid].i[26];
1367 const u32 K27 = pws[gid].i[27];
1368 const u32 K28 = pws[gid].i[28];
1369 const u32 K29 = pws[gid].i[29];
1370 const u32 K30 = pws[gid].i[30];
1371 const u32 K31 = pws[gid].i[31];
1372 const u32 K32 = pws[gid].i[32];
1373 const u32 K33 = pws[gid].i[33];
1374 const u32 K34 = pws[gid].i[34];
1375 const u32 K35 = pws[gid].i[35];
1376 const u32 K36 = pws[gid].i[36];
1377 const u32 K37 = pws[gid].i[37];
1378 const u32 K38 = pws[gid].i[38];
1379 const u32 K39 = pws[gid].i[39];
1380 const u32 K40 = pws[gid].i[40];
1381 const u32 K41 = pws[gid].i[41];
1382 const u32 K42 = pws[gid].i[42];
1383 const u32 K43 = pws[gid].i[43];
1384 const u32 K44 = pws[gid].i[44];
1385 const u32 K45 = pws[gid].i[45];
1386 const u32 K46 = pws[gid].i[46];
1387 const u32 K47 = pws[gid].i[47];
1388 const u32 K48 = pws[gid].i[48];
1389 const u32 K49 = pws[gid].i[49];
1390 const u32 K50 = pws[gid].i[50];
1391 const u32 K51 = pws[gid].i[51];
1392 const u32 K52 = pws[gid].i[52];
1393 const u32 K53 = pws[gid].i[53];
1394 const u32 K54 = pws[gid].i[54];
1395 const u32 K55 = pws[gid].i[55];
1401 const u32 pc_pos = get_local_id (1);
1403 const u32 il_pos = pc_pos * 32;
1434 k00 |= words_buf_r[pc_pos].b[ 0];
1435 k01 |= words_buf_r[pc_pos].b[ 1];
1436 k02 |= words_buf_r[pc_pos].b[ 2];
1437 k03 |= words_buf_r[pc_pos].b[ 3];
1438 k04 |= words_buf_r[pc_pos].b[ 4];
1439 k05 |= words_buf_r[pc_pos].b[ 5];
1440 k06 |= words_buf_r[pc_pos].b[ 6];
1441 k07 |= words_buf_r[pc_pos].b[ 7];
1442 k08 |= words_buf_r[pc_pos].b[ 8];
1443 k09 |= words_buf_r[pc_pos].b[ 9];
1444 k10 |= words_buf_r[pc_pos].b[10];
1445 k11 |= words_buf_r[pc_pos].b[11];
1446 k12 |= words_buf_r[pc_pos].b[12];
1447 k13 |= words_buf_r[pc_pos].b[13];
1448 k14 |= words_buf_r[pc_pos].b[14];
1449 k15 |= words_buf_r[pc_pos].b[15];
1450 k16 |= words_buf_r[pc_pos].b[16];
1451 k17 |= words_buf_r[pc_pos].b[17];
1452 k18 |= words_buf_r[pc_pos].b[18];
1453 k19 |= words_buf_r[pc_pos].b[19];
1454 k20 |= words_buf_r[pc_pos].b[20];
1455 k21 |= words_buf_r[pc_pos].b[21];
1456 k22 |= words_buf_r[pc_pos].b[22];
1457 k23 |= words_buf_r[pc_pos].b[23];
1458 k24 |= words_buf_r[pc_pos].b[24];
1459 k25 |= words_buf_r[pc_pos].b[25];
1460 k26 |= words_buf_r[pc_pos].b[26];
1461 k27 |= words_buf_r[pc_pos].b[27];
1531 k00, k01, k02, k03, k04, k05, k06,
1532 k07, k08, k09, k10, k11, k12, k13,
1533 k14, k15, k16, k17, k18, k19, k20,
1534 k21, k22, k23, k24, k25, k26, k27,
1535 K28, K29, K30, K31, K32, K33, K34,
1536 K35, K36, K37, K38, K39, K40, K41,
1537 K42, K43, K44, K45, K46, K47, K48,
1538 K49, K50, K51, K52, K53, K54, K55,
1539 D00, D01, D02, D03, D04, D05, D06, D07,
1540 D08, D09, D10, D11, D12, D13, D14, D15,
1541 D16, D17, D18, D19, D20, D21, D22, D23,
1542 D24, D25, D26, D27, D28, D29, D30, D31,
1543 D32, D33, D34, D35, D36, D37, D38, D39,
1544 D40, D41, D42, D43, D44, D45, D46, D47,
1545 D48, D49, D50, D51, D52, D53, D54, D55,
1546 D56, D57, D58, D59, D60, D61, D62, D63
1551 tmpResult |= D00 ^ S00;
1552 tmpResult |= D01 ^ S01;
1553 tmpResult |= D02 ^ S02;
1554 tmpResult |= D03 ^ S03;
1555 tmpResult |= D04 ^ S04;
1556 tmpResult |= D05 ^ S05;
1557 tmpResult |= D06 ^ S06;
1558 tmpResult |= D07 ^ S07;
1559 tmpResult |= D08 ^ S08;
1560 tmpResult |= D09 ^ S09;
1561 tmpResult |= D10 ^ S10;
1562 tmpResult |= D11 ^ S11;
1563 tmpResult |= D12 ^ S12;
1564 tmpResult |= D13 ^ S13;
1565 tmpResult |= D14 ^ S14;
1566 tmpResult |= D15 ^ S15;
1567 tmpResult |= D16 ^ S16;
1568 tmpResult |= D17 ^ S17;
1569 tmpResult |= D18 ^ S18;
1570 tmpResult |= D19 ^ S19;
1571 tmpResult |= D20 ^ S20;
1572 tmpResult |= D21 ^ S21;
1573 tmpResult |= D22 ^ S22;
1574 tmpResult |= D23 ^ S23;
1575 tmpResult |= D24 ^ S24;
1576 tmpResult |= D25 ^ S25;
1577 tmpResult |= D26 ^ S26;
1578 tmpResult |= D27 ^ S27;
1579 tmpResult |= D28 ^ S28;
1580 tmpResult |= D29 ^ S29;
1581 tmpResult |= D30 ^ S30;
1582 tmpResult |= D31 ^ S31;
1583 tmpResult |= D32 ^ S32;
1584 tmpResult |= D33 ^ S33;
1585 tmpResult |= D34 ^ S34;
1586 tmpResult |= D35 ^ S35;
1587 tmpResult |= D36 ^ S36;
1588 tmpResult |= D37 ^ S37;
1589 tmpResult |= D38 ^ S38;
1590 tmpResult |= D39 ^ S39;
1591 tmpResult |= D40 ^ S40;
1592 tmpResult |= D41 ^ S41;
1593 tmpResult |= D42 ^ S42;
1594 tmpResult |= D43 ^ S43;
1595 tmpResult |= D44 ^ S44;
1596 tmpResult |= D45 ^ S45;
1597 tmpResult |= D46 ^ S46;
1598 tmpResult |= D47 ^ S47;
1600 if (tmpResult == 0xffffffff) return;
1602 tmpResult |= D48 ^ S48;
1603 tmpResult |= D49 ^ S49;
1604 tmpResult |= D50 ^ S50;
1605 tmpResult |= D51 ^ S51;
1606 tmpResult |= D52 ^ S52;
1607 tmpResult |= D53 ^ S53;
1608 tmpResult |= D54 ^ S54;
1609 tmpResult |= D55 ^ S55;
1610 tmpResult |= D56 ^ S56;
1611 tmpResult |= D57 ^ S57;
1612 tmpResult |= D58 ^ S58;
1613 tmpResult |= D59 ^ S59;
1614 tmpResult |= D60 ^ S60;
1615 tmpResult |= D61 ^ S61;
1616 tmpResult |= D62 ^ S62;
1617 tmpResult |= D63 ^ S63;
1619 if (tmpResult == 0xffffffff) return;
1621 const u32 slice = 31 - clz (~tmpResult);
1627 // transpose bitslice base : easy because no overlapping buffers
1628 // mod : attention race conditions, need different buffers for *in and *out
1631 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
1633 const u32 gid = get_global_id (0);
1635 const u32 w0 = pws[gid].i[0];
1636 const u32 w1 = pws[gid].i[1];
1638 const u32 w0s = (w0 << 1) & 0xfefefefe;
1639 const u32 w1s = (w1 << 1) & 0xfefefefe;
1642 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1644 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
1645 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
1646 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
1647 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
1648 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
1649 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
1650 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
1654 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1656 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
1657 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
1658 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
1659 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
1660 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
1661 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
1662 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
1666 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
1668 const u32 gid = get_global_id (0);
1669 const u32 lid = get_local_id (0);
1671 const u32 block = gid / 32;
1672 const u32 slice = gid % 32;
1674 const u32 w0 = mod[gid];
1676 const u32 w0s = (w0 << 1) & 0xfefefefe;
1679 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1681 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
1682 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
1683 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
1684 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
1685 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
1686 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
1687 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
1691 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1697 const u32 gid = get_global_id (0);
1698 const u32 lid = get_local_id (0);
1699 const u32 vid = get_local_id (1);
1701 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1702 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1704 __local u32 s_S[64];
1708 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1712 s_S[32 + vid] = -((s1 >> vid) & 1);
1715 barrier (CLK_LOCAL_MEM_FENCE);
1717 if (gid >= gid_max) return;
1723 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1726 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1730 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1734 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1740 const u32 gid = get_global_id (0);
1741 const u32 lid = get_local_id (0);
1742 const u32 vid = get_local_id (1);
1744 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1745 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1747 __local u32 s_S[64];
1751 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1755 s_S[32 + vid] = -((s1 >> vid) & 1);
1758 barrier (CLK_LOCAL_MEM_FENCE);
1760 if (gid >= gid_max) return;
1766 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1769 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1773 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)