2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "types_ocl.c"
21 #define COMPARE_S "check_single_comp4_bs.c"
22 #define COMPARE_M "check_multi_comp4_bs.c"
24 #define KXX_DECL volatile
25 #define sXXX_DECL volatile
27 //#define myselx(a,b,c) ((c) ? (b) : (a))
28 //#define myselx(a,b,c) ((b & c) | (a & ~c))
29 #define myselx(a,b,c) bitselect ((a), (b), (c))
32 * Bitslice DES S-boxes making use of a vector conditional select operation
33 * (e.g., vsel on PowerPC with AltiVec).
35 * Gate counts: 36 33 33 26 35 34 34 32
38 * Several same-gate-count expressions for each S-box are included (for use on
39 * different CPUs/GPUs).
41 * These Boolean expressions corresponding to DES S-boxes have been generated
42 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
43 * John the Ripper password cracker: http://www.openwall.com/john/
44 * Being mathematical formulas, they are not copyrighted and are free for reuse
47 * This file (a specific representation of the S-box expressions, surrounding
48 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
49 * Redistribution and use in source and binary forms, with or without
50 * modification, are permitted. (This is a heavily cut-down "BSD license".)
52 * The effort has been sponsored by Rapid7: http://www.rapid7.com
55 #define vnot(dst, a) (dst) = ~(a)
56 #define vand(dst, a, b) (dst) = (a) & (b)
57 #define vor(dst, a, b) (dst) = (a) | (b)
58 #define vandn(dst, a, b) (dst) = (a) & ~(b)
59 #define vxor(dst, a, b) (dst) = (a) ^ (b)
60 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
63 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
64 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
66 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
68 u32 x55AFD1B7, x3C3C69C3, x6993B874;
69 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
70 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
71 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
72 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
73 u32 x0DBCE883, x3A25A215, x37994A96;
74 u32 x8A487EA7, x8B480F07, xB96C2D16;
77 vsel(x0F0F3333, a3, a2, a5);
78 vxor(x3C3C3C3C, a2, a3);
79 vor(x55FF55FF, a1, a4);
80 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
81 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
82 vxor(x09FCB7C0, a4, x0903B73F);
83 vxor(x5CA9E295, a1, x09FCB7C0);
85 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
86 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
87 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
89 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
90 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
91 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
92 vxor(x529E962D, x0F0F3333, x5D91A51E);
94 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
95 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
96 vsel(x428679F3, a5, x4B8771A3, x529E962D);
97 vxor(x6B68D433, x29EEADC0, x428679F3);
99 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
100 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
101 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
102 vnot(x94D83B6C, x6B27C493);
103 vsel(x0, x94D83B6C, x6B68D433, a6);
104 vxor(*out1, *out1, x0);
106 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
107 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
108 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
109 vxor(xD6E19C32, x529E962D, x847F0A1F);
110 vsel(x1, xD6E19C32, x5CA9E295, a6);
111 vxor(*out2, *out2, x1);
113 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
114 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
115 vxor(x37994A96, x0DBCE883, x3A25A215);
116 vsel(x3, x37994A96, x529E962D, a6);
117 vxor(*out4, *out4, x3);
119 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
120 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
121 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
122 vsel(x2, xB96C2D16, x6993B874, a6);
123 vxor(*out3, *out3, x2);
127 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
128 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
130 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
131 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
132 u32 x0F5AF03C, x6600FF56, x87A5F09C;
133 u32 xA55A963C, x3C69C30F, xB44BC32D;
134 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
135 u32 xB46C662D, x278DB412, xB66CB43B;
136 u32 xD2DC4E52, x27993333, xD2994E33;
137 u32 x278D0F2D, x2E0E547B, x09976748;
140 vsel(x55553333, a1, a3, a6);
141 vsel(x0055FF33, a6, x55553333, a5);
142 vsel(x33270F03, a3, a4, x0055FF33);
143 vxor(x66725A56, a1, x33270F03);
144 vxor(x00FFFF00, a5, a6);
145 vxor(x668DA556, x66725A56, x00FFFF00);
147 vsel(x0F0F5A56, a4, x66725A56, a6);
148 vnot(xF0F0A5A9, x0F0F5A56);
149 vxor(xA5A5969A, x55553333, xF0F0A5A9);
150 vxor(xA55A699A, x00FFFF00, xA5A5969A);
151 vsel(x1, xA55A699A, x668DA556, a2);
152 vxor(*out2, *out2, x1);
154 vxor(x0F5AF03C, a4, x0055FF33);
155 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
156 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
158 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
159 vxor(x3C69C30F, a3, x0F5AF03C);
160 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
162 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
163 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
164 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
165 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
166 vsel(x0, x996C66D2, xB44BC32D, a2);
167 vxor(*out1, *out1, x0);
169 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
170 vsel(x278DB412, x668DA556, xA5A5969A, a1);
171 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
173 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
174 vsel(x27993333, x278DB412, a3, x0055FF33);
175 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
176 vsel(x3, x87A5F09C, xD2994E33, a2);
177 vxor(*out4, *out4, x3);
179 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
180 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
181 vxor(x09976748, x27993333, x2E0E547B);
182 vsel(x2, xB66CB43B, x09976748, a2);
183 vxor(*out3, *out3, x2);
187 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
188 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
190 u32 x0F330F33, x0F33F0CC, x5A66A599;
191 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
192 u32 x556BA09E, x665A93AC, x99A56C53;
193 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
194 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
195 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
196 u32 xD6599874, x05330022, xD2699876;
197 u32 x665F9364, xD573F0F2, xB32C6396;
200 vsel(x0F330F33, a4, a3, a5);
201 vxor(x0F33F0CC, a6, x0F330F33);
202 vxor(x5A66A599, a2, x0F33F0CC);
204 vsel(x2111B7BB, a3, a6, x5A66A599);
205 vsel(x03FF3033, a5, a3, x0F33F0CC);
206 vsel(x05BB50EE, a5, x0F33F0CC, a2);
207 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
208 vxor(x265E97A4, x2111B7BB, x074F201F);
210 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
211 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
212 vnot(x99A56C53, x665A93AC);
213 vsel(x1, x265E97A4, x99A56C53, a1);
214 vxor(*out2, *out2, x1);
216 vxor(x25A1A797, x03FF3033, x265E97A4);
217 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
218 vsel(x66559355, x665A93AC, a2, a5);
219 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
221 vxor(x9A5A5C60, x03FF3033, x99A56C53);
222 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
223 vxor(x87698DB4, x5713754C, xD07AF8F8);
224 vxor(xE13C1EE1, x66559355, x87698DB4);
226 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
227 vsel(x655B905E, x66559355, x05BB50EE, a4);
228 vsel(x00A55CFF, a5, a6, x9A5A5C60);
229 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
230 vsel(x0, x9E49915E, xE13C1EE1, a1);
231 vxor(*out1, *out1, x0);
233 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
234 vand(x05330022, x0F330F33, x05BB50EE);
235 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
236 vsel(x3, x5A66A599, xD2699876, a1);
237 vxor(*out4, *out4, x3);
239 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
240 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
241 vxor(xB32C6396, x665F9364, xD573F0F2);
242 vsel(x2, xB32C6396, x47B135C6, a1);
243 vxor(*out3, *out3, x2);
247 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
248 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
250 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
251 x0AF50F0F, x4CA36B59;
253 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
255 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
256 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
259 vsel(x0505AFAF, a5, a3, a1);
260 vsel(x0555AF55, x0505AFAF, a1, a4);
261 vxor(x0A5AA05A, a3, x0555AF55);
262 vsel(x46566456, a1, x0A5AA05A, a2);
263 vsel(x0A0A5F5F, a3, a5, a1);
264 vxor(x0AF55FA0, a4, x0A0A5F5F);
265 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
266 vxor(x4CA36B59, x46566456, x0AF50F0F);
268 vnot(xB35C94A6, x4CA36B59);
270 vsel(x01BB23BB, a4, a2, x0555AF55);
271 vxor(x5050FAFA, a1, x0505AFAF);
272 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
273 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
275 vnot(x56E9861E, xA91679E1);
277 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
278 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
279 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
280 vxor(xD2946D9A, x50E9FA1E, x827D9784);
281 vsel(x2, xD2946D9A, x4CA36B59, a6);
282 vxor(*out3, *out3, x2);
283 vsel(x3, xB35C94A6, xD2946D9A, a6);
284 vxor(*out4, *out4, x3);
286 vsel(x31F720B3, a2, a4, x0AF55FA0);
287 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
288 vxor(x4712A7AD, x56E9861E, x11FB21B3);
289 vxor(x9586CA37, xD2946D9A, x4712A7AD);
290 vsel(x0, x56E9861E, x9586CA37, a6);
291 vxor(*out1, *out1, x0);
292 vsel(x1, x9586CA37, xA91679E1, a6);
293 vxor(*out2, *out2, x1);
297 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
298 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
300 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
301 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
302 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
303 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
304 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
305 u32 x12E6283D, x9E47D3D4, x1A676AB4;
306 u32 x891556DF, xE5E77F82, x6CF2295D;
307 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
310 vsel(x550F550F, a1, a3, a5);
311 vnot(xAAF0AAF0, x550F550F);
312 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
313 vxor(x96C696C6, a2, xA5F5A5F5);
314 vxor(x00FFFF00, a5, a6);
315 vxor(x963969C6, x96C696C6, x00FFFF00);
317 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
318 vsel(xB73121F7, a2, x963969C6, x96C696C6);
319 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
320 vsel(x00558A5F, x1501DF0F, a5, a1);
321 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
323 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
324 vsel(x045157FD, a6, a1, x0679ED42);
325 vsel(xB32077FF, xB73121F7, a6, x045157FD);
326 vxor(x9D49D39C, x2E69A463, xB32077FF);
327 vsel(x2, x9D49D39C, x2E69A463, a4);
328 vxor(*out3, *out3, x2);
330 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
331 vsel(xF72577AF, xB32077FF, x550F550F, a1);
332 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
333 vsel(x1, x5BA4B81D, x963969C6, a4);
334 vxor(*out2, *out2, x1);
336 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
337 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
338 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
339 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
341 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
342 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
343 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
345 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
346 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
347 vxor(x6CF2295D, x891556DF, xE5E77F82);
348 vsel(x3, x1A35669A, x6CF2295D, a4);
349 vxor(*out4, *out4, x3);
351 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
352 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
353 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
354 vsel(x0, x369CC1D6, x1A676AB4, a4);
355 vxor(*out1, *out1, x0);
359 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
360 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
362 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
363 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
364 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
365 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
366 u32 x142956AB, x455D45DF, x1C3EE619;
367 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
368 u32 x840DBB67, x6DA19C1E, x925E63E1;
369 u32 x9C3CA761, x257A75D5, xB946D2B4;
372 vsel(x555500FF, a1, a4, a5);
373 vxor(x666633CC, a2, x555500FF);
374 vsel(x606F30CF, x666633CC, a4, a3);
375 vxor(x353A659A, a1, x606F30CF);
376 vxor(x353A9A65, a5, x353A659A);
377 vnot(xCAC5659A, x353A9A65);
379 vsel(x353A6565, x353A659A, x353A9A65, a4);
380 vsel(x0A3F0A6F, a3, a4, x353A6565);
381 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
382 vxor(x5963A3C6, x353A9A65, x6C5939A3);
384 vsel(x35FF659A, a4, x353A659A, x353A6565);
385 vxor(x3AF06A95, a3, x35FF659A);
386 vsel(x05CF0A9F, a4, a3, x353A9A65);
387 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
389 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
390 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
391 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
392 vsel(x0, xCAC5659A, x942D9A67, a6);
393 vxor(*out1, *out1, x0);
395 vsel(x142956AB, x353A659A, x942D9A67, a2);
396 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
397 vxor(x1C3EE619, x5963A3C6, x455D45DF);
398 vsel(x3, x5963A3C6, x1C3EE619, a6);
399 vxor(*out4, *out4, x3);
401 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
402 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
403 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
404 vxor(x69A49C79, x555500FF, x3CF19C86);
406 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
407 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
408 vnot(x925E63E1, x6DA19C1E);
409 vsel(x1, x925E63E1, x69A49C79, a6);
410 vxor(*out2, *out2, x1);
412 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
413 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
414 vxor(xB946D2B4, x9C3CA761, x257A75D5);
415 vsel(x2, x16E94A97, xB946D2B4, a6);
416 vxor(*out3, *out3, x2);
420 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
421 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
423 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
424 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
425 u32 x738F9C63, x11EF9867, x26DA9867;
426 u32 x4B4B9C63, x4B666663, x4E639396;
427 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
428 u32 xD728827B, x6698807B, x699C585B;
429 u32 x738C847B, xA4A71E18, x74878E78;
430 u32 x333D9639, x74879639, x8B7869C6;
433 vsel(x44447777, a2, a6, a3);
434 vxor(x4B4B7878, a4, x44447777);
435 vsel(x22772277, a3, a5, a2);
436 vsel(x0505F5F5, a6, a2, a4);
437 vsel(x220522F5, x22772277, x0505F5F5, a5);
438 vxor(x694E5A8D, x4B4B7878, x220522F5);
440 vxor(x00FFFF00, a5, a6);
441 vxor(x66666666, a2, a3);
442 vsel(x32353235, a3, x220522F5, a4);
443 vsel(x26253636, x66666666, x32353235, x4B4B7878);
444 vxor(x26DAC936, x00FFFF00, x26253636);
445 vsel(x0, x26DAC936, x694E5A8D, a1);
446 vxor(*out1, *out1, x0);
448 vxor(x738F9C63, a2, x26DAC936);
449 vsel(x11EF9867, x738F9C63, a5, x66666666);
450 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
452 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
453 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
454 vxor(x4E639396, x0505F5F5, x4B666663);
456 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
458 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
459 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
460 vsel(x1, xB14EE41D, x26DA9867, a1);
461 vxor(*out2, *out2, x1);
463 vxor(xD728827B, x66666666, xB14EE41D);
464 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
465 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
466 vsel(x2, x699C585B, x4E639396, a1);
467 vxor(*out3, *out3, x2);
469 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
470 vxor(xA4A71E18, x738F9C63, xD728827B);
471 vsel(x74878E78, x738C847B, xA4A71E18, a4);
473 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
474 vsel(x74879639, x74878E78, x333D9639, a6);
475 vnot(x8B7869C6, x74879639);
476 vsel(x3, x74878E78, x8B7869C6, a1);
477 vxor(*out4, *out4, x3);
481 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
482 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
484 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
485 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
486 u32 x3001F74E, x30555745, x693CD926;
487 u32 x0C0CD926, x0C3F25E9, x38D696A5;
489 u32 x03D2117B, xC778395B, xCB471CB2;
490 u32 x5425B13F, x56B3803F, x919AE965;
491 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
494 vsel(x0505F5F5, a5, a1, a3);
495 vxor(x05FAF50A, a4, x0505F5F5);
496 vsel(x0F0F00FF, a3, a4, a5);
497 vsel(x22227777, a2, a5, a1);
498 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
499 vxor(x34E9B34C, a2, x07DA807F);
501 vsel(x00FFF00F, x05FAF50A, a4, a3);
502 vsel(x0033FCCF, a5, x00FFF00F, a2);
503 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
504 vsel(x0C0C3F3F, a3, a5, a2);
505 vxor(x59698E63, x5565B15C, x0C0C3F3F);
507 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
508 vsel(x30555745, x3001F74E, a1, x00FFF00F);
509 vxor(x693CD926, x59698E63, x30555745);
510 vsel(x2, x693CD926, x59698E63, a6);
511 vxor(*out3, *out3, x2);
513 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
514 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
515 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
517 vnot(xC729695A, x38D696A5);
519 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
520 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
521 vxor(xCB471CB2, x0C3F25E9, xC778395B);
522 vsel(x1, xCB471CB2, x34E9B34C, a6);
523 vxor(*out2, *out2, x1);
525 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
526 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
527 vxor(x919AE965, xC729695A, x56B3803F);
528 vsel(x3, xC729695A, x919AE965, a6);
529 vxor(*out4, *out4, x3);
531 vsel(x17B3023F, x07DA807F, a2, x59698E63);
532 vor(x75555755, a1, x30555745);
533 vxor(x62E6556A, x17B3023F, x75555755);
534 vxor(xA59E6C31, xC778395B, x62E6556A);
535 vsel(x0, xA59E6C31, x38D696A5, a6);
536 vxor(*out1, *out1, x0);
539 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
575 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
576 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
577 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
578 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
579 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
580 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
581 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
582 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
583 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
584 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
585 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
586 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
587 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
588 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
589 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
590 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
594 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
597 #define myselx(a,b,c) ((c) ? (b) : (a))
599 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
600 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
601 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
602 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
603 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
604 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
605 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
606 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
607 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
608 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
609 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
610 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
612 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
613 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
614 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
615 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
616 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
617 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
618 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
619 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
621 for (u32 ii = 0; ii < 25; ii++)
624 for (u32 i = 0; i < 16; i++)
628 case 0: KEYSET00; break;
629 case 1: KEYSET01; break;
630 case 2: KEYSET02; break;
631 case 3: KEYSET03; break;
632 case 4: KEYSET04; break;
633 case 5: KEYSET05; break;
634 case 6: KEYSET06; break;
635 case 7: KEYSET07; break;
636 case 8: KEYSET10; break;
637 case 9: KEYSET11; break;
638 case 10: KEYSET12; break;
639 case 11: KEYSET13; break;
640 case 12: KEYSET14; break;
641 case 13: KEYSET15; break;
642 case 14: KEYSET16; break;
643 case 15: KEYSET17; break;
646 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
647 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
648 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
649 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
650 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
651 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
652 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
653 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
666 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
668 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
669 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
670 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
671 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
672 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
673 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
674 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
675 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
676 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
677 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
678 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
679 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
681 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
682 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
683 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
684 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
685 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
686 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
687 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
688 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
691 * descrypt uses all zero data input, so we can optimize this first round of des
696 s1(k00, k01, k02, k03, k04, k05, &D08, &D16, &D22, &D30);
697 s2(k06, k07, k08, k09, k10, k11, &D12, &D27, &D01, &D17);
698 s3(k12, k13, k14, k15, k16, k17, &D23, &D15, &D29, &D05);
699 s4(k18, k19, k20, k21, k22, k23, &D25, &D19, &D09, &D00);
700 s5(k24, k25, k26, k27, k28, k29, &D07, &D13, &D24, &D02);
701 s6(k30, k31, k32, k33, k34, k35, &D03, &D28, &D10, &D18);
702 s7(k36, k37, k38, k39, k40, k41, &D31, &D11, &D21, &D06);
703 s8(k42, k43, k44, k45, k46, k47, &D04, &D26, &D14, &D20);
707 for (u32 i = 1; i < 16; i++)
711 case 0: KEYSET00; break;
712 case 1: KEYSET01; break;
713 case 2: KEYSET02; break;
714 case 3: KEYSET03; break;
715 case 4: KEYSET04; break;
716 case 5: KEYSET05; break;
717 case 6: KEYSET06; break;
718 case 7: KEYSET07; break;
719 case 8: KEYSET10; break;
720 case 9: KEYSET11; break;
721 case 10: KEYSET12; break;
722 case 11: KEYSET13; break;
723 case 12: KEYSET14; break;
724 case 13: KEYSET15; break;
725 case 14: KEYSET16; break;
726 case 15: KEYSET17; break;
729 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
730 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
731 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
732 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
733 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
734 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
735 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
736 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
743 for (u32 ii = 1; ii < 25; ii++)
745 for (u32 i = 0; i < 16; i++)
749 case 0: KEYSET00; break;
750 case 1: KEYSET01; break;
751 case 2: KEYSET02; break;
752 case 3: KEYSET03; break;
753 case 4: KEYSET04; break;
754 case 5: KEYSET05; break;
755 case 6: KEYSET06; break;
756 case 7: KEYSET07; break;
757 case 8: KEYSET10; break;
758 case 9: KEYSET11; break;
759 case 10: KEYSET12; break;
760 case 11: KEYSET13; break;
761 case 12: KEYSET14; break;
762 case 13: KEYSET15; break;
763 case 14: KEYSET16; break;
764 case 15: KEYSET17; break;
767 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
768 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
769 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
770 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
771 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
772 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
773 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
774 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
787 static void transpose32c (u32 data[32])
789 #define swap(x,y,j,m) \
790 t = ((x) ^ ((y) >> (j))) & (m); \
792 (y) = (y) ^ (t << (j));
796 swap (data[ 0], data[16], 16, 0x0000ffff);
797 swap (data[ 1], data[17], 16, 0x0000ffff);
798 swap (data[ 2], data[18], 16, 0x0000ffff);
799 swap (data[ 3], data[19], 16, 0x0000ffff);
800 swap (data[ 4], data[20], 16, 0x0000ffff);
801 swap (data[ 5], data[21], 16, 0x0000ffff);
802 swap (data[ 6], data[22], 16, 0x0000ffff);
803 swap (data[ 7], data[23], 16, 0x0000ffff);
804 swap (data[ 8], data[24], 16, 0x0000ffff);
805 swap (data[ 9], data[25], 16, 0x0000ffff);
806 swap (data[10], data[26], 16, 0x0000ffff);
807 swap (data[11], data[27], 16, 0x0000ffff);
808 swap (data[12], data[28], 16, 0x0000ffff);
809 swap (data[13], data[29], 16, 0x0000ffff);
810 swap (data[14], data[30], 16, 0x0000ffff);
811 swap (data[15], data[31], 16, 0x0000ffff);
812 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
813 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
814 swap (data[ 2], data[10], 8, 0x00ff00ff);
815 swap (data[ 3], data[11], 8, 0x00ff00ff);
816 swap (data[ 4], data[12], 8, 0x00ff00ff);
817 swap (data[ 5], data[13], 8, 0x00ff00ff);
818 swap (data[ 6], data[14], 8, 0x00ff00ff);
819 swap (data[ 7], data[15], 8, 0x00ff00ff);
820 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
821 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
822 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
823 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
824 swap (data[ 0], data[ 2], 2, 0x33333333);
825 swap (data[ 1], data[ 3], 2, 0x33333333);
826 swap (data[ 0], data[ 1], 1, 0x55555555);
827 swap (data[ 2], data[ 3], 1, 0x55555555);
828 swap (data[ 4], data[ 6], 2, 0x33333333);
829 swap (data[ 5], data[ 7], 2, 0x33333333);
830 swap (data[ 4], data[ 5], 1, 0x55555555);
831 swap (data[ 6], data[ 7], 1, 0x55555555);
832 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
833 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
834 swap (data[10], data[14], 4, 0x0f0f0f0f);
835 swap (data[11], data[15], 4, 0x0f0f0f0f);
836 swap (data[ 8], data[10], 2, 0x33333333);
837 swap (data[ 9], data[11], 2, 0x33333333);
838 swap (data[ 8], data[ 9], 1, 0x55555555);
839 swap (data[10], data[11], 1, 0x55555555);
840 swap (data[12], data[14], 2, 0x33333333);
841 swap (data[13], data[15], 2, 0x33333333);
842 swap (data[12], data[13], 1, 0x55555555);
843 swap (data[14], data[15], 1, 0x55555555);
844 swap (data[16], data[24], 8, 0x00ff00ff);
845 swap (data[17], data[25], 8, 0x00ff00ff);
846 swap (data[18], data[26], 8, 0x00ff00ff);
847 swap (data[19], data[27], 8, 0x00ff00ff);
848 swap (data[20], data[28], 8, 0x00ff00ff);
849 swap (data[21], data[29], 8, 0x00ff00ff);
850 swap (data[22], data[30], 8, 0x00ff00ff);
851 swap (data[23], data[31], 8, 0x00ff00ff);
852 swap (data[16], data[20], 4, 0x0f0f0f0f);
853 swap (data[17], data[21], 4, 0x0f0f0f0f);
854 swap (data[18], data[22], 4, 0x0f0f0f0f);
855 swap (data[19], data[23], 4, 0x0f0f0f0f);
856 swap (data[16], data[18], 2, 0x33333333);
857 swap (data[17], data[19], 2, 0x33333333);
858 swap (data[16], data[17], 1, 0x55555555);
859 swap (data[18], data[19], 1, 0x55555555);
860 swap (data[20], data[22], 2, 0x33333333);
861 swap (data[21], data[23], 2, 0x33333333);
862 swap (data[20], data[21], 1, 0x55555555);
863 swap (data[22], data[23], 1, 0x55555555);
864 swap (data[24], data[28], 4, 0x0f0f0f0f);
865 swap (data[25], data[29], 4, 0x0f0f0f0f);
866 swap (data[26], data[30], 4, 0x0f0f0f0f);
867 swap (data[27], data[31], 4, 0x0f0f0f0f);
868 swap (data[24], data[26], 2, 0x33333333);
869 swap (data[25], data[27], 2, 0x33333333);
870 swap (data[24], data[25], 1, 0x55555555);
871 swap (data[26], data[27], 1, 0x55555555);
872 swap (data[28], data[30], 2, 0x33333333);
873 swap (data[29], data[31], 2, 0x33333333);
874 swap (data[28], data[29], 1, 0x55555555);
875 swap (data[30], data[31], 1, 0x55555555);
878 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
884 const u32 gid = get_global_id (0);
885 const u32 lid = get_local_id (0);
891 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
897 const u32 K00 = pws[gid].i[ 0];
898 const u32 K01 = pws[gid].i[ 1];
899 const u32 K02 = pws[gid].i[ 2];
900 const u32 K03 = pws[gid].i[ 3];
901 const u32 K04 = pws[gid].i[ 4];
902 const u32 K05 = pws[gid].i[ 5];
903 const u32 K06 = pws[gid].i[ 6];
904 const u32 K07 = pws[gid].i[ 7];
905 const u32 K08 = pws[gid].i[ 8];
906 const u32 K09 = pws[gid].i[ 9];
907 const u32 K10 = pws[gid].i[10];
908 const u32 K11 = pws[gid].i[11];
909 const u32 K12 = pws[gid].i[12];
910 const u32 K13 = pws[gid].i[13];
911 const u32 K14 = pws[gid].i[14];
912 const u32 K15 = pws[gid].i[15];
913 const u32 K16 = pws[gid].i[16];
914 const u32 K17 = pws[gid].i[17];
915 const u32 K18 = pws[gid].i[18];
916 const u32 K19 = pws[gid].i[19];
917 const u32 K20 = pws[gid].i[20];
918 const u32 K21 = pws[gid].i[21];
919 const u32 K22 = pws[gid].i[22];
920 const u32 K23 = pws[gid].i[23];
921 const u32 K24 = pws[gid].i[24];
922 const u32 K25 = pws[gid].i[25];
923 const u32 K26 = pws[gid].i[26];
924 const u32 K27 = pws[gid].i[27];
925 const u32 K28 = pws[gid].i[28];
926 const u32 K29 = pws[gid].i[29];
927 const u32 K30 = pws[gid].i[30];
928 const u32 K31 = pws[gid].i[31];
929 const u32 K32 = pws[gid].i[32];
930 const u32 K33 = pws[gid].i[33];
931 const u32 K34 = pws[gid].i[34];
932 const u32 K35 = pws[gid].i[35];
933 const u32 K36 = pws[gid].i[36];
934 const u32 K37 = pws[gid].i[37];
935 const u32 K38 = pws[gid].i[38];
936 const u32 K39 = pws[gid].i[39];
937 const u32 K40 = pws[gid].i[40];
938 const u32 K41 = pws[gid].i[41];
939 const u32 K42 = pws[gid].i[42];
940 const u32 K43 = pws[gid].i[43];
941 const u32 K44 = pws[gid].i[44];
942 const u32 K45 = pws[gid].i[45];
943 const u32 K46 = pws[gid].i[46];
944 const u32 K47 = pws[gid].i[47];
945 const u32 K48 = pws[gid].i[48];
946 const u32 K49 = pws[gid].i[49];
947 const u32 K50 = pws[gid].i[50];
948 const u32 K51 = pws[gid].i[51];
949 const u32 K52 = pws[gid].i[52];
950 const u32 K53 = pws[gid].i[53];
951 const u32 K54 = pws[gid].i[54];
952 const u32 K55 = pws[gid].i[55];
958 const u32 bfs_cnt = bfs_cnt;
960 const u32 pc_pos = get_local_id (1);
962 const u32 il_pos = pc_pos * 32;
993 k00 |= words_buf_r[pc_pos].b[ 0];
994 k01 |= words_buf_r[pc_pos].b[ 1];
995 k02 |= words_buf_r[pc_pos].b[ 2];
996 k03 |= words_buf_r[pc_pos].b[ 3];
997 k04 |= words_buf_r[pc_pos].b[ 4];
998 k05 |= words_buf_r[pc_pos].b[ 5];
999 k06 |= words_buf_r[pc_pos].b[ 6];
1000 k07 |= words_buf_r[pc_pos].b[ 7];
1001 k08 |= words_buf_r[pc_pos].b[ 8];
1002 k09 |= words_buf_r[pc_pos].b[ 9];
1003 k10 |= words_buf_r[pc_pos].b[10];
1004 k11 |= words_buf_r[pc_pos].b[11];
1005 k12 |= words_buf_r[pc_pos].b[12];
1006 k13 |= words_buf_r[pc_pos].b[13];
1007 k14 |= words_buf_r[pc_pos].b[14];
1008 k15 |= words_buf_r[pc_pos].b[15];
1009 k16 |= words_buf_r[pc_pos].b[16];
1010 k17 |= words_buf_r[pc_pos].b[17];
1011 k18 |= words_buf_r[pc_pos].b[18];
1012 k19 |= words_buf_r[pc_pos].b[19];
1013 k20 |= words_buf_r[pc_pos].b[20];
1014 k21 |= words_buf_r[pc_pos].b[21];
1015 k22 |= words_buf_r[pc_pos].b[22];
1016 k23 |= words_buf_r[pc_pos].b[23];
1017 k24 |= words_buf_r[pc_pos].b[24];
1018 k25 |= words_buf_r[pc_pos].b[25];
1019 k26 |= words_buf_r[pc_pos].b[26];
1020 k27 |= words_buf_r[pc_pos].b[27];
1090 k00, k01, k02, k03, k04, k05, k06,
1091 k07, k08, k09, k10, k11, k12, k13,
1092 k14, k15, k16, k17, k18, k19, k20,
1093 k21, k22, k23, k24, k25, k26, k27,
1094 K28, K29, K30, K31, K32, K33, K34,
1095 K35, K36, K37, K38, K39, K40, K41,
1096 K42, K43, K44, K45, K46, K47, K48,
1097 K49, K50, K51, K52, K53, K54, K55,
1098 D00, D01, D02, D03, D04, D05, D06, D07,
1099 D08, D09, D10, D11, D12, D13, D14, D15,
1100 D16, D17, D18, D19, D20, D21, D22, D23,
1101 D24, D25, D26, D27, D28, D29, D30, D31,
1102 D32, D33, D34, D35, D36, D37, D38, D39,
1103 D40, D41, D42, D43, D44, D45, D46, D47,
1104 D48, D49, D50, D51, D52, D53, D54, D55,
1105 D56, D57, D58, D59, D60, D61, D62, D63
1175 if (digests_cnt < 16)
1177 for (u32 d = 0; d < digests_cnt; d++)
1179 const u32 final_hash_pos = digests_offset + d;
1181 if (hashes_shown[final_hash_pos]) continue;
1185 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1186 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1191 for (int i = 0; i < 32; i++)
1193 const u32 b0 = -((search[0] >> i) & 1);
1194 const u32 b1 = -((search[1] >> i) & 1);
1196 tmpResult |= out[ 0 + i] ^ b0;
1197 tmpResult |= out[32 + i] ^ b1;
1200 if (tmpResult == 0xffffffff) continue;
1202 const u32 slice = 31 - clz (~tmpResult);
1204 const u32 r0 = search[0];
1205 const u32 r1 = search[1];
1218 for (int i = 0; i < 32; i++)
1220 out0[i] = out[ 0 + 31 - i];
1221 out1[i] = out[32 + 31 - i];
1224 transpose32c (out0);
1225 transpose32c (out1);
1228 for (int slice = 0; slice < 32; slice++)
1230 const u32 r0 = out0[31 - slice];
1231 const u32 r1 = out1[31 - slice];
1240 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1246 const u32 gid = get_global_id (0);
1247 const u32 lid = get_local_id (0);
1253 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1328 const u32 K00 = pws[gid].i[ 0];
1329 const u32 K01 = pws[gid].i[ 1];
1330 const u32 K02 = pws[gid].i[ 2];
1331 const u32 K03 = pws[gid].i[ 3];
1332 const u32 K04 = pws[gid].i[ 4];
1333 const u32 K05 = pws[gid].i[ 5];
1334 const u32 K06 = pws[gid].i[ 6];
1335 const u32 K07 = pws[gid].i[ 7];
1336 const u32 K08 = pws[gid].i[ 8];
1337 const u32 K09 = pws[gid].i[ 9];
1338 const u32 K10 = pws[gid].i[10];
1339 const u32 K11 = pws[gid].i[11];
1340 const u32 K12 = pws[gid].i[12];
1341 const u32 K13 = pws[gid].i[13];
1342 const u32 K14 = pws[gid].i[14];
1343 const u32 K15 = pws[gid].i[15];
1344 const u32 K16 = pws[gid].i[16];
1345 const u32 K17 = pws[gid].i[17];
1346 const u32 K18 = pws[gid].i[18];
1347 const u32 K19 = pws[gid].i[19];
1348 const u32 K20 = pws[gid].i[20];
1349 const u32 K21 = pws[gid].i[21];
1350 const u32 K22 = pws[gid].i[22];
1351 const u32 K23 = pws[gid].i[23];
1352 const u32 K24 = pws[gid].i[24];
1353 const u32 K25 = pws[gid].i[25];
1354 const u32 K26 = pws[gid].i[26];
1355 const u32 K27 = pws[gid].i[27];
1356 const u32 K28 = pws[gid].i[28];
1357 const u32 K29 = pws[gid].i[29];
1358 const u32 K30 = pws[gid].i[30];
1359 const u32 K31 = pws[gid].i[31];
1360 const u32 K32 = pws[gid].i[32];
1361 const u32 K33 = pws[gid].i[33];
1362 const u32 K34 = pws[gid].i[34];
1363 const u32 K35 = pws[gid].i[35];
1364 const u32 K36 = pws[gid].i[36];
1365 const u32 K37 = pws[gid].i[37];
1366 const u32 K38 = pws[gid].i[38];
1367 const u32 K39 = pws[gid].i[39];
1368 const u32 K40 = pws[gid].i[40];
1369 const u32 K41 = pws[gid].i[41];
1370 const u32 K42 = pws[gid].i[42];
1371 const u32 K43 = pws[gid].i[43];
1372 const u32 K44 = pws[gid].i[44];
1373 const u32 K45 = pws[gid].i[45];
1374 const u32 K46 = pws[gid].i[46];
1375 const u32 K47 = pws[gid].i[47];
1376 const u32 K48 = pws[gid].i[48];
1377 const u32 K49 = pws[gid].i[49];
1378 const u32 K50 = pws[gid].i[50];
1379 const u32 K51 = pws[gid].i[51];
1380 const u32 K52 = pws[gid].i[52];
1381 const u32 K53 = pws[gid].i[53];
1382 const u32 K54 = pws[gid].i[54];
1383 const u32 K55 = pws[gid].i[55];
1389 const u32 pc_pos = get_local_id (1);
1391 const u32 il_pos = pc_pos * 32;
1422 k00 |= words_buf_r[pc_pos].b[ 0];
1423 k01 |= words_buf_r[pc_pos].b[ 1];
1424 k02 |= words_buf_r[pc_pos].b[ 2];
1425 k03 |= words_buf_r[pc_pos].b[ 3];
1426 k04 |= words_buf_r[pc_pos].b[ 4];
1427 k05 |= words_buf_r[pc_pos].b[ 5];
1428 k06 |= words_buf_r[pc_pos].b[ 6];
1429 k07 |= words_buf_r[pc_pos].b[ 7];
1430 k08 |= words_buf_r[pc_pos].b[ 8];
1431 k09 |= words_buf_r[pc_pos].b[ 9];
1432 k10 |= words_buf_r[pc_pos].b[10];
1433 k11 |= words_buf_r[pc_pos].b[11];
1434 k12 |= words_buf_r[pc_pos].b[12];
1435 k13 |= words_buf_r[pc_pos].b[13];
1436 k14 |= words_buf_r[pc_pos].b[14];
1437 k15 |= words_buf_r[pc_pos].b[15];
1438 k16 |= words_buf_r[pc_pos].b[16];
1439 k17 |= words_buf_r[pc_pos].b[17];
1440 k18 |= words_buf_r[pc_pos].b[18];
1441 k19 |= words_buf_r[pc_pos].b[19];
1442 k20 |= words_buf_r[pc_pos].b[20];
1443 k21 |= words_buf_r[pc_pos].b[21];
1444 k22 |= words_buf_r[pc_pos].b[22];
1445 k23 |= words_buf_r[pc_pos].b[23];
1446 k24 |= words_buf_r[pc_pos].b[24];
1447 k25 |= words_buf_r[pc_pos].b[25];
1448 k26 |= words_buf_r[pc_pos].b[26];
1449 k27 |= words_buf_r[pc_pos].b[27];
1519 k00, k01, k02, k03, k04, k05, k06,
1520 k07, k08, k09, k10, k11, k12, k13,
1521 k14, k15, k16, k17, k18, k19, k20,
1522 k21, k22, k23, k24, k25, k26, k27,
1523 K28, K29, K30, K31, K32, K33, K34,
1524 K35, K36, K37, K38, K39, K40, K41,
1525 K42, K43, K44, K45, K46, K47, K48,
1526 K49, K50, K51, K52, K53, K54, K55,
1527 D00, D01, D02, D03, D04, D05, D06, D07,
1528 D08, D09, D10, D11, D12, D13, D14, D15,
1529 D16, D17, D18, D19, D20, D21, D22, D23,
1530 D24, D25, D26, D27, D28, D29, D30, D31,
1531 D32, D33, D34, D35, D36, D37, D38, D39,
1532 D40, D41, D42, D43, D44, D45, D46, D47,
1533 D48, D49, D50, D51, D52, D53, D54, D55,
1534 D56, D57, D58, D59, D60, D61, D62, D63
1539 tmpResult |= D00 ^ S00;
1540 tmpResult |= D01 ^ S01;
1541 tmpResult |= D02 ^ S02;
1542 tmpResult |= D03 ^ S03;
1543 tmpResult |= D04 ^ S04;
1544 tmpResult |= D05 ^ S05;
1545 tmpResult |= D06 ^ S06;
1546 tmpResult |= D07 ^ S07;
1547 tmpResult |= D08 ^ S08;
1548 tmpResult |= D09 ^ S09;
1549 tmpResult |= D10 ^ S10;
1550 tmpResult |= D11 ^ S11;
1551 tmpResult |= D12 ^ S12;
1552 tmpResult |= D13 ^ S13;
1553 tmpResult |= D14 ^ S14;
1554 tmpResult |= D15 ^ S15;
1555 tmpResult |= D16 ^ S16;
1556 tmpResult |= D17 ^ S17;
1557 tmpResult |= D18 ^ S18;
1558 tmpResult |= D19 ^ S19;
1559 tmpResult |= D20 ^ S20;
1560 tmpResult |= D21 ^ S21;
1561 tmpResult |= D22 ^ S22;
1562 tmpResult |= D23 ^ S23;
1563 tmpResult |= D24 ^ S24;
1564 tmpResult |= D25 ^ S25;
1565 tmpResult |= D26 ^ S26;
1566 tmpResult |= D27 ^ S27;
1567 tmpResult |= D28 ^ S28;
1568 tmpResult |= D29 ^ S29;
1569 tmpResult |= D30 ^ S30;
1570 tmpResult |= D31 ^ S31;
1571 tmpResult |= D32 ^ S32;
1572 tmpResult |= D33 ^ S33;
1573 tmpResult |= D34 ^ S34;
1574 tmpResult |= D35 ^ S35;
1575 tmpResult |= D36 ^ S36;
1576 tmpResult |= D37 ^ S37;
1577 tmpResult |= D38 ^ S38;
1578 tmpResult |= D39 ^ S39;
1579 tmpResult |= D40 ^ S40;
1580 tmpResult |= D41 ^ S41;
1581 tmpResult |= D42 ^ S42;
1582 tmpResult |= D43 ^ S43;
1583 tmpResult |= D44 ^ S44;
1584 tmpResult |= D45 ^ S45;
1585 tmpResult |= D46 ^ S46;
1586 tmpResult |= D47 ^ S47;
1588 if (tmpResult == 0xffffffff) return;
1590 tmpResult |= D48 ^ S48;
1591 tmpResult |= D49 ^ S49;
1592 tmpResult |= D50 ^ S50;
1593 tmpResult |= D51 ^ S51;
1594 tmpResult |= D52 ^ S52;
1595 tmpResult |= D53 ^ S53;
1596 tmpResult |= D54 ^ S54;
1597 tmpResult |= D55 ^ S55;
1598 tmpResult |= D56 ^ S56;
1599 tmpResult |= D57 ^ S57;
1600 tmpResult |= D58 ^ S58;
1601 tmpResult |= D59 ^ S59;
1602 tmpResult |= D60 ^ S60;
1603 tmpResult |= D61 ^ S61;
1604 tmpResult |= D62 ^ S62;
1605 tmpResult |= D63 ^ S63;
1607 if (tmpResult == 0xffffffff) return;
1609 const u32 slice = 31 - clz (~tmpResult);
1615 // transpose bitslice base : easy because no overlapping buffers
1616 // mod : attention race conditions, need different buffers for *in and *out
1619 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
1621 const u32 gid = get_global_id (0);
1623 const u32 w0 = pws[gid].i[0];
1624 const u32 w1 = pws[gid].i[1];
1626 const u32 w0s = (w0 << 1) & 0xfefefefe;
1627 const u32 w1s = (w1 << 1) & 0xfefefefe;
1630 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1632 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
1633 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
1634 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
1635 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
1636 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
1637 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
1638 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
1642 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1644 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
1645 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
1646 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
1647 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
1648 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
1649 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
1650 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
1654 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
1656 const u32 gid = get_global_id (0);
1657 const u32 lid = get_local_id (0);
1659 const u32 block = gid / 32;
1660 const u32 slice = gid % 32;
1662 const u32 w0 = mod[gid];
1664 const u32 w0s = (w0 << 1) & 0xfefefefe;
1667 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1669 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
1670 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
1671 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
1672 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
1673 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
1674 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
1675 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
1679 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1685 const u32 gid = get_global_id (0);
1686 const u32 lid = get_local_id (0);
1687 const u32 vid = get_local_id (1);
1689 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1690 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1692 __local u32 s_S[64];
1696 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1700 s_S[32 + vid] = -((s1 >> vid) & 1);
1703 barrier (CLK_LOCAL_MEM_FENCE);
1705 if (gid >= gid_max) return;
1711 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1714 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1718 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1722 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1728 const u32 gid = get_global_id (0);
1729 const u32 lid = get_local_id (0);
1730 const u32 vid = get_local_id (1);
1732 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1733 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1735 __local u32 s_S[64];
1739 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1743 s_S[32 + vid] = -((s1 >> vid) & 1);
1746 barrier (CLK_LOCAL_MEM_FENCE);
1748 if (gid >= gid_max) return;
1754 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1757 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1761 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)