2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
30 #include "include/kernel_functions.c"
31 #include "types_amd.c"
32 #include "common_amd.c"
35 #define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c"
36 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c"
40 #define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c"
41 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c"
45 #define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c"
46 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c"
49 #define KXX_DECL volatile
50 #define sXXX_DECL volatile
52 //#define myselx(a,b,c) ((c) ? (b) : (a))
53 //#define myselx(a,b,c) ((b & c) | (a & ~c))
54 #define myselx(a,b,c) bitselect ((a), (b), (c))
57 * Bitslice DES S-boxes making use of a vector conditional select operation
58 * (e.g., vsel on PowerPC with AltiVec).
60 * Gate counts: 36 33 33 26 35 34 34 32
63 * Several same-gate-count expressions for each S-box are included (for use on
64 * different CPUs/GPUs).
66 * These Boolean expressions corresponding to DES S-boxes have been generated
67 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
68 * John the Ripper password cracker: http://www.openwall.com/john/
69 * Being mathematical formulas, they are not copyrighted and are free for reuse
72 * This file (a specific representation of the S-box expressions, surrounding
73 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
74 * Redistribution and use in source and binary forms, with or without
75 * modification, are permitted. (This is a heavily cut-down "BSD license".)
77 * The effort has been sponsored by Rapid7: http://www.rapid7.com
80 #define vnot(dst, a) (dst) = ~(a)
81 #define vand(dst, a, b) (dst) = (a) & (b)
82 #define vor(dst, a, b) (dst) = (a) | (b)
83 #define vandn(dst, a, b) (dst) = (a) & ~(b)
84 #define vxor(dst, a, b) (dst) = (a) ^ (b)
85 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
88 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
89 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
91 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
93 u32 x55AFD1B7, x3C3C69C3, x6993B874;
94 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
95 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
96 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
97 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
98 u32 x0DBCE883, x3A25A215, x37994A96;
99 u32 x8A487EA7, x8B480F07, xB96C2D16;
102 vsel(x0F0F3333, a3, a2, a5);
103 vxor(x3C3C3C3C, a2, a3);
104 vor(x55FF55FF, a1, a4);
105 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
106 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
107 vxor(x09FCB7C0, a4, x0903B73F);
108 vxor(x5CA9E295, a1, x09FCB7C0);
110 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
111 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
112 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
114 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
115 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
116 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
117 vxor(x529E962D, x0F0F3333, x5D91A51E);
119 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
120 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
121 vsel(x428679F3, a5, x4B8771A3, x529E962D);
122 vxor(x6B68D433, x29EEADC0, x428679F3);
124 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
125 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
126 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
127 vnot(x94D83B6C, x6B27C493);
128 vsel(x0, x94D83B6C, x6B68D433, a6);
129 vxor(*out1, *out1, x0);
131 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
132 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
133 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
134 vxor(xD6E19C32, x529E962D, x847F0A1F);
135 vsel(x1, xD6E19C32, x5CA9E295, a6);
136 vxor(*out2, *out2, x1);
138 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
139 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
140 vxor(x37994A96, x0DBCE883, x3A25A215);
141 vsel(x3, x37994A96, x529E962D, a6);
142 vxor(*out4, *out4, x3);
144 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
145 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
146 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
147 vsel(x2, xB96C2D16, x6993B874, a6);
148 vxor(*out3, *out3, x2);
152 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
153 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
155 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
156 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
157 u32 x0F5AF03C, x6600FF56, x87A5F09C;
158 u32 xA55A963C, x3C69C30F, xB44BC32D;
159 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
160 u32 xB46C662D, x278DB412, xB66CB43B;
161 u32 xD2DC4E52, x27993333, xD2994E33;
162 u32 x278D0F2D, x2E0E547B, x09976748;
165 vsel(x55553333, a1, a3, a6);
166 vsel(x0055FF33, a6, x55553333, a5);
167 vsel(x33270F03, a3, a4, x0055FF33);
168 vxor(x66725A56, a1, x33270F03);
169 vxor(x00FFFF00, a5, a6);
170 vxor(x668DA556, x66725A56, x00FFFF00);
172 vsel(x0F0F5A56, a4, x66725A56, a6);
173 vnot(xF0F0A5A9, x0F0F5A56);
174 vxor(xA5A5969A, x55553333, xF0F0A5A9);
175 vxor(xA55A699A, x00FFFF00, xA5A5969A);
176 vsel(x1, xA55A699A, x668DA556, a2);
177 vxor(*out2, *out2, x1);
179 vxor(x0F5AF03C, a4, x0055FF33);
180 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
181 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
183 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
184 vxor(x3C69C30F, a3, x0F5AF03C);
185 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
187 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
188 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
189 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
190 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
191 vsel(x0, x996C66D2, xB44BC32D, a2);
192 vxor(*out1, *out1, x0);
194 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
195 vsel(x278DB412, x668DA556, xA5A5969A, a1);
196 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
198 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
199 vsel(x27993333, x278DB412, a3, x0055FF33);
200 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
201 vsel(x3, x87A5F09C, xD2994E33, a2);
202 vxor(*out4, *out4, x3);
204 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
205 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
206 vxor(x09976748, x27993333, x2E0E547B);
207 vsel(x2, xB66CB43B, x09976748, a2);
208 vxor(*out3, *out3, x2);
212 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
213 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
215 u32 x0F330F33, x0F33F0CC, x5A66A599;
216 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
217 u32 x556BA09E, x665A93AC, x99A56C53;
218 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
219 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
220 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
221 u32 xD6599874, x05330022, xD2699876;
222 u32 x665F9364, xD573F0F2, xB32C6396;
225 vsel(x0F330F33, a4, a3, a5);
226 vxor(x0F33F0CC, a6, x0F330F33);
227 vxor(x5A66A599, a2, x0F33F0CC);
229 vsel(x2111B7BB, a3, a6, x5A66A599);
230 vsel(x03FF3033, a5, a3, x0F33F0CC);
231 vsel(x05BB50EE, a5, x0F33F0CC, a2);
232 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
233 vxor(x265E97A4, x2111B7BB, x074F201F);
235 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
236 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
237 vnot(x99A56C53, x665A93AC);
238 vsel(x1, x265E97A4, x99A56C53, a1);
239 vxor(*out2, *out2, x1);
241 vxor(x25A1A797, x03FF3033, x265E97A4);
242 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
243 vsel(x66559355, x665A93AC, a2, a5);
244 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
246 vxor(x9A5A5C60, x03FF3033, x99A56C53);
247 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
248 vxor(x87698DB4, x5713754C, xD07AF8F8);
249 vxor(xE13C1EE1, x66559355, x87698DB4);
251 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
252 vsel(x655B905E, x66559355, x05BB50EE, a4);
253 vsel(x00A55CFF, a5, a6, x9A5A5C60);
254 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
255 vsel(x0, x9E49915E, xE13C1EE1, a1);
256 vxor(*out1, *out1, x0);
258 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
259 vand(x05330022, x0F330F33, x05BB50EE);
260 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
261 vsel(x3, x5A66A599, xD2699876, a1);
262 vxor(*out4, *out4, x3);
264 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
265 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
266 vxor(xB32C6396, x665F9364, xD573F0F2);
267 vsel(x2, xB32C6396, x47B135C6, a1);
268 vxor(*out3, *out3, x2);
272 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
273 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
275 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
276 x0AF50F0F, x4CA36B59;
278 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
280 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
281 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
284 vsel(x0505AFAF, a5, a3, a1);
285 vsel(x0555AF55, x0505AFAF, a1, a4);
286 vxor(x0A5AA05A, a3, x0555AF55);
287 vsel(x46566456, a1, x0A5AA05A, a2);
288 vsel(x0A0A5F5F, a3, a5, a1);
289 vxor(x0AF55FA0, a4, x0A0A5F5F);
290 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
291 vxor(x4CA36B59, x46566456, x0AF50F0F);
293 vnot(xB35C94A6, x4CA36B59);
295 vsel(x01BB23BB, a4, a2, x0555AF55);
296 vxor(x5050FAFA, a1, x0505AFAF);
297 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
298 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
300 vnot(x56E9861E, xA91679E1);
302 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
303 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
304 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
305 vxor(xD2946D9A, x50E9FA1E, x827D9784);
306 vsel(x2, xD2946D9A, x4CA36B59, a6);
307 vxor(*out3, *out3, x2);
308 vsel(x3, xB35C94A6, xD2946D9A, a6);
309 vxor(*out4, *out4, x3);
311 vsel(x31F720B3, a2, a4, x0AF55FA0);
312 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
313 vxor(x4712A7AD, x56E9861E, x11FB21B3);
314 vxor(x9586CA37, xD2946D9A, x4712A7AD);
315 vsel(x0, x56E9861E, x9586CA37, a6);
316 vxor(*out1, *out1, x0);
317 vsel(x1, x9586CA37, xA91679E1, a6);
318 vxor(*out2, *out2, x1);
322 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
323 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
325 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
326 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
327 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
328 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
329 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
330 u32 x12E6283D, x9E47D3D4, x1A676AB4;
331 u32 x891556DF, xE5E77F82, x6CF2295D;
332 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
335 vsel(x550F550F, a1, a3, a5);
336 vnot(xAAF0AAF0, x550F550F);
337 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
338 vxor(x96C696C6, a2, xA5F5A5F5);
339 vxor(x00FFFF00, a5, a6);
340 vxor(x963969C6, x96C696C6, x00FFFF00);
342 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
343 vsel(xB73121F7, a2, x963969C6, x96C696C6);
344 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
345 vsel(x00558A5F, x1501DF0F, a5, a1);
346 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
348 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
349 vsel(x045157FD, a6, a1, x0679ED42);
350 vsel(xB32077FF, xB73121F7, a6, x045157FD);
351 vxor(x9D49D39C, x2E69A463, xB32077FF);
352 vsel(x2, x9D49D39C, x2E69A463, a4);
353 vxor(*out3, *out3, x2);
355 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
356 vsel(xF72577AF, xB32077FF, x550F550F, a1);
357 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
358 vsel(x1, x5BA4B81D, x963969C6, a4);
359 vxor(*out2, *out2, x1);
361 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
362 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
363 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
364 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
366 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
367 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
368 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
370 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
371 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
372 vxor(x6CF2295D, x891556DF, xE5E77F82);
373 vsel(x3, x1A35669A, x6CF2295D, a4);
374 vxor(*out4, *out4, x3);
376 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
377 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
378 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
379 vsel(x0, x369CC1D6, x1A676AB4, a4);
380 vxor(*out1, *out1, x0);
384 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
385 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
387 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
388 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
389 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
390 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
391 u32 x142956AB, x455D45DF, x1C3EE619;
392 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
393 u32 x840DBB67, x6DA19C1E, x925E63E1;
394 u32 x9C3CA761, x257A75D5, xB946D2B4;
397 vsel(x555500FF, a1, a4, a5);
398 vxor(x666633CC, a2, x555500FF);
399 vsel(x606F30CF, x666633CC, a4, a3);
400 vxor(x353A659A, a1, x606F30CF);
401 vxor(x353A9A65, a5, x353A659A);
402 vnot(xCAC5659A, x353A9A65);
404 vsel(x353A6565, x353A659A, x353A9A65, a4);
405 vsel(x0A3F0A6F, a3, a4, x353A6565);
406 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
407 vxor(x5963A3C6, x353A9A65, x6C5939A3);
409 vsel(x35FF659A, a4, x353A659A, x353A6565);
410 vxor(x3AF06A95, a3, x35FF659A);
411 vsel(x05CF0A9F, a4, a3, x353A9A65);
412 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
414 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
415 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
416 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
417 vsel(x0, xCAC5659A, x942D9A67, a6);
418 vxor(*out1, *out1, x0);
420 vsel(x142956AB, x353A659A, x942D9A67, a2);
421 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
422 vxor(x1C3EE619, x5963A3C6, x455D45DF);
423 vsel(x3, x5963A3C6, x1C3EE619, a6);
424 vxor(*out4, *out4, x3);
426 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
427 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
428 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
429 vxor(x69A49C79, x555500FF, x3CF19C86);
431 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
432 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
433 vnot(x925E63E1, x6DA19C1E);
434 vsel(x1, x925E63E1, x69A49C79, a6);
435 vxor(*out2, *out2, x1);
437 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
438 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
439 vxor(xB946D2B4, x9C3CA761, x257A75D5);
440 vsel(x2, x16E94A97, xB946D2B4, a6);
441 vxor(*out3, *out3, x2);
445 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
446 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
448 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
449 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
450 u32 x738F9C63, x11EF9867, x26DA9867;
451 u32 x4B4B9C63, x4B666663, x4E639396;
452 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
453 u32 xD728827B, x6698807B, x699C585B;
454 u32 x738C847B, xA4A71E18, x74878E78;
455 u32 x333D9639, x74879639, x8B7869C6;
458 vsel(x44447777, a2, a6, a3);
459 vxor(x4B4B7878, a4, x44447777);
460 vsel(x22772277, a3, a5, a2);
461 vsel(x0505F5F5, a6, a2, a4);
462 vsel(x220522F5, x22772277, x0505F5F5, a5);
463 vxor(x694E5A8D, x4B4B7878, x220522F5);
465 vxor(x00FFFF00, a5, a6);
466 vxor(x66666666, a2, a3);
467 vsel(x32353235, a3, x220522F5, a4);
468 vsel(x26253636, x66666666, x32353235, x4B4B7878);
469 vxor(x26DAC936, x00FFFF00, x26253636);
470 vsel(x0, x26DAC936, x694E5A8D, a1);
471 vxor(*out1, *out1, x0);
473 vxor(x738F9C63, a2, x26DAC936);
474 vsel(x11EF9867, x738F9C63, a5, x66666666);
475 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
477 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
478 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
479 vxor(x4E639396, x0505F5F5, x4B666663);
481 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
483 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
484 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
485 vsel(x1, xB14EE41D, x26DA9867, a1);
486 vxor(*out2, *out2, x1);
488 vxor(xD728827B, x66666666, xB14EE41D);
489 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
490 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
491 vsel(x2, x699C585B, x4E639396, a1);
492 vxor(*out3, *out3, x2);
494 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
495 vxor(xA4A71E18, x738F9C63, xD728827B);
496 vsel(x74878E78, x738C847B, xA4A71E18, a4);
498 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
499 vsel(x74879639, x74878E78, x333D9639, a6);
500 vnot(x8B7869C6, x74879639);
501 vsel(x3, x74878E78, x8B7869C6, a1);
502 vxor(*out4, *out4, x3);
506 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
507 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
509 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
510 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
511 u32 x3001F74E, x30555745, x693CD926;
512 u32 x0C0CD926, x0C3F25E9, x38D696A5;
514 u32 x03D2117B, xC778395B, xCB471CB2;
515 u32 x5425B13F, x56B3803F, x919AE965;
516 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
519 vsel(x0505F5F5, a5, a1, a3);
520 vxor(x05FAF50A, a4, x0505F5F5);
521 vsel(x0F0F00FF, a3, a4, a5);
522 vsel(x22227777, a2, a5, a1);
523 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
524 vxor(x34E9B34C, a2, x07DA807F);
526 vsel(x00FFF00F, x05FAF50A, a4, a3);
527 vsel(x0033FCCF, a5, x00FFF00F, a2);
528 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
529 vsel(x0C0C3F3F, a3, a5, a2);
530 vxor(x59698E63, x5565B15C, x0C0C3F3F);
532 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
533 vsel(x30555745, x3001F74E, a1, x00FFF00F);
534 vxor(x693CD926, x59698E63, x30555745);
535 vsel(x2, x693CD926, x59698E63, a6);
536 vxor(*out3, *out3, x2);
538 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
539 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
540 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
542 vnot(xC729695A, x38D696A5);
544 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
545 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
546 vxor(xCB471CB2, x0C3F25E9, xC778395B);
547 vsel(x1, xCB471CB2, x34E9B34C, a6);
548 vxor(*out2, *out2, x1);
550 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
551 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
552 vxor(x919AE965, xC729695A, x56B3803F);
553 vsel(x3, xC729695A, x919AE965, a6);
554 vxor(*out4, *out4, x3);
556 vsel(x17B3023F, x07DA807F, a2, x59698E63);
557 vor(x75555755, a1, x30555745);
558 vxor(x62E6556A, x17B3023F, x75555755);
559 vxor(xA59E6C31, xC778395B, x62E6556A);
560 vsel(x0, xA59E6C31, x38D696A5, a6);
561 vxor(*out1, *out1, x0);
564 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
600 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
601 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
602 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
603 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
604 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
605 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
606 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
607 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
608 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
609 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
610 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
611 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
612 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
613 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
614 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
615 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
619 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
622 #define myselx(a,b,c) ((c) ? (b) : (a))
624 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
625 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
626 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
627 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
628 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
629 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
630 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
631 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
632 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
633 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
634 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
635 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
637 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
638 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
639 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
640 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
641 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
642 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
643 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
644 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
646 for (u32 ii = 0; ii < 25; ii++)
649 for (u32 i = 0; i < 16; i++)
653 case 0: KEYSET00; break;
654 case 1: KEYSET01; break;
655 case 2: KEYSET02; break;
656 case 3: KEYSET03; break;
657 case 4: KEYSET04; break;
658 case 5: KEYSET05; break;
659 case 6: KEYSET06; break;
660 case 7: KEYSET07; break;
661 case 8: KEYSET10; break;
662 case 9: KEYSET11; break;
663 case 10: KEYSET12; break;
664 case 11: KEYSET13; break;
665 case 12: KEYSET14; break;
666 case 13: KEYSET15; break;
667 case 14: KEYSET16; break;
668 case 15: KEYSET17; break;
671 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
672 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
673 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
674 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
675 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
676 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
677 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
678 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
691 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
693 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
694 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
695 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
696 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
697 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
698 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
699 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
700 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
701 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
702 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
703 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
704 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
706 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
707 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
708 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
709 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
710 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
711 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
712 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
713 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
716 * descrypt uses all zero data input, so we can optimize this first round of des
721 s1(k00, k01, k02, k03, k04, k05, &D08, &D16, &D22, &D30);
722 s2(k06, k07, k08, k09, k10, k11, &D12, &D27, &D01, &D17);
723 s3(k12, k13, k14, k15, k16, k17, &D23, &D15, &D29, &D05);
724 s4(k18, k19, k20, k21, k22, k23, &D25, &D19, &D09, &D00);
725 s5(k24, k25, k26, k27, k28, k29, &D07, &D13, &D24, &D02);
726 s6(k30, k31, k32, k33, k34, k35, &D03, &D28, &D10, &D18);
727 s7(k36, k37, k38, k39, k40, k41, &D31, &D11, &D21, &D06);
728 s8(k42, k43, k44, k45, k46, k47, &D04, &D26, &D14, &D20);
732 for (u32 i = 1; i < 16; i++)
736 case 0: KEYSET00; break;
737 case 1: KEYSET01; break;
738 case 2: KEYSET02; break;
739 case 3: KEYSET03; break;
740 case 4: KEYSET04; break;
741 case 5: KEYSET05; break;
742 case 6: KEYSET06; break;
743 case 7: KEYSET07; break;
744 case 8: KEYSET10; break;
745 case 9: KEYSET11; break;
746 case 10: KEYSET12; break;
747 case 11: KEYSET13; break;
748 case 12: KEYSET14; break;
749 case 13: KEYSET15; break;
750 case 14: KEYSET16; break;
751 case 15: KEYSET17; break;
754 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
755 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
756 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
757 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
758 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
759 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
760 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
761 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
768 for (u32 ii = 1; ii < 25; ii++)
770 for (u32 i = 0; i < 16; i++)
774 case 0: KEYSET00; break;
775 case 1: KEYSET01; break;
776 case 2: KEYSET02; break;
777 case 3: KEYSET03; break;
778 case 4: KEYSET04; break;
779 case 5: KEYSET05; break;
780 case 6: KEYSET06; break;
781 case 7: KEYSET07; break;
782 case 8: KEYSET10; break;
783 case 9: KEYSET11; break;
784 case 10: KEYSET12; break;
785 case 11: KEYSET13; break;
786 case 12: KEYSET14; break;
787 case 13: KEYSET15; break;
788 case 14: KEYSET16; break;
789 case 15: KEYSET17; break;
792 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
793 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
794 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
795 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
796 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
797 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
798 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
799 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
812 static void transpose32c (u32 data[32])
814 #define swap(x,y,j,m) \
815 t = ((x) ^ ((y) >> (j))) & (m); \
817 (y) = (y) ^ (t << (j));
821 swap (data[ 0], data[16], 16, 0x0000ffff);
822 swap (data[ 1], data[17], 16, 0x0000ffff);
823 swap (data[ 2], data[18], 16, 0x0000ffff);
824 swap (data[ 3], data[19], 16, 0x0000ffff);
825 swap (data[ 4], data[20], 16, 0x0000ffff);
826 swap (data[ 5], data[21], 16, 0x0000ffff);
827 swap (data[ 6], data[22], 16, 0x0000ffff);
828 swap (data[ 7], data[23], 16, 0x0000ffff);
829 swap (data[ 8], data[24], 16, 0x0000ffff);
830 swap (data[ 9], data[25], 16, 0x0000ffff);
831 swap (data[10], data[26], 16, 0x0000ffff);
832 swap (data[11], data[27], 16, 0x0000ffff);
833 swap (data[12], data[28], 16, 0x0000ffff);
834 swap (data[13], data[29], 16, 0x0000ffff);
835 swap (data[14], data[30], 16, 0x0000ffff);
836 swap (data[15], data[31], 16, 0x0000ffff);
837 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
838 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
839 swap (data[ 2], data[10], 8, 0x00ff00ff);
840 swap (data[ 3], data[11], 8, 0x00ff00ff);
841 swap (data[ 4], data[12], 8, 0x00ff00ff);
842 swap (data[ 5], data[13], 8, 0x00ff00ff);
843 swap (data[ 6], data[14], 8, 0x00ff00ff);
844 swap (data[ 7], data[15], 8, 0x00ff00ff);
845 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
846 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
847 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
848 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
849 swap (data[ 0], data[ 2], 2, 0x33333333);
850 swap (data[ 1], data[ 3], 2, 0x33333333);
851 swap (data[ 0], data[ 1], 1, 0x55555555);
852 swap (data[ 2], data[ 3], 1, 0x55555555);
853 swap (data[ 4], data[ 6], 2, 0x33333333);
854 swap (data[ 5], data[ 7], 2, 0x33333333);
855 swap (data[ 4], data[ 5], 1, 0x55555555);
856 swap (data[ 6], data[ 7], 1, 0x55555555);
857 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
858 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
859 swap (data[10], data[14], 4, 0x0f0f0f0f);
860 swap (data[11], data[15], 4, 0x0f0f0f0f);
861 swap (data[ 8], data[10], 2, 0x33333333);
862 swap (data[ 9], data[11], 2, 0x33333333);
863 swap (data[ 8], data[ 9], 1, 0x55555555);
864 swap (data[10], data[11], 1, 0x55555555);
865 swap (data[12], data[14], 2, 0x33333333);
866 swap (data[13], data[15], 2, 0x33333333);
867 swap (data[12], data[13], 1, 0x55555555);
868 swap (data[14], data[15], 1, 0x55555555);
869 swap (data[16], data[24], 8, 0x00ff00ff);
870 swap (data[17], data[25], 8, 0x00ff00ff);
871 swap (data[18], data[26], 8, 0x00ff00ff);
872 swap (data[19], data[27], 8, 0x00ff00ff);
873 swap (data[20], data[28], 8, 0x00ff00ff);
874 swap (data[21], data[29], 8, 0x00ff00ff);
875 swap (data[22], data[30], 8, 0x00ff00ff);
876 swap (data[23], data[31], 8, 0x00ff00ff);
877 swap (data[16], data[20], 4, 0x0f0f0f0f);
878 swap (data[17], data[21], 4, 0x0f0f0f0f);
879 swap (data[18], data[22], 4, 0x0f0f0f0f);
880 swap (data[19], data[23], 4, 0x0f0f0f0f);
881 swap (data[16], data[18], 2, 0x33333333);
882 swap (data[17], data[19], 2, 0x33333333);
883 swap (data[16], data[17], 1, 0x55555555);
884 swap (data[18], data[19], 1, 0x55555555);
885 swap (data[20], data[22], 2, 0x33333333);
886 swap (data[21], data[23], 2, 0x33333333);
887 swap (data[20], data[21], 1, 0x55555555);
888 swap (data[22], data[23], 1, 0x55555555);
889 swap (data[24], data[28], 4, 0x0f0f0f0f);
890 swap (data[25], data[29], 4, 0x0f0f0f0f);
891 swap (data[26], data[30], 4, 0x0f0f0f0f);
892 swap (data[27], data[31], 4, 0x0f0f0f0f);
893 swap (data[24], data[26], 2, 0x33333333);
894 swap (data[25], data[27], 2, 0x33333333);
895 swap (data[24], data[25], 1, 0x55555555);
896 swap (data[26], data[27], 1, 0x55555555);
897 swap (data[28], data[30], 2, 0x33333333);
898 swap (data[29], data[31], 2, 0x33333333);
899 swap (data[28], data[29], 1, 0x55555555);
900 swap (data[30], data[31], 1, 0x55555555);
903 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
909 const u32 gid = get_global_id (0);
910 const u32 lid = get_local_id (0);
916 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
922 const u32 K00 = pws[gid].i[ 0];
923 const u32 K01 = pws[gid].i[ 1];
924 const u32 K02 = pws[gid].i[ 2];
925 const u32 K03 = pws[gid].i[ 3];
926 const u32 K04 = pws[gid].i[ 4];
927 const u32 K05 = pws[gid].i[ 5];
928 const u32 K06 = pws[gid].i[ 6];
929 const u32 K07 = pws[gid].i[ 7];
930 const u32 K08 = pws[gid].i[ 8];
931 const u32 K09 = pws[gid].i[ 9];
932 const u32 K10 = pws[gid].i[10];
933 const u32 K11 = pws[gid].i[11];
934 const u32 K12 = pws[gid].i[12];
935 const u32 K13 = pws[gid].i[13];
936 const u32 K14 = pws[gid].i[14];
937 const u32 K15 = pws[gid].i[15];
938 const u32 K16 = pws[gid].i[16];
939 const u32 K17 = pws[gid].i[17];
940 const u32 K18 = pws[gid].i[18];
941 const u32 K19 = pws[gid].i[19];
942 const u32 K20 = pws[gid].i[20];
943 const u32 K21 = pws[gid].i[21];
944 const u32 K22 = pws[gid].i[22];
945 const u32 K23 = pws[gid].i[23];
946 const u32 K24 = pws[gid].i[24];
947 const u32 K25 = pws[gid].i[25];
948 const u32 K26 = pws[gid].i[26];
949 const u32 K27 = pws[gid].i[27];
950 const u32 K28 = pws[gid].i[28];
951 const u32 K29 = pws[gid].i[29];
952 const u32 K30 = pws[gid].i[30];
953 const u32 K31 = pws[gid].i[31];
954 const u32 K32 = pws[gid].i[32];
955 const u32 K33 = pws[gid].i[33];
956 const u32 K34 = pws[gid].i[34];
957 const u32 K35 = pws[gid].i[35];
958 const u32 K36 = pws[gid].i[36];
959 const u32 K37 = pws[gid].i[37];
960 const u32 K38 = pws[gid].i[38];
961 const u32 K39 = pws[gid].i[39];
962 const u32 K40 = pws[gid].i[40];
963 const u32 K41 = pws[gid].i[41];
964 const u32 K42 = pws[gid].i[42];
965 const u32 K43 = pws[gid].i[43];
966 const u32 K44 = pws[gid].i[44];
967 const u32 K45 = pws[gid].i[45];
968 const u32 K46 = pws[gid].i[46];
969 const u32 K47 = pws[gid].i[47];
970 const u32 K48 = pws[gid].i[48];
971 const u32 K49 = pws[gid].i[49];
972 const u32 K50 = pws[gid].i[50];
973 const u32 K51 = pws[gid].i[51];
974 const u32 K52 = pws[gid].i[52];
975 const u32 K53 = pws[gid].i[53];
976 const u32 K54 = pws[gid].i[54];
977 const u32 K55 = pws[gid].i[55];
983 const u32 bf_loops = bfs_cnt;
985 const u32 pc_pos = get_local_id (1);
987 const u32 il_pos = pc_pos * 32;
1018 k00 |= words_buf_r[pc_pos].b[ 0];
1019 k01 |= words_buf_r[pc_pos].b[ 1];
1020 k02 |= words_buf_r[pc_pos].b[ 2];
1021 k03 |= words_buf_r[pc_pos].b[ 3];
1022 k04 |= words_buf_r[pc_pos].b[ 4];
1023 k05 |= words_buf_r[pc_pos].b[ 5];
1024 k06 |= words_buf_r[pc_pos].b[ 6];
1025 k07 |= words_buf_r[pc_pos].b[ 7];
1026 k08 |= words_buf_r[pc_pos].b[ 8];
1027 k09 |= words_buf_r[pc_pos].b[ 9];
1028 k10 |= words_buf_r[pc_pos].b[10];
1029 k11 |= words_buf_r[pc_pos].b[11];
1030 k12 |= words_buf_r[pc_pos].b[12];
1031 k13 |= words_buf_r[pc_pos].b[13];
1032 k14 |= words_buf_r[pc_pos].b[14];
1033 k15 |= words_buf_r[pc_pos].b[15];
1034 k16 |= words_buf_r[pc_pos].b[16];
1035 k17 |= words_buf_r[pc_pos].b[17];
1036 k18 |= words_buf_r[pc_pos].b[18];
1037 k19 |= words_buf_r[pc_pos].b[19];
1038 k20 |= words_buf_r[pc_pos].b[20];
1039 k21 |= words_buf_r[pc_pos].b[21];
1040 k22 |= words_buf_r[pc_pos].b[22];
1041 k23 |= words_buf_r[pc_pos].b[23];
1042 k24 |= words_buf_r[pc_pos].b[24];
1043 k25 |= words_buf_r[pc_pos].b[25];
1044 k26 |= words_buf_r[pc_pos].b[26];
1045 k27 |= words_buf_r[pc_pos].b[27];
1115 k00, k01, k02, k03, k04, k05, k06,
1116 k07, k08, k09, k10, k11, k12, k13,
1117 k14, k15, k16, k17, k18, k19, k20,
1118 k21, k22, k23, k24, k25, k26, k27,
1119 K28, K29, K30, K31, K32, K33, K34,
1120 K35, K36, K37, K38, K39, K40, K41,
1121 K42, K43, K44, K45, K46, K47, K48,
1122 K49, K50, K51, K52, K53, K54, K55,
1123 D00, D01, D02, D03, D04, D05, D06, D07,
1124 D08, D09, D10, D11, D12, D13, D14, D15,
1125 D16, D17, D18, D19, D20, D21, D22, D23,
1126 D24, D25, D26, D27, D28, D29, D30, D31,
1127 D32, D33, D34, D35, D36, D37, D38, D39,
1128 D40, D41, D42, D43, D44, D45, D46, D47,
1129 D48, D49, D50, D51, D52, D53, D54, D55,
1130 D56, D57, D58, D59, D60, D61, D62, D63
1200 if (digests_cnt < 16)
1202 for (u32 d = 0; d < digests_cnt; d++)
1204 const u32 final_hash_pos = digests_offset + d;
1206 if (hashes_shown[final_hash_pos]) continue;
1210 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1211 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1216 for (int i = 0; i < 32; i++)
1218 const u32 b0 = -((search[0] >> i) & 1);
1219 const u32 b1 = -((search[1] >> i) & 1);
1221 tmpResult |= out[ 0 + i] ^ b0;
1222 tmpResult |= out[32 + i] ^ b1;
1225 if (tmpResult == 0xffffffff) continue;
1227 const u32 slice = 31 - clz (~tmpResult);
1229 const u32x r0 = search[0];
1230 const u32x r1 = search[1];
1234 #include VECT_COMPARE_M
1243 for (int i = 0; i < 32; i++)
1245 out0[i] = out[ 0 + 31 - i];
1246 out1[i] = out[32 + 31 - i];
1249 transpose32c (out0);
1250 transpose32c (out1);
1253 for (int slice = 0; slice < 32; slice++)
1255 const u32x r0 = out0[31 - slice];
1256 const u32x r1 = out1[31 - slice];
1260 #include VECT_COMPARE_M
1265 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1271 const u32 gid = get_global_id (0);
1272 const u32 lid = get_local_id (0);
1278 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1353 const u32 K00 = pws[gid].i[ 0];
1354 const u32 K01 = pws[gid].i[ 1];
1355 const u32 K02 = pws[gid].i[ 2];
1356 const u32 K03 = pws[gid].i[ 3];
1357 const u32 K04 = pws[gid].i[ 4];
1358 const u32 K05 = pws[gid].i[ 5];
1359 const u32 K06 = pws[gid].i[ 6];
1360 const u32 K07 = pws[gid].i[ 7];
1361 const u32 K08 = pws[gid].i[ 8];
1362 const u32 K09 = pws[gid].i[ 9];
1363 const u32 K10 = pws[gid].i[10];
1364 const u32 K11 = pws[gid].i[11];
1365 const u32 K12 = pws[gid].i[12];
1366 const u32 K13 = pws[gid].i[13];
1367 const u32 K14 = pws[gid].i[14];
1368 const u32 K15 = pws[gid].i[15];
1369 const u32 K16 = pws[gid].i[16];
1370 const u32 K17 = pws[gid].i[17];
1371 const u32 K18 = pws[gid].i[18];
1372 const u32 K19 = pws[gid].i[19];
1373 const u32 K20 = pws[gid].i[20];
1374 const u32 K21 = pws[gid].i[21];
1375 const u32 K22 = pws[gid].i[22];
1376 const u32 K23 = pws[gid].i[23];
1377 const u32 K24 = pws[gid].i[24];
1378 const u32 K25 = pws[gid].i[25];
1379 const u32 K26 = pws[gid].i[26];
1380 const u32 K27 = pws[gid].i[27];
1381 const u32 K28 = pws[gid].i[28];
1382 const u32 K29 = pws[gid].i[29];
1383 const u32 K30 = pws[gid].i[30];
1384 const u32 K31 = pws[gid].i[31];
1385 const u32 K32 = pws[gid].i[32];
1386 const u32 K33 = pws[gid].i[33];
1387 const u32 K34 = pws[gid].i[34];
1388 const u32 K35 = pws[gid].i[35];
1389 const u32 K36 = pws[gid].i[36];
1390 const u32 K37 = pws[gid].i[37];
1391 const u32 K38 = pws[gid].i[38];
1392 const u32 K39 = pws[gid].i[39];
1393 const u32 K40 = pws[gid].i[40];
1394 const u32 K41 = pws[gid].i[41];
1395 const u32 K42 = pws[gid].i[42];
1396 const u32 K43 = pws[gid].i[43];
1397 const u32 K44 = pws[gid].i[44];
1398 const u32 K45 = pws[gid].i[45];
1399 const u32 K46 = pws[gid].i[46];
1400 const u32 K47 = pws[gid].i[47];
1401 const u32 K48 = pws[gid].i[48];
1402 const u32 K49 = pws[gid].i[49];
1403 const u32 K50 = pws[gid].i[50];
1404 const u32 K51 = pws[gid].i[51];
1405 const u32 K52 = pws[gid].i[52];
1406 const u32 K53 = pws[gid].i[53];
1407 const u32 K54 = pws[gid].i[54];
1408 const u32 K55 = pws[gid].i[55];
1414 const u32 pc_pos = get_local_id (1);
1416 const u32 il_pos = pc_pos * 32;
1447 k00 |= words_buf_r[pc_pos].b[ 0];
1448 k01 |= words_buf_r[pc_pos].b[ 1];
1449 k02 |= words_buf_r[pc_pos].b[ 2];
1450 k03 |= words_buf_r[pc_pos].b[ 3];
1451 k04 |= words_buf_r[pc_pos].b[ 4];
1452 k05 |= words_buf_r[pc_pos].b[ 5];
1453 k06 |= words_buf_r[pc_pos].b[ 6];
1454 k07 |= words_buf_r[pc_pos].b[ 7];
1455 k08 |= words_buf_r[pc_pos].b[ 8];
1456 k09 |= words_buf_r[pc_pos].b[ 9];
1457 k10 |= words_buf_r[pc_pos].b[10];
1458 k11 |= words_buf_r[pc_pos].b[11];
1459 k12 |= words_buf_r[pc_pos].b[12];
1460 k13 |= words_buf_r[pc_pos].b[13];
1461 k14 |= words_buf_r[pc_pos].b[14];
1462 k15 |= words_buf_r[pc_pos].b[15];
1463 k16 |= words_buf_r[pc_pos].b[16];
1464 k17 |= words_buf_r[pc_pos].b[17];
1465 k18 |= words_buf_r[pc_pos].b[18];
1466 k19 |= words_buf_r[pc_pos].b[19];
1467 k20 |= words_buf_r[pc_pos].b[20];
1468 k21 |= words_buf_r[pc_pos].b[21];
1469 k22 |= words_buf_r[pc_pos].b[22];
1470 k23 |= words_buf_r[pc_pos].b[23];
1471 k24 |= words_buf_r[pc_pos].b[24];
1472 k25 |= words_buf_r[pc_pos].b[25];
1473 k26 |= words_buf_r[pc_pos].b[26];
1474 k27 |= words_buf_r[pc_pos].b[27];
1544 k00, k01, k02, k03, k04, k05, k06,
1545 k07, k08, k09, k10, k11, k12, k13,
1546 k14, k15, k16, k17, k18, k19, k20,
1547 k21, k22, k23, k24, k25, k26, k27,
1548 K28, K29, K30, K31, K32, K33, K34,
1549 K35, K36, K37, K38, K39, K40, K41,
1550 K42, K43, K44, K45, K46, K47, K48,
1551 K49, K50, K51, K52, K53, K54, K55,
1552 D00, D01, D02, D03, D04, D05, D06, D07,
1553 D08, D09, D10, D11, D12, D13, D14, D15,
1554 D16, D17, D18, D19, D20, D21, D22, D23,
1555 D24, D25, D26, D27, D28, D29, D30, D31,
1556 D32, D33, D34, D35, D36, D37, D38, D39,
1557 D40, D41, D42, D43, D44, D45, D46, D47,
1558 D48, D49, D50, D51, D52, D53, D54, D55,
1559 D56, D57, D58, D59, D60, D61, D62, D63
1564 tmpResult |= D00 ^ S00;
1565 tmpResult |= D01 ^ S01;
1566 tmpResult |= D02 ^ S02;
1567 tmpResult |= D03 ^ S03;
1568 tmpResult |= D04 ^ S04;
1569 tmpResult |= D05 ^ S05;
1570 tmpResult |= D06 ^ S06;
1571 tmpResult |= D07 ^ S07;
1572 tmpResult |= D08 ^ S08;
1573 tmpResult |= D09 ^ S09;
1574 tmpResult |= D10 ^ S10;
1575 tmpResult |= D11 ^ S11;
1576 tmpResult |= D12 ^ S12;
1577 tmpResult |= D13 ^ S13;
1578 tmpResult |= D14 ^ S14;
1579 tmpResult |= D15 ^ S15;
1580 tmpResult |= D16 ^ S16;
1581 tmpResult |= D17 ^ S17;
1582 tmpResult |= D18 ^ S18;
1583 tmpResult |= D19 ^ S19;
1584 tmpResult |= D20 ^ S20;
1585 tmpResult |= D21 ^ S21;
1586 tmpResult |= D22 ^ S22;
1587 tmpResult |= D23 ^ S23;
1588 tmpResult |= D24 ^ S24;
1589 tmpResult |= D25 ^ S25;
1590 tmpResult |= D26 ^ S26;
1591 tmpResult |= D27 ^ S27;
1592 tmpResult |= D28 ^ S28;
1593 tmpResult |= D29 ^ S29;
1594 tmpResult |= D30 ^ S30;
1595 tmpResult |= D31 ^ S31;
1596 tmpResult |= D32 ^ S32;
1597 tmpResult |= D33 ^ S33;
1598 tmpResult |= D34 ^ S34;
1599 tmpResult |= D35 ^ S35;
1600 tmpResult |= D36 ^ S36;
1601 tmpResult |= D37 ^ S37;
1602 tmpResult |= D38 ^ S38;
1603 tmpResult |= D39 ^ S39;
1604 tmpResult |= D40 ^ S40;
1605 tmpResult |= D41 ^ S41;
1606 tmpResult |= D42 ^ S42;
1607 tmpResult |= D43 ^ S43;
1608 tmpResult |= D44 ^ S44;
1609 tmpResult |= D45 ^ S45;
1610 tmpResult |= D46 ^ S46;
1611 tmpResult |= D47 ^ S47;
1613 if (tmpResult == 0xffffffff) return;
1615 tmpResult |= D48 ^ S48;
1616 tmpResult |= D49 ^ S49;
1617 tmpResult |= D50 ^ S50;
1618 tmpResult |= D51 ^ S51;
1619 tmpResult |= D52 ^ S52;
1620 tmpResult |= D53 ^ S53;
1621 tmpResult |= D54 ^ S54;
1622 tmpResult |= D55 ^ S55;
1623 tmpResult |= D56 ^ S56;
1624 tmpResult |= D57 ^ S57;
1625 tmpResult |= D58 ^ S58;
1626 tmpResult |= D59 ^ S59;
1627 tmpResult |= D60 ^ S60;
1628 tmpResult |= D61 ^ S61;
1629 tmpResult |= D62 ^ S62;
1630 tmpResult |= D63 ^ S63;
1632 if (tmpResult == 0xffffffff) return;
1634 const u32 slice = 31 - clz (~tmpResult);
1636 #include VECT_COMPARE_S
1640 // transpose bitslice base : easy because no overlapping buffers
1641 // mod : attention race conditions, need different buffers for *in and *out
1644 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
1646 const u32 gid = get_global_id (0);
1648 const u32 w0 = pws[gid].i[0];
1649 const u32 w1 = pws[gid].i[1];
1651 const u32 w0s = (w0 << 1) & 0xfefefefe;
1652 const u32 w1s = (w1 << 1) & 0xfefefefe;
1655 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1657 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
1658 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
1659 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
1660 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
1661 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
1662 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
1663 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
1667 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1669 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
1670 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
1671 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
1672 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
1673 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
1674 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
1675 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
1679 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
1681 const u32 gid = get_global_id (0);
1682 const u32 lid = get_local_id (0);
1684 const u32 block = gid / 32;
1685 const u32 slice = gid % 32;
1687 const u32 w0 = mod[gid];
1689 const u32 w0s = (w0 << 1) & 0xfefefefe;
1692 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1694 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
1695 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
1696 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
1697 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
1698 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
1699 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
1700 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
1704 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1710 const u32 gid = get_global_id (0);
1711 const u32 lid = get_local_id (0);
1712 const u32 vid = get_local_id (1);
1714 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1715 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1717 __local u32 s_S[64];
1721 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1725 s_S[32 + vid] = -((s1 >> vid) & 1);
1728 barrier (CLK_LOCAL_MEM_FENCE);
1730 if (gid >= gid_max) return;
1736 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1739 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1743 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1747 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1753 const u32 gid = get_global_id (0);
1754 const u32 lid = get_local_id (0);
1755 const u32 vid = get_local_id (1);
1757 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1758 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1760 __local u32 s_S[64];
1764 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1768 s_S[32 + vid] = -((s1 >> vid) & 1);
1771 barrier (CLK_LOCAL_MEM_FENCE);
1773 if (gid >= gid_max) return;
1779 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1782 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1786 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)