2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
30 #include "include/kernel_functions.c"
31 #include "types_amd.c"
32 #include "common_amd.c"
35 #define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c"
36 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c"
40 #define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c"
41 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c"
45 #define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c"
46 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c"
49 #define KXX_DECL volatile
50 #define sXXX_DECL volatile
52 //#define myselx(a,b,c) ((c) ? (b) : (a))
53 //#define myselx(a,b,c) ((b & c) | (a & ~c))
54 #define myselx(a,b,c) bitselect ((a), (b), (c))
57 * Bitslice DES S-boxes making use of a vector conditional select operation
58 * (e.g., vsel on PowerPC with AltiVec).
60 * Gate counts: 36 33 33 26 35 34 34 32
63 * Several same-gate-count expressions for each S-box are included (for use on
64 * different CPUs/GPUs).
66 * These Boolean expressions corresponding to DES S-boxes have been generated
67 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
68 * John the Ripper password cracker: http://www.openwall.com/john/
69 * Being mathematical formulas, they are not copyrighted and are free for reuse
72 * This file (a specific representation of the S-box expressions, surrounding
73 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
74 * Redistribution and use in source and binary forms, with or without
75 * modification, are permitted. (This is a heavily cut-down "BSD license".)
77 * The effort has been sponsored by Rapid7: http://www.rapid7.com
80 #define vnot(dst, a) (dst) = ~(a)
81 #define vand(dst, a, b) (dst) = (a) & (b)
82 #define vor(dst, a, b) (dst) = (a) | (b)
83 #define vandn(dst, a, b) (dst) = (a) & ~(b)
84 #define vxor(dst, a, b) (dst) = (a) ^ (b)
85 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
88 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
89 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
91 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
93 u32 x55AFD1B7, x3C3C69C3, x6993B874;
94 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
95 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
96 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
97 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
98 u32 x0DBCE883, x3A25A215, x37994A96;
99 u32 x8A487EA7, x8B480F07, xB96C2D16;
102 vsel(x0F0F3333, a3, a2, a5);
103 vxor(x3C3C3C3C, a2, a3);
104 vor(x55FF55FF, a1, a4);
105 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
106 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
107 vxor(x09FCB7C0, a4, x0903B73F);
108 vxor(x5CA9E295, a1, x09FCB7C0);
110 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
111 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
112 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
114 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
115 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
116 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
117 vxor(x529E962D, x0F0F3333, x5D91A51E);
119 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
120 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
121 vsel(x428679F3, a5, x4B8771A3, x529E962D);
122 vxor(x6B68D433, x29EEADC0, x428679F3);
124 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
125 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
126 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
127 vnot(x94D83B6C, x6B27C493);
128 vsel(x0, x94D83B6C, x6B68D433, a6);
129 vxor(*out1, *out1, x0);
131 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
132 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
133 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
134 vxor(xD6E19C32, x529E962D, x847F0A1F);
135 vsel(x1, xD6E19C32, x5CA9E295, a6);
136 vxor(*out2, *out2, x1);
138 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
139 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
140 vxor(x37994A96, x0DBCE883, x3A25A215);
141 vsel(x3, x37994A96, x529E962D, a6);
142 vxor(*out4, *out4, x3);
144 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
145 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
146 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
147 vsel(x2, xB96C2D16, x6993B874, a6);
148 vxor(*out3, *out3, x2);
152 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
153 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
155 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
156 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
157 u32 x0F5AF03C, x6600FF56, x87A5F09C;
158 u32 xA55A963C, x3C69C30F, xB44BC32D;
159 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
160 u32 xB46C662D, x278DB412, xB66CB43B;
161 u32 xD2DC4E52, x27993333, xD2994E33;
162 u32 x278D0F2D, x2E0E547B, x09976748;
165 vsel(x55553333, a1, a3, a6);
166 vsel(x0055FF33, a6, x55553333, a5);
167 vsel(x33270F03, a3, a4, x0055FF33);
168 vxor(x66725A56, a1, x33270F03);
169 vxor(x00FFFF00, a5, a6);
170 vxor(x668DA556, x66725A56, x00FFFF00);
172 vsel(x0F0F5A56, a4, x66725A56, a6);
173 vnot(xF0F0A5A9, x0F0F5A56);
174 vxor(xA5A5969A, x55553333, xF0F0A5A9);
175 vxor(xA55A699A, x00FFFF00, xA5A5969A);
176 vsel(x1, xA55A699A, x668DA556, a2);
177 vxor(*out2, *out2, x1);
179 vxor(x0F5AF03C, a4, x0055FF33);
180 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
181 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
183 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
184 vxor(x3C69C30F, a3, x0F5AF03C);
185 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
187 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
188 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
189 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
190 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
191 vsel(x0, x996C66D2, xB44BC32D, a2);
192 vxor(*out1, *out1, x0);
194 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
195 vsel(x278DB412, x668DA556, xA5A5969A, a1);
196 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
198 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
199 vsel(x27993333, x278DB412, a3, x0055FF33);
200 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
201 vsel(x3, x87A5F09C, xD2994E33, a2);
202 vxor(*out4, *out4, x3);
204 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
205 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
206 vxor(x09976748, x27993333, x2E0E547B);
207 vsel(x2, xB66CB43B, x09976748, a2);
208 vxor(*out3, *out3, x2);
212 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
213 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
215 u32 x0F330F33, x0F33F0CC, x5A66A599;
216 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
217 u32 x556BA09E, x665A93AC, x99A56C53;
218 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
219 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
220 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
221 u32 xD6599874, x05330022, xD2699876;
222 u32 x665F9364, xD573F0F2, xB32C6396;
225 vsel(x0F330F33, a4, a3, a5);
226 vxor(x0F33F0CC, a6, x0F330F33);
227 vxor(x5A66A599, a2, x0F33F0CC);
229 vsel(x2111B7BB, a3, a6, x5A66A599);
230 vsel(x03FF3033, a5, a3, x0F33F0CC);
231 vsel(x05BB50EE, a5, x0F33F0CC, a2);
232 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
233 vxor(x265E97A4, x2111B7BB, x074F201F);
235 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
236 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
237 vnot(x99A56C53, x665A93AC);
238 vsel(x1, x265E97A4, x99A56C53, a1);
239 vxor(*out2, *out2, x1);
241 vxor(x25A1A797, x03FF3033, x265E97A4);
242 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
243 vsel(x66559355, x665A93AC, a2, a5);
244 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
246 vxor(x9A5A5C60, x03FF3033, x99A56C53);
247 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
248 vxor(x87698DB4, x5713754C, xD07AF8F8);
249 vxor(xE13C1EE1, x66559355, x87698DB4);
251 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
252 vsel(x655B905E, x66559355, x05BB50EE, a4);
253 vsel(x00A55CFF, a5, a6, x9A5A5C60);
254 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
255 vsel(x0, x9E49915E, xE13C1EE1, a1);
256 vxor(*out1, *out1, x0);
258 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
259 vand(x05330022, x0F330F33, x05BB50EE);
260 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
261 vsel(x3, x5A66A599, xD2699876, a1);
262 vxor(*out4, *out4, x3);
264 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
265 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
266 vxor(xB32C6396, x665F9364, xD573F0F2);
267 vsel(x2, xB32C6396, x47B135C6, a1);
268 vxor(*out3, *out3, x2);
272 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
273 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
275 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
276 x0AF50F0F, x4CA36B59;
278 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
280 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
281 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
284 vsel(x0505AFAF, a5, a3, a1);
285 vsel(x0555AF55, x0505AFAF, a1, a4);
286 vxor(x0A5AA05A, a3, x0555AF55);
287 vsel(x46566456, a1, x0A5AA05A, a2);
288 vsel(x0A0A5F5F, a3, a5, a1);
289 vxor(x0AF55FA0, a4, x0A0A5F5F);
290 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
291 vxor(x4CA36B59, x46566456, x0AF50F0F);
293 vnot(xB35C94A6, x4CA36B59);
295 vsel(x01BB23BB, a4, a2, x0555AF55);
296 vxor(x5050FAFA, a1, x0505AFAF);
297 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
298 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
300 vnot(x56E9861E, xA91679E1);
302 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
303 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
304 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
305 vxor(xD2946D9A, x50E9FA1E, x827D9784);
306 vsel(x2, xD2946D9A, x4CA36B59, a6);
307 vxor(*out3, *out3, x2);
308 vsel(x3, xB35C94A6, xD2946D9A, a6);
309 vxor(*out4, *out4, x3);
311 vsel(x31F720B3, a2, a4, x0AF55FA0);
312 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
313 vxor(x4712A7AD, x56E9861E, x11FB21B3);
314 vxor(x9586CA37, xD2946D9A, x4712A7AD);
315 vsel(x0, x56E9861E, x9586CA37, a6);
316 vxor(*out1, *out1, x0);
317 vsel(x1, x9586CA37, xA91679E1, a6);
318 vxor(*out2, *out2, x1);
322 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
323 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
325 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
326 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
327 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
328 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
329 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
330 u32 x12E6283D, x9E47D3D4, x1A676AB4;
331 u32 x891556DF, xE5E77F82, x6CF2295D;
332 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
335 vsel(x550F550F, a1, a3, a5);
336 vnot(xAAF0AAF0, x550F550F);
337 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
338 vxor(x96C696C6, a2, xA5F5A5F5);
339 vxor(x00FFFF00, a5, a6);
340 vxor(x963969C6, x96C696C6, x00FFFF00);
342 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
343 vsel(xB73121F7, a2, x963969C6, x96C696C6);
344 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
345 vsel(x00558A5F, x1501DF0F, a5, a1);
346 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
348 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
349 vsel(x045157FD, a6, a1, x0679ED42);
350 vsel(xB32077FF, xB73121F7, a6, x045157FD);
351 vxor(x9D49D39C, x2E69A463, xB32077FF);
352 vsel(x2, x9D49D39C, x2E69A463, a4);
353 vxor(*out3, *out3, x2);
355 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
356 vsel(xF72577AF, xB32077FF, x550F550F, a1);
357 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
358 vsel(x1, x5BA4B81D, x963969C6, a4);
359 vxor(*out2, *out2, x1);
361 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
362 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
363 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
364 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
366 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
367 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
368 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
370 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
371 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
372 vxor(x6CF2295D, x891556DF, xE5E77F82);
373 vsel(x3, x1A35669A, x6CF2295D, a4);
374 vxor(*out4, *out4, x3);
376 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
377 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
378 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
379 vsel(x0, x369CC1D6, x1A676AB4, a4);
380 vxor(*out1, *out1, x0);
384 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
385 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
387 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
388 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
389 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
390 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
391 u32 x142956AB, x455D45DF, x1C3EE619;
392 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
393 u32 x840DBB67, x6DA19C1E, x925E63E1;
394 u32 x9C3CA761, x257A75D5, xB946D2B4;
397 vsel(x555500FF, a1, a4, a5);
398 vxor(x666633CC, a2, x555500FF);
399 vsel(x606F30CF, x666633CC, a4, a3);
400 vxor(x353A659A, a1, x606F30CF);
401 vxor(x353A9A65, a5, x353A659A);
402 vnot(xCAC5659A, x353A9A65);
404 vsel(x353A6565, x353A659A, x353A9A65, a4);
405 vsel(x0A3F0A6F, a3, a4, x353A6565);
406 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
407 vxor(x5963A3C6, x353A9A65, x6C5939A3);
409 vsel(x35FF659A, a4, x353A659A, x353A6565);
410 vxor(x3AF06A95, a3, x35FF659A);
411 vsel(x05CF0A9F, a4, a3, x353A9A65);
412 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
414 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
415 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
416 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
417 vsel(x0, xCAC5659A, x942D9A67, a6);
418 vxor(*out1, *out1, x0);
420 vsel(x142956AB, x353A659A, x942D9A67, a2);
421 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
422 vxor(x1C3EE619, x5963A3C6, x455D45DF);
423 vsel(x3, x5963A3C6, x1C3EE619, a6);
424 vxor(*out4, *out4, x3);
426 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
427 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
428 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
429 vxor(x69A49C79, x555500FF, x3CF19C86);
431 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
432 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
433 vnot(x925E63E1, x6DA19C1E);
434 vsel(x1, x925E63E1, x69A49C79, a6);
435 vxor(*out2, *out2, x1);
437 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
438 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
439 vxor(xB946D2B4, x9C3CA761, x257A75D5);
440 vsel(x2, x16E94A97, xB946D2B4, a6);
441 vxor(*out3, *out3, x2);
445 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
446 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
448 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
449 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
450 u32 x738F9C63, x11EF9867, x26DA9867;
451 u32 x4B4B9C63, x4B666663, x4E639396;
452 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
453 u32 xD728827B, x6698807B, x699C585B;
454 u32 x738C847B, xA4A71E18, x74878E78;
455 u32 x333D9639, x74879639, x8B7869C6;
458 vsel(x44447777, a2, a6, a3);
459 vxor(x4B4B7878, a4, x44447777);
460 vsel(x22772277, a3, a5, a2);
461 vsel(x0505F5F5, a6, a2, a4);
462 vsel(x220522F5, x22772277, x0505F5F5, a5);
463 vxor(x694E5A8D, x4B4B7878, x220522F5);
465 vxor(x00FFFF00, a5, a6);
466 vxor(x66666666, a2, a3);
467 vsel(x32353235, a3, x220522F5, a4);
468 vsel(x26253636, x66666666, x32353235, x4B4B7878);
469 vxor(x26DAC936, x00FFFF00, x26253636);
470 vsel(x0, x26DAC936, x694E5A8D, a1);
471 vxor(*out1, *out1, x0);
473 vxor(x738F9C63, a2, x26DAC936);
474 vsel(x11EF9867, x738F9C63, a5, x66666666);
475 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
477 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
478 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
479 vxor(x4E639396, x0505F5F5, x4B666663);
481 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
483 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
484 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
485 vsel(x1, xB14EE41D, x26DA9867, a1);
486 vxor(*out2, *out2, x1);
488 vxor(xD728827B, x66666666, xB14EE41D);
489 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
490 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
491 vsel(x2, x699C585B, x4E639396, a1);
492 vxor(*out3, *out3, x2);
494 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
495 vxor(xA4A71E18, x738F9C63, xD728827B);
496 vsel(x74878E78, x738C847B, xA4A71E18, a4);
498 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
499 vsel(x74879639, x74878E78, x333D9639, a6);
500 vnot(x8B7869C6, x74879639);
501 vsel(x3, x74878E78, x8B7869C6, a1);
502 vxor(*out4, *out4, x3);
506 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
507 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
509 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
510 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
511 u32 x3001F74E, x30555745, x693CD926;
512 u32 x0C0CD926, x0C3F25E9, x38D696A5;
514 u32 x03D2117B, xC778395B, xCB471CB2;
515 u32 x5425B13F, x56B3803F, x919AE965;
516 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
519 vsel(x0505F5F5, a5, a1, a3);
520 vxor(x05FAF50A, a4, x0505F5F5);
521 vsel(x0F0F00FF, a3, a4, a5);
522 vsel(x22227777, a2, a5, a1);
523 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
524 vxor(x34E9B34C, a2, x07DA807F);
526 vsel(x00FFF00F, x05FAF50A, a4, a3);
527 vsel(x0033FCCF, a5, x00FFF00F, a2);
528 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
529 vsel(x0C0C3F3F, a3, a5, a2);
530 vxor(x59698E63, x5565B15C, x0C0C3F3F);
532 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
533 vsel(x30555745, x3001F74E, a1, x00FFF00F);
534 vxor(x693CD926, x59698E63, x30555745);
535 vsel(x2, x693CD926, x59698E63, a6);
536 vxor(*out3, *out3, x2);
538 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
539 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
540 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
542 vnot(xC729695A, x38D696A5);
544 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
545 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
546 vxor(xCB471CB2, x0C3F25E9, xC778395B);
547 vsel(x1, xCB471CB2, x34E9B34C, a6);
548 vxor(*out2, *out2, x1);
550 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
551 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
552 vxor(x919AE965, xC729695A, x56B3803F);
553 vsel(x3, xC729695A, x919AE965, a6);
554 vxor(*out4, *out4, x3);
556 vsel(x17B3023F, x07DA807F, a2, x59698E63);
557 vor(x75555755, a1, x30555745);
558 vxor(x62E6556A, x17B3023F, x75555755);
559 vxor(xA59E6C31, xC778395B, x62E6556A);
560 vsel(x0, xA59E6C31, x38D696A5, a6);
561 vxor(*out1, *out1, x0);
564 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
600 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
601 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
602 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
603 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
604 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
605 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
606 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
607 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
608 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
609 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
610 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
611 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
612 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
613 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
614 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
615 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
617 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
619 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
620 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
621 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
622 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
623 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
624 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
625 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
626 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
627 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
628 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
629 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
630 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
632 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
633 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
634 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
635 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
636 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
637 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
638 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
639 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
642 * descrypt uses all zero data input, so we can optimize this first round of des
647 s1(k00, k01, k02, k03, k04, k05, &D08, &D16, &D22, &D30);
648 s2(k06, k07, k08, k09, k10, k11, &D12, &D27, &D01, &D17);
649 s3(k12, k13, k14, k15, k16, k17, &D23, &D15, &D29, &D05);
650 s4(k18, k19, k20, k21, k22, k23, &D25, &D19, &D09, &D00);
651 s5(k24, k25, k26, k27, k28, k29, &D07, &D13, &D24, &D02);
652 s6(k30, k31, k32, k33, k34, k35, &D03, &D28, &D10, &D18);
653 s7(k36, k37, k38, k39, k40, k41, &D31, &D11, &D21, &D06);
654 s8(k42, k43, k44, k45, k46, k47, &D04, &D26, &D14, &D20);
658 for (u32 i = 1; i < 16; i++)
662 case 0: KEYSET00; break;
663 case 1: KEYSET01; break;
664 case 2: KEYSET02; break;
665 case 3: KEYSET03; break;
666 case 4: KEYSET04; break;
667 case 5: KEYSET05; break;
668 case 6: KEYSET06; break;
669 case 7: KEYSET07; break;
670 case 8: KEYSET10; break;
671 case 9: KEYSET11; break;
672 case 10: KEYSET12; break;
673 case 11: KEYSET13; break;
674 case 12: KEYSET14; break;
675 case 13: KEYSET15; break;
676 case 14: KEYSET16; break;
677 case 15: KEYSET17; break;
680 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
681 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
682 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
683 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
684 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
685 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
686 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
687 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
694 for (u32 ii = 1; ii < 25; ii++)
696 for (u32 i = 0; i < 16; i++)
700 case 0: KEYSET00; break;
701 case 1: KEYSET01; break;
702 case 2: KEYSET02; break;
703 case 3: KEYSET03; break;
704 case 4: KEYSET04; break;
705 case 5: KEYSET05; break;
706 case 6: KEYSET06; break;
707 case 7: KEYSET07; break;
708 case 8: KEYSET10; break;
709 case 9: KEYSET11; break;
710 case 10: KEYSET12; break;
711 case 11: KEYSET13; break;
712 case 12: KEYSET14; break;
713 case 13: KEYSET15; break;
714 case 14: KEYSET16; break;
715 case 15: KEYSET17; break;
718 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
719 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
720 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
721 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
722 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
723 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
724 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
725 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
736 static void transpose32c (u32 data[32])
738 #define swap(x,y,j,m) \
739 t = ((x) ^ ((y) >> (j))) & (m); \
741 (y) = (y) ^ (t << (j));
745 swap (data[ 0], data[16], 16, 0x0000ffff);
746 swap (data[ 1], data[17], 16, 0x0000ffff);
747 swap (data[ 2], data[18], 16, 0x0000ffff);
748 swap (data[ 3], data[19], 16, 0x0000ffff);
749 swap (data[ 4], data[20], 16, 0x0000ffff);
750 swap (data[ 5], data[21], 16, 0x0000ffff);
751 swap (data[ 6], data[22], 16, 0x0000ffff);
752 swap (data[ 7], data[23], 16, 0x0000ffff);
753 swap (data[ 8], data[24], 16, 0x0000ffff);
754 swap (data[ 9], data[25], 16, 0x0000ffff);
755 swap (data[10], data[26], 16, 0x0000ffff);
756 swap (data[11], data[27], 16, 0x0000ffff);
757 swap (data[12], data[28], 16, 0x0000ffff);
758 swap (data[13], data[29], 16, 0x0000ffff);
759 swap (data[14], data[30], 16, 0x0000ffff);
760 swap (data[15], data[31], 16, 0x0000ffff);
761 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
762 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
763 swap (data[ 2], data[10], 8, 0x00ff00ff);
764 swap (data[ 3], data[11], 8, 0x00ff00ff);
765 swap (data[ 4], data[12], 8, 0x00ff00ff);
766 swap (data[ 5], data[13], 8, 0x00ff00ff);
767 swap (data[ 6], data[14], 8, 0x00ff00ff);
768 swap (data[ 7], data[15], 8, 0x00ff00ff);
769 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
770 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
771 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
772 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
773 swap (data[ 0], data[ 2], 2, 0x33333333);
774 swap (data[ 1], data[ 3], 2, 0x33333333);
775 swap (data[ 0], data[ 1], 1, 0x55555555);
776 swap (data[ 2], data[ 3], 1, 0x55555555);
777 swap (data[ 4], data[ 6], 2, 0x33333333);
778 swap (data[ 5], data[ 7], 2, 0x33333333);
779 swap (data[ 4], data[ 5], 1, 0x55555555);
780 swap (data[ 6], data[ 7], 1, 0x55555555);
781 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
782 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
783 swap (data[10], data[14], 4, 0x0f0f0f0f);
784 swap (data[11], data[15], 4, 0x0f0f0f0f);
785 swap (data[ 8], data[10], 2, 0x33333333);
786 swap (data[ 9], data[11], 2, 0x33333333);
787 swap (data[ 8], data[ 9], 1, 0x55555555);
788 swap (data[10], data[11], 1, 0x55555555);
789 swap (data[12], data[14], 2, 0x33333333);
790 swap (data[13], data[15], 2, 0x33333333);
791 swap (data[12], data[13], 1, 0x55555555);
792 swap (data[14], data[15], 1, 0x55555555);
793 swap (data[16], data[24], 8, 0x00ff00ff);
794 swap (data[17], data[25], 8, 0x00ff00ff);
795 swap (data[18], data[26], 8, 0x00ff00ff);
796 swap (data[19], data[27], 8, 0x00ff00ff);
797 swap (data[20], data[28], 8, 0x00ff00ff);
798 swap (data[21], data[29], 8, 0x00ff00ff);
799 swap (data[22], data[30], 8, 0x00ff00ff);
800 swap (data[23], data[31], 8, 0x00ff00ff);
801 swap (data[16], data[20], 4, 0x0f0f0f0f);
802 swap (data[17], data[21], 4, 0x0f0f0f0f);
803 swap (data[18], data[22], 4, 0x0f0f0f0f);
804 swap (data[19], data[23], 4, 0x0f0f0f0f);
805 swap (data[16], data[18], 2, 0x33333333);
806 swap (data[17], data[19], 2, 0x33333333);
807 swap (data[16], data[17], 1, 0x55555555);
808 swap (data[18], data[19], 1, 0x55555555);
809 swap (data[20], data[22], 2, 0x33333333);
810 swap (data[21], data[23], 2, 0x33333333);
811 swap (data[20], data[21], 1, 0x55555555);
812 swap (data[22], data[23], 1, 0x55555555);
813 swap (data[24], data[28], 4, 0x0f0f0f0f);
814 swap (data[25], data[29], 4, 0x0f0f0f0f);
815 swap (data[26], data[30], 4, 0x0f0f0f0f);
816 swap (data[27], data[31], 4, 0x0f0f0f0f);
817 swap (data[24], data[26], 2, 0x33333333);
818 swap (data[25], data[27], 2, 0x33333333);
819 swap (data[24], data[25], 1, 0x55555555);
820 swap (data[26], data[27], 1, 0x55555555);
821 swap (data[28], data[30], 2, 0x33333333);
822 swap (data[29], data[31], 2, 0x33333333);
823 swap (data[28], data[29], 1, 0x55555555);
824 swap (data[30], data[31], 1, 0x55555555);
827 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
833 const u32 gid = get_global_id (0);
834 const u32 lid = get_local_id (0);
840 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
846 const u32 K00 = pws[gid].i[ 0];
847 const u32 K01 = pws[gid].i[ 1];
848 const u32 K02 = pws[gid].i[ 2];
849 const u32 K03 = pws[gid].i[ 3];
850 const u32 K04 = pws[gid].i[ 4];
851 const u32 K05 = pws[gid].i[ 5];
852 const u32 K06 = pws[gid].i[ 6];
853 const u32 K07 = pws[gid].i[ 7];
854 const u32 K08 = pws[gid].i[ 8];
855 const u32 K09 = pws[gid].i[ 9];
856 const u32 K10 = pws[gid].i[10];
857 const u32 K11 = pws[gid].i[11];
858 const u32 K12 = pws[gid].i[12];
859 const u32 K13 = pws[gid].i[13];
860 const u32 K14 = pws[gid].i[14];
861 const u32 K15 = pws[gid].i[15];
862 const u32 K16 = pws[gid].i[16];
863 const u32 K17 = pws[gid].i[17];
864 const u32 K18 = pws[gid].i[18];
865 const u32 K19 = pws[gid].i[19];
866 const u32 K20 = pws[gid].i[20];
867 const u32 K21 = pws[gid].i[21];
868 const u32 K22 = pws[gid].i[22];
869 const u32 K23 = pws[gid].i[23];
870 const u32 K24 = pws[gid].i[24];
871 const u32 K25 = pws[gid].i[25];
872 const u32 K26 = pws[gid].i[26];
873 const u32 K27 = pws[gid].i[27];
874 const u32 K28 = pws[gid].i[28];
875 const u32 K29 = pws[gid].i[29];
876 const u32 K30 = pws[gid].i[30];
877 const u32 K31 = pws[gid].i[31];
878 const u32 K32 = pws[gid].i[32];
879 const u32 K33 = pws[gid].i[33];
880 const u32 K34 = pws[gid].i[34];
881 const u32 K35 = pws[gid].i[35];
882 const u32 K36 = pws[gid].i[36];
883 const u32 K37 = pws[gid].i[37];
884 const u32 K38 = pws[gid].i[38];
885 const u32 K39 = pws[gid].i[39];
886 const u32 K40 = pws[gid].i[40];
887 const u32 K41 = pws[gid].i[41];
888 const u32 K42 = pws[gid].i[42];
889 const u32 K43 = pws[gid].i[43];
890 const u32 K44 = pws[gid].i[44];
891 const u32 K45 = pws[gid].i[45];
892 const u32 K46 = pws[gid].i[46];
893 const u32 K47 = pws[gid].i[47];
894 const u32 K48 = pws[gid].i[48];
895 const u32 K49 = pws[gid].i[49];
896 const u32 K50 = pws[gid].i[50];
897 const u32 K51 = pws[gid].i[51];
898 const u32 K52 = pws[gid].i[52];
899 const u32 K53 = pws[gid].i[53];
900 const u32 K54 = pws[gid].i[54];
901 const u32 K55 = pws[gid].i[55];
907 const u32 bf_loops = bfs_cnt;
909 const u32 pc_pos = get_local_id (1);
911 const u32 il_pos = pc_pos * 32;
942 k00 |= words_buf_r[pc_pos].b[ 0];
943 k01 |= words_buf_r[pc_pos].b[ 1];
944 k02 |= words_buf_r[pc_pos].b[ 2];
945 k03 |= words_buf_r[pc_pos].b[ 3];
946 k04 |= words_buf_r[pc_pos].b[ 4];
947 k05 |= words_buf_r[pc_pos].b[ 5];
948 k06 |= words_buf_r[pc_pos].b[ 6];
949 k07 |= words_buf_r[pc_pos].b[ 7];
950 k08 |= words_buf_r[pc_pos].b[ 8];
951 k09 |= words_buf_r[pc_pos].b[ 9];
952 k10 |= words_buf_r[pc_pos].b[10];
953 k11 |= words_buf_r[pc_pos].b[11];
954 k12 |= words_buf_r[pc_pos].b[12];
955 k13 |= words_buf_r[pc_pos].b[13];
956 k14 |= words_buf_r[pc_pos].b[14];
957 k15 |= words_buf_r[pc_pos].b[15];
958 k16 |= words_buf_r[pc_pos].b[16];
959 k17 |= words_buf_r[pc_pos].b[17];
960 k18 |= words_buf_r[pc_pos].b[18];
961 k19 |= words_buf_r[pc_pos].b[19];
962 k20 |= words_buf_r[pc_pos].b[20];
963 k21 |= words_buf_r[pc_pos].b[21];
964 k22 |= words_buf_r[pc_pos].b[22];
965 k23 |= words_buf_r[pc_pos].b[23];
966 k24 |= words_buf_r[pc_pos].b[24];
967 k25 |= words_buf_r[pc_pos].b[25];
968 k26 |= words_buf_r[pc_pos].b[26];
969 k27 |= words_buf_r[pc_pos].b[27];
1039 k00, k01, k02, k03, k04, k05, k06,
1040 k07, k08, k09, k10, k11, k12, k13,
1041 k14, k15, k16, k17, k18, k19, k20,
1042 k21, k22, k23, k24, k25, k26, k27,
1043 K28, K29, K30, K31, K32, K33, K34,
1044 K35, K36, K37, K38, K39, K40, K41,
1045 K42, K43, K44, K45, K46, K47, K48,
1046 K49, K50, K51, K52, K53, K54, K55,
1047 D00, D01, D02, D03, D04, D05, D06, D07,
1048 D08, D09, D10, D11, D12, D13, D14, D15,
1049 D16, D17, D18, D19, D20, D21, D22, D23,
1050 D24, D25, D26, D27, D28, D29, D30, D31,
1051 D32, D33, D34, D35, D36, D37, D38, D39,
1052 D40, D41, D42, D43, D44, D45, D46, D47,
1053 D48, D49, D50, D51, D52, D53, D54, D55,
1054 D56, D57, D58, D59, D60, D61, D62, D63
1124 if (digests_cnt < 16)
1126 for (u32 d = 0; d < digests_cnt; d++)
1128 const u32 final_hash_pos = digests_offset + d;
1130 if (hashes_shown[final_hash_pos]) continue;
1134 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1135 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1140 for (int i = 0; i < 32; i++)
1142 const u32 b0 = -((search[0] >> i) & 1);
1143 const u32 b1 = -((search[1] >> i) & 1);
1145 tmpResult |= out[ 0 + i] ^ b0;
1146 tmpResult |= out[32 + i] ^ b1;
1149 if (tmpResult == 0xffffffff) continue;
1151 const u32 slice = 31 - clz (~tmpResult);
1153 const u32x r0 = search[0];
1154 const u32x r1 = search[1];
1158 #include VECT_COMPARE_M
1167 for (int i = 0; i < 32; i++)
1169 out0[i] = out[ 0 + 31 - i];
1170 out1[i] = out[32 + 31 - i];
1173 transpose32c (out0);
1174 transpose32c (out1);
1177 for (int slice = 0; slice < 32; slice++)
1179 const u32x r0 = out0[31 - slice];
1180 const u32x r1 = out1[31 - slice];
1184 #include VECT_COMPARE_M
1189 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1195 const u32 gid = get_global_id (0);
1196 const u32 lid = get_local_id (0);
1202 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1277 const u32 K00 = pws[gid].i[ 0];
1278 const u32 K01 = pws[gid].i[ 1];
1279 const u32 K02 = pws[gid].i[ 2];
1280 const u32 K03 = pws[gid].i[ 3];
1281 const u32 K04 = pws[gid].i[ 4];
1282 const u32 K05 = pws[gid].i[ 5];
1283 const u32 K06 = pws[gid].i[ 6];
1284 const u32 K07 = pws[gid].i[ 7];
1285 const u32 K08 = pws[gid].i[ 8];
1286 const u32 K09 = pws[gid].i[ 9];
1287 const u32 K10 = pws[gid].i[10];
1288 const u32 K11 = pws[gid].i[11];
1289 const u32 K12 = pws[gid].i[12];
1290 const u32 K13 = pws[gid].i[13];
1291 const u32 K14 = pws[gid].i[14];
1292 const u32 K15 = pws[gid].i[15];
1293 const u32 K16 = pws[gid].i[16];
1294 const u32 K17 = pws[gid].i[17];
1295 const u32 K18 = pws[gid].i[18];
1296 const u32 K19 = pws[gid].i[19];
1297 const u32 K20 = pws[gid].i[20];
1298 const u32 K21 = pws[gid].i[21];
1299 const u32 K22 = pws[gid].i[22];
1300 const u32 K23 = pws[gid].i[23];
1301 const u32 K24 = pws[gid].i[24];
1302 const u32 K25 = pws[gid].i[25];
1303 const u32 K26 = pws[gid].i[26];
1304 const u32 K27 = pws[gid].i[27];
1305 const u32 K28 = pws[gid].i[28];
1306 const u32 K29 = pws[gid].i[29];
1307 const u32 K30 = pws[gid].i[30];
1308 const u32 K31 = pws[gid].i[31];
1309 const u32 K32 = pws[gid].i[32];
1310 const u32 K33 = pws[gid].i[33];
1311 const u32 K34 = pws[gid].i[34];
1312 const u32 K35 = pws[gid].i[35];
1313 const u32 K36 = pws[gid].i[36];
1314 const u32 K37 = pws[gid].i[37];
1315 const u32 K38 = pws[gid].i[38];
1316 const u32 K39 = pws[gid].i[39];
1317 const u32 K40 = pws[gid].i[40];
1318 const u32 K41 = pws[gid].i[41];
1319 const u32 K42 = pws[gid].i[42];
1320 const u32 K43 = pws[gid].i[43];
1321 const u32 K44 = pws[gid].i[44];
1322 const u32 K45 = pws[gid].i[45];
1323 const u32 K46 = pws[gid].i[46];
1324 const u32 K47 = pws[gid].i[47];
1325 const u32 K48 = pws[gid].i[48];
1326 const u32 K49 = pws[gid].i[49];
1327 const u32 K50 = pws[gid].i[50];
1328 const u32 K51 = pws[gid].i[51];
1329 const u32 K52 = pws[gid].i[52];
1330 const u32 K53 = pws[gid].i[53];
1331 const u32 K54 = pws[gid].i[54];
1332 const u32 K55 = pws[gid].i[55];
1338 const u32 pc_pos = get_local_id (1);
1340 const u32 il_pos = pc_pos * 32;
1371 k00 |= words_buf_r[pc_pos].b[ 0];
1372 k01 |= words_buf_r[pc_pos].b[ 1];
1373 k02 |= words_buf_r[pc_pos].b[ 2];
1374 k03 |= words_buf_r[pc_pos].b[ 3];
1375 k04 |= words_buf_r[pc_pos].b[ 4];
1376 k05 |= words_buf_r[pc_pos].b[ 5];
1377 k06 |= words_buf_r[pc_pos].b[ 6];
1378 k07 |= words_buf_r[pc_pos].b[ 7];
1379 k08 |= words_buf_r[pc_pos].b[ 8];
1380 k09 |= words_buf_r[pc_pos].b[ 9];
1381 k10 |= words_buf_r[pc_pos].b[10];
1382 k11 |= words_buf_r[pc_pos].b[11];
1383 k12 |= words_buf_r[pc_pos].b[12];
1384 k13 |= words_buf_r[pc_pos].b[13];
1385 k14 |= words_buf_r[pc_pos].b[14];
1386 k15 |= words_buf_r[pc_pos].b[15];
1387 k16 |= words_buf_r[pc_pos].b[16];
1388 k17 |= words_buf_r[pc_pos].b[17];
1389 k18 |= words_buf_r[pc_pos].b[18];
1390 k19 |= words_buf_r[pc_pos].b[19];
1391 k20 |= words_buf_r[pc_pos].b[20];
1392 k21 |= words_buf_r[pc_pos].b[21];
1393 k22 |= words_buf_r[pc_pos].b[22];
1394 k23 |= words_buf_r[pc_pos].b[23];
1395 k24 |= words_buf_r[pc_pos].b[24];
1396 k25 |= words_buf_r[pc_pos].b[25];
1397 k26 |= words_buf_r[pc_pos].b[26];
1398 k27 |= words_buf_r[pc_pos].b[27];
1468 k00, k01, k02, k03, k04, k05, k06,
1469 k07, k08, k09, k10, k11, k12, k13,
1470 k14, k15, k16, k17, k18, k19, k20,
1471 k21, k22, k23, k24, k25, k26, k27,
1472 K28, K29, K30, K31, K32, K33, K34,
1473 K35, K36, K37, K38, K39, K40, K41,
1474 K42, K43, K44, K45, K46, K47, K48,
1475 K49, K50, K51, K52, K53, K54, K55,
1476 D00, D01, D02, D03, D04, D05, D06, D07,
1477 D08, D09, D10, D11, D12, D13, D14, D15,
1478 D16, D17, D18, D19, D20, D21, D22, D23,
1479 D24, D25, D26, D27, D28, D29, D30, D31,
1480 D32, D33, D34, D35, D36, D37, D38, D39,
1481 D40, D41, D42, D43, D44, D45, D46, D47,
1482 D48, D49, D50, D51, D52, D53, D54, D55,
1483 D56, D57, D58, D59, D60, D61, D62, D63
1488 tmpResult |= D00 ^ S00;
1489 tmpResult |= D01 ^ S01;
1490 tmpResult |= D02 ^ S02;
1491 tmpResult |= D03 ^ S03;
1492 tmpResult |= D04 ^ S04;
1493 tmpResult |= D05 ^ S05;
1494 tmpResult |= D06 ^ S06;
1495 tmpResult |= D07 ^ S07;
1496 tmpResult |= D08 ^ S08;
1497 tmpResult |= D09 ^ S09;
1498 tmpResult |= D10 ^ S10;
1499 tmpResult |= D11 ^ S11;
1500 tmpResult |= D12 ^ S12;
1501 tmpResult |= D13 ^ S13;
1502 tmpResult |= D14 ^ S14;
1503 tmpResult |= D15 ^ S15;
1504 tmpResult |= D16 ^ S16;
1505 tmpResult |= D17 ^ S17;
1506 tmpResult |= D18 ^ S18;
1507 tmpResult |= D19 ^ S19;
1508 tmpResult |= D20 ^ S20;
1509 tmpResult |= D21 ^ S21;
1510 tmpResult |= D22 ^ S22;
1511 tmpResult |= D23 ^ S23;
1512 tmpResult |= D24 ^ S24;
1513 tmpResult |= D25 ^ S25;
1514 tmpResult |= D26 ^ S26;
1515 tmpResult |= D27 ^ S27;
1516 tmpResult |= D28 ^ S28;
1517 tmpResult |= D29 ^ S29;
1518 tmpResult |= D30 ^ S30;
1519 tmpResult |= D31 ^ S31;
1520 tmpResult |= D32 ^ S32;
1521 tmpResult |= D33 ^ S33;
1522 tmpResult |= D34 ^ S34;
1523 tmpResult |= D35 ^ S35;
1524 tmpResult |= D36 ^ S36;
1525 tmpResult |= D37 ^ S37;
1526 tmpResult |= D38 ^ S38;
1527 tmpResult |= D39 ^ S39;
1528 tmpResult |= D40 ^ S40;
1529 tmpResult |= D41 ^ S41;
1530 tmpResult |= D42 ^ S42;
1531 tmpResult |= D43 ^ S43;
1532 tmpResult |= D44 ^ S44;
1533 tmpResult |= D45 ^ S45;
1534 tmpResult |= D46 ^ S46;
1535 tmpResult |= D47 ^ S47;
1537 if (tmpResult == 0xffffffff) return;
1539 tmpResult |= D48 ^ S48;
1540 tmpResult |= D49 ^ S49;
1541 tmpResult |= D50 ^ S50;
1542 tmpResult |= D51 ^ S51;
1543 tmpResult |= D52 ^ S52;
1544 tmpResult |= D53 ^ S53;
1545 tmpResult |= D54 ^ S54;
1546 tmpResult |= D55 ^ S55;
1547 tmpResult |= D56 ^ S56;
1548 tmpResult |= D57 ^ S57;
1549 tmpResult |= D58 ^ S58;
1550 tmpResult |= D59 ^ S59;
1551 tmpResult |= D60 ^ S60;
1552 tmpResult |= D61 ^ S61;
1553 tmpResult |= D62 ^ S62;
1554 tmpResult |= D63 ^ S63;
1556 if (tmpResult == 0xffffffff) return;
1558 const u32 slice = 31 - clz (~tmpResult);
1560 #include VECT_COMPARE_S
1564 // transpose bitslice base : easy because no overlapping buffers
1565 // mod : attention race conditions, need different buffers for *in and *out
1568 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
1570 const u32 gid = get_global_id (0);
1572 const u32 w0 = pws[gid].i[0];
1573 const u32 w1 = pws[gid].i[1];
1575 const u32 w0s = (w0 << 1) & 0xfefefefe;
1576 const u32 w1s = (w1 << 1) & 0xfefefefe;
1579 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1581 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
1582 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
1583 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
1584 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
1585 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
1586 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
1587 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
1591 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1593 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
1594 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
1595 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
1596 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
1597 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
1598 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
1599 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
1603 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
1605 const u32 gid = get_global_id (0);
1606 const u32 lid = get_local_id (0);
1608 const u32 block = gid / 32;
1609 const u32 slice = gid % 32;
1611 const u32 w0 = mod[gid];
1613 const u32 w0s = (w0 << 1) & 0xfefefefe;
1616 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1618 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
1619 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
1620 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
1621 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
1622 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
1623 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
1624 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
1628 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1634 const u32 gid = get_global_id (0);
1635 const u32 lid = get_local_id (0);
1636 const u32 vid = get_local_id (1);
1638 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1639 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1641 __local u32 s_S[64];
1645 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1649 s_S[32 + vid] = -((s1 >> vid) & 1);
1652 barrier (CLK_LOCAL_MEM_FENCE);
1654 if (gid >= gid_max) return;
1660 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1663 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1667 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1671 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1677 const u32 gid = get_global_id (0);
1678 const u32 lid = get_local_id (0);
1679 const u32 vid = get_local_id (1);
1681 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1682 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1684 __local u32 s_S[64];
1688 s_S[ 0 + vid] = -((s0 >> vid) & 1);
1692 s_S[32 + vid] = -((s1 >> vid) & 1);
1695 barrier (CLK_LOCAL_MEM_FENCE);
1697 if (gid >= gid_max) return;
1703 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1706 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1710 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)