2 * Author......: Jens Steube <jens.steube@gmail.com>
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
29 #include "include/kernel_functions.c"
30 #include "types_amd.c"
31 #include "common_amd.c"
34 #define VECT_COMPARE_S "check_single_vect1_comp4_warp.c"
35 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c"
39 #define VECT_COMPARE_S "check_single_vect2_comp4_warp.c"
40 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c"
44 #define VECT_COMPARE_S "check_single_vect4_comp4_warp.c"
45 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c"
48 #define PERM_OP(a,b,tt,n,m) \
58 #define HPERM_OP(a,tt,n,m) \
64 tt = tt >> (16 + n); \
70 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
71 PERM_OP (l, r, tt, 16, 0x0000ffff); \
72 PERM_OP (r, l, tt, 2, 0x33333333); \
73 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
74 PERM_OP (r, l, tt, 1, 0x55555555); \
79 PERM_OP (l, r, tt, 1, 0x55555555); \
80 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
81 PERM_OP (l, r, tt, 2, 0x33333333); \
82 PERM_OP (r, l, tt, 16, 0x0000ffff); \
83 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
86 __constant u32 c_SPtrans[8][64] =
89 0x02080800, 0x00080000, 0x02000002, 0x02080802,
90 0x02000000, 0x00080802, 0x00080002, 0x02000002,
91 0x00080802, 0x02080800, 0x02080000, 0x00000802,
92 0x02000802, 0x02000000, 0x00000000, 0x00080002,
93 0x00080000, 0x00000002, 0x02000800, 0x00080800,
94 0x02080802, 0x02080000, 0x00000802, 0x02000800,
95 0x00000002, 0x00000800, 0x00080800, 0x02080002,
96 0x00000800, 0x02000802, 0x02080002, 0x00000000,
97 0x00000000, 0x02080802, 0x02000800, 0x00080002,
98 0x02080800, 0x00080000, 0x00000802, 0x02000800,
99 0x02080002, 0x00000800, 0x00080800, 0x02000002,
100 0x00080802, 0x00000002, 0x02000002, 0x02080000,
101 0x02080802, 0x00080800, 0x02080000, 0x02000802,
102 0x02000000, 0x00000802, 0x00080002, 0x00000000,
103 0x00080000, 0x02000000, 0x02000802, 0x02080800,
104 0x00000002, 0x02080002, 0x00000800, 0x00080802,
106 0x40108010, 0x00000000, 0x00108000, 0x40100000,
107 0x40000010, 0x00008010, 0x40008000, 0x00108000,
108 0x00008000, 0x40100010, 0x00000010, 0x40008000,
109 0x00100010, 0x40108000, 0x40100000, 0x00000010,
110 0x00100000, 0x40008010, 0x40100010, 0x00008000,
111 0x00108010, 0x40000000, 0x00000000, 0x00100010,
112 0x40008010, 0x00108010, 0x40108000, 0x40000010,
113 0x40000000, 0x00100000, 0x00008010, 0x40108010,
114 0x00100010, 0x40108000, 0x40008000, 0x00108010,
115 0x40108010, 0x00100010, 0x40000010, 0x00000000,
116 0x40000000, 0x00008010, 0x00100000, 0x40100010,
117 0x00008000, 0x40000000, 0x00108010, 0x40008010,
118 0x40108000, 0x00008000, 0x00000000, 0x40000010,
119 0x00000010, 0x40108010, 0x00108000, 0x40100000,
120 0x40100010, 0x00100000, 0x00008010, 0x40008000,
121 0x40008010, 0x00000010, 0x40100000, 0x00108000,
123 0x04000001, 0x04040100, 0x00000100, 0x04000101,
124 0x00040001, 0x04000000, 0x04000101, 0x00040100,
125 0x04000100, 0x00040000, 0x04040000, 0x00000001,
126 0x04040101, 0x00000101, 0x00000001, 0x04040001,
127 0x00000000, 0x00040001, 0x04040100, 0x00000100,
128 0x00000101, 0x04040101, 0x00040000, 0x04000001,
129 0x04040001, 0x04000100, 0x00040101, 0x04040000,
130 0x00040100, 0x00000000, 0x04000000, 0x00040101,
131 0x04040100, 0x00000100, 0x00000001, 0x00040000,
132 0x00000101, 0x00040001, 0x04040000, 0x04000101,
133 0x00000000, 0x04040100, 0x00040100, 0x04040001,
134 0x00040001, 0x04000000, 0x04040101, 0x00000001,
135 0x00040101, 0x04000001, 0x04000000, 0x04040101,
136 0x00040000, 0x04000100, 0x04000101, 0x00040100,
137 0x04000100, 0x00000000, 0x04040001, 0x00000101,
138 0x04000001, 0x00040101, 0x00000100, 0x04040000,
140 0x00401008, 0x10001000, 0x00000008, 0x10401008,
141 0x00000000, 0x10400000, 0x10001008, 0x00400008,
142 0x10401000, 0x10000008, 0x10000000, 0x00001008,
143 0x10000008, 0x00401008, 0x00400000, 0x10000000,
144 0x10400008, 0x00401000, 0x00001000, 0x00000008,
145 0x00401000, 0x10001008, 0x10400000, 0x00001000,
146 0x00001008, 0x00000000, 0x00400008, 0x10401000,
147 0x10001000, 0x10400008, 0x10401008, 0x00400000,
148 0x10400008, 0x00001008, 0x00400000, 0x10000008,
149 0x00401000, 0x10001000, 0x00000008, 0x10400000,
150 0x10001008, 0x00000000, 0x00001000, 0x00400008,
151 0x00000000, 0x10400008, 0x10401000, 0x00001000,
152 0x10000000, 0x10401008, 0x00401008, 0x00400000,
153 0x10401008, 0x00000008, 0x10001000, 0x00401008,
154 0x00400008, 0x00401000, 0x10400000, 0x10001008,
155 0x00001008, 0x10000000, 0x10000008, 0x10401000,
157 0x08000000, 0x00010000, 0x00000400, 0x08010420,
158 0x08010020, 0x08000400, 0x00010420, 0x08010000,
159 0x00010000, 0x00000020, 0x08000020, 0x00010400,
160 0x08000420, 0x08010020, 0x08010400, 0x00000000,
161 0x00010400, 0x08000000, 0x00010020, 0x00000420,
162 0x08000400, 0x00010420, 0x00000000, 0x08000020,
163 0x00000020, 0x08000420, 0x08010420, 0x00010020,
164 0x08010000, 0x00000400, 0x00000420, 0x08010400,
165 0x08010400, 0x08000420, 0x00010020, 0x08010000,
166 0x00010000, 0x00000020, 0x08000020, 0x08000400,
167 0x08000000, 0x00010400, 0x08010420, 0x00000000,
168 0x00010420, 0x08000000, 0x00000400, 0x00010020,
169 0x08000420, 0x00000400, 0x00000000, 0x08010420,
170 0x08010020, 0x08010400, 0x00000420, 0x00010000,
171 0x00010400, 0x08010020, 0x08000400, 0x00000420,
172 0x00000020, 0x00010420, 0x08010000, 0x08000020,
174 0x80000040, 0x00200040, 0x00000000, 0x80202000,
175 0x00200040, 0x00002000, 0x80002040, 0x00200000,
176 0x00002040, 0x80202040, 0x00202000, 0x80000000,
177 0x80002000, 0x80000040, 0x80200000, 0x00202040,
178 0x00200000, 0x80002040, 0x80200040, 0x00000000,
179 0x00002000, 0x00000040, 0x80202000, 0x80200040,
180 0x80202040, 0x80200000, 0x80000000, 0x00002040,
181 0x00000040, 0x00202000, 0x00202040, 0x80002000,
182 0x00002040, 0x80000000, 0x80002000, 0x00202040,
183 0x80202000, 0x00200040, 0x00000000, 0x80002000,
184 0x80000000, 0x00002000, 0x80200040, 0x00200000,
185 0x00200040, 0x80202040, 0x00202000, 0x00000040,
186 0x80202040, 0x00202000, 0x00200000, 0x80002040,
187 0x80000040, 0x80200000, 0x00202040, 0x00000000,
188 0x00002000, 0x80000040, 0x80002040, 0x80202000,
189 0x80200000, 0x00002040, 0x00000040, 0x80200040,
191 0x00004000, 0x00000200, 0x01000200, 0x01000004,
192 0x01004204, 0x00004004, 0x00004200, 0x00000000,
193 0x01000000, 0x01000204, 0x00000204, 0x01004000,
194 0x00000004, 0x01004200, 0x01004000, 0x00000204,
195 0x01000204, 0x00004000, 0x00004004, 0x01004204,
196 0x00000000, 0x01000200, 0x01000004, 0x00004200,
197 0x01004004, 0x00004204, 0x01004200, 0x00000004,
198 0x00004204, 0x01004004, 0x00000200, 0x01000000,
199 0x00004204, 0x01004000, 0x01004004, 0x00000204,
200 0x00004000, 0x00000200, 0x01000000, 0x01004004,
201 0x01000204, 0x00004204, 0x00004200, 0x00000000,
202 0x00000200, 0x01000004, 0x00000004, 0x01000200,
203 0x00000000, 0x01000204, 0x01000200, 0x00004200,
204 0x00000204, 0x00004000, 0x01004204, 0x01000000,
205 0x01004200, 0x00000004, 0x00004004, 0x01004204,
206 0x01000004, 0x01004200, 0x01004000, 0x00004004,
208 0x20800080, 0x20820000, 0x00020080, 0x00000000,
209 0x20020000, 0x00800080, 0x20800000, 0x20820080,
210 0x00000080, 0x20000000, 0x00820000, 0x00020080,
211 0x00820080, 0x20020080, 0x20000080, 0x20800000,
212 0x00020000, 0x00820080, 0x00800080, 0x20020000,
213 0x20820080, 0x20000080, 0x00000000, 0x00820000,
214 0x20000000, 0x00800000, 0x20020080, 0x20800080,
215 0x00800000, 0x00020000, 0x20820000, 0x00000080,
216 0x00800000, 0x00020000, 0x20000080, 0x20820080,
217 0x00020080, 0x20000000, 0x00000000, 0x00820000,
218 0x20800080, 0x20020080, 0x20020000, 0x00800080,
219 0x20820000, 0x00000080, 0x00800080, 0x20020000,
220 0x20820080, 0x00800000, 0x20800000, 0x20000080,
221 0x00820000, 0x00020080, 0x20020080, 0x20800000,
222 0x00000080, 0x20820000, 0x00820080, 0x00000000,
223 0x20000000, 0x20800080, 0x00020000, 0x00820080,
226 __constant u32 c_skb[8][64] =
228 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
229 0x00000000, 0x00000010, 0x20000000, 0x20000010,
230 0x00010000, 0x00010010, 0x20010000, 0x20010010,
231 0x00000800, 0x00000810, 0x20000800, 0x20000810,
232 0x00010800, 0x00010810, 0x20010800, 0x20010810,
233 0x00000020, 0x00000030, 0x20000020, 0x20000030,
234 0x00010020, 0x00010030, 0x20010020, 0x20010030,
235 0x00000820, 0x00000830, 0x20000820, 0x20000830,
236 0x00010820, 0x00010830, 0x20010820, 0x20010830,
237 0x00080000, 0x00080010, 0x20080000, 0x20080010,
238 0x00090000, 0x00090010, 0x20090000, 0x20090010,
239 0x00080800, 0x00080810, 0x20080800, 0x20080810,
240 0x00090800, 0x00090810, 0x20090800, 0x20090810,
241 0x00080020, 0x00080030, 0x20080020, 0x20080030,
242 0x00090020, 0x00090030, 0x20090020, 0x20090030,
243 0x00080820, 0x00080830, 0x20080820, 0x20080830,
244 0x00090820, 0x00090830, 0x20090820, 0x20090830,
245 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
246 0x00000000, 0x02000000, 0x00002000, 0x02002000,
247 0x00200000, 0x02200000, 0x00202000, 0x02202000,
248 0x00000004, 0x02000004, 0x00002004, 0x02002004,
249 0x00200004, 0x02200004, 0x00202004, 0x02202004,
250 0x00000400, 0x02000400, 0x00002400, 0x02002400,
251 0x00200400, 0x02200400, 0x00202400, 0x02202400,
252 0x00000404, 0x02000404, 0x00002404, 0x02002404,
253 0x00200404, 0x02200404, 0x00202404, 0x02202404,
254 0x10000000, 0x12000000, 0x10002000, 0x12002000,
255 0x10200000, 0x12200000, 0x10202000, 0x12202000,
256 0x10000004, 0x12000004, 0x10002004, 0x12002004,
257 0x10200004, 0x12200004, 0x10202004, 0x12202004,
258 0x10000400, 0x12000400, 0x10002400, 0x12002400,
259 0x10200400, 0x12200400, 0x10202400, 0x12202400,
260 0x10000404, 0x12000404, 0x10002404, 0x12002404,
261 0x10200404, 0x12200404, 0x10202404, 0x12202404,
262 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
263 0x00000000, 0x00000001, 0x00040000, 0x00040001,
264 0x01000000, 0x01000001, 0x01040000, 0x01040001,
265 0x00000002, 0x00000003, 0x00040002, 0x00040003,
266 0x01000002, 0x01000003, 0x01040002, 0x01040003,
267 0x00000200, 0x00000201, 0x00040200, 0x00040201,
268 0x01000200, 0x01000201, 0x01040200, 0x01040201,
269 0x00000202, 0x00000203, 0x00040202, 0x00040203,
270 0x01000202, 0x01000203, 0x01040202, 0x01040203,
271 0x08000000, 0x08000001, 0x08040000, 0x08040001,
272 0x09000000, 0x09000001, 0x09040000, 0x09040001,
273 0x08000002, 0x08000003, 0x08040002, 0x08040003,
274 0x09000002, 0x09000003, 0x09040002, 0x09040003,
275 0x08000200, 0x08000201, 0x08040200, 0x08040201,
276 0x09000200, 0x09000201, 0x09040200, 0x09040201,
277 0x08000202, 0x08000203, 0x08040202, 0x08040203,
278 0x09000202, 0x09000203, 0x09040202, 0x09040203,
279 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
280 0x00000000, 0x00100000, 0x00000100, 0x00100100,
281 0x00000008, 0x00100008, 0x00000108, 0x00100108,
282 0x00001000, 0x00101000, 0x00001100, 0x00101100,
283 0x00001008, 0x00101008, 0x00001108, 0x00101108,
284 0x04000000, 0x04100000, 0x04000100, 0x04100100,
285 0x04000008, 0x04100008, 0x04000108, 0x04100108,
286 0x04001000, 0x04101000, 0x04001100, 0x04101100,
287 0x04001008, 0x04101008, 0x04001108, 0x04101108,
288 0x00020000, 0x00120000, 0x00020100, 0x00120100,
289 0x00020008, 0x00120008, 0x00020108, 0x00120108,
290 0x00021000, 0x00121000, 0x00021100, 0x00121100,
291 0x00021008, 0x00121008, 0x00021108, 0x00121108,
292 0x04020000, 0x04120000, 0x04020100, 0x04120100,
293 0x04020008, 0x04120008, 0x04020108, 0x04120108,
294 0x04021000, 0x04121000, 0x04021100, 0x04121100,
295 0x04021008, 0x04121008, 0x04021108, 0x04121108,
296 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
297 0x00000000, 0x10000000, 0x00010000, 0x10010000,
298 0x00000004, 0x10000004, 0x00010004, 0x10010004,
299 0x20000000, 0x30000000, 0x20010000, 0x30010000,
300 0x20000004, 0x30000004, 0x20010004, 0x30010004,
301 0x00100000, 0x10100000, 0x00110000, 0x10110000,
302 0x00100004, 0x10100004, 0x00110004, 0x10110004,
303 0x20100000, 0x30100000, 0x20110000, 0x30110000,
304 0x20100004, 0x30100004, 0x20110004, 0x30110004,
305 0x00001000, 0x10001000, 0x00011000, 0x10011000,
306 0x00001004, 0x10001004, 0x00011004, 0x10011004,
307 0x20001000, 0x30001000, 0x20011000, 0x30011000,
308 0x20001004, 0x30001004, 0x20011004, 0x30011004,
309 0x00101000, 0x10101000, 0x00111000, 0x10111000,
310 0x00101004, 0x10101004, 0x00111004, 0x10111004,
311 0x20101000, 0x30101000, 0x20111000, 0x30111000,
312 0x20101004, 0x30101004, 0x20111004, 0x30111004,
313 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
314 0x00000000, 0x08000000, 0x00000008, 0x08000008,
315 0x00000400, 0x08000400, 0x00000408, 0x08000408,
316 0x00020000, 0x08020000, 0x00020008, 0x08020008,
317 0x00020400, 0x08020400, 0x00020408, 0x08020408,
318 0x00000001, 0x08000001, 0x00000009, 0x08000009,
319 0x00000401, 0x08000401, 0x00000409, 0x08000409,
320 0x00020001, 0x08020001, 0x00020009, 0x08020009,
321 0x00020401, 0x08020401, 0x00020409, 0x08020409,
322 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
323 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
324 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
325 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
326 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
327 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
328 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
329 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
330 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
331 0x00000000, 0x00000100, 0x00080000, 0x00080100,
332 0x01000000, 0x01000100, 0x01080000, 0x01080100,
333 0x00000010, 0x00000110, 0x00080010, 0x00080110,
334 0x01000010, 0x01000110, 0x01080010, 0x01080110,
335 0x00200000, 0x00200100, 0x00280000, 0x00280100,
336 0x01200000, 0x01200100, 0x01280000, 0x01280100,
337 0x00200010, 0x00200110, 0x00280010, 0x00280110,
338 0x01200010, 0x01200110, 0x01280010, 0x01280110,
339 0x00000200, 0x00000300, 0x00080200, 0x00080300,
340 0x01000200, 0x01000300, 0x01080200, 0x01080300,
341 0x00000210, 0x00000310, 0x00080210, 0x00080310,
342 0x01000210, 0x01000310, 0x01080210, 0x01080310,
343 0x00200200, 0x00200300, 0x00280200, 0x00280300,
344 0x01200200, 0x01200300, 0x01280200, 0x01280300,
345 0x00200210, 0x00200310, 0x00280210, 0x00280310,
346 0x01200210, 0x01200310, 0x01280210, 0x01280310,
347 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
348 0x00000000, 0x04000000, 0x00040000, 0x04040000,
349 0x00000002, 0x04000002, 0x00040002, 0x04040002,
350 0x00002000, 0x04002000, 0x00042000, 0x04042000,
351 0x00002002, 0x04002002, 0x00042002, 0x04042002,
352 0x00000020, 0x04000020, 0x00040020, 0x04040020,
353 0x00000022, 0x04000022, 0x00040022, 0x04040022,
354 0x00002020, 0x04002020, 0x00042020, 0x04042020,
355 0x00002022, 0x04002022, 0x00042022, 0x04042022,
356 0x00000800, 0x04000800, 0x00040800, 0x04040800,
357 0x00000802, 0x04000802, 0x00040802, 0x04040802,
358 0x00002800, 0x04002800, 0x00042800, 0x04042800,
359 0x00002802, 0x04002802, 0x00042802, 0x04042802,
360 0x00000820, 0x04000820, 0x00040820, 0x04040820,
361 0x00000822, 0x04000822, 0x00040822, 0x04040822,
362 0x00002820, 0x04002820, 0x00042820, 0x04042820,
363 0x00002822, 0x04002822, 0x00042822, 0x04042822
366 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
367 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
370 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
374 #define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
378 #define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
381 static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64])
394 for (u32 i = 0; i < 16; i += 2)
400 t = Kd[i + 0] ^ rotl32 (r, 28u);
403 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
404 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
405 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
406 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
407 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
408 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
409 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
410 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
413 t = Kd[i + 1] ^ rotl32 (l, 28u);
416 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
417 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
418 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
419 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
420 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
421 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
422 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
423 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
435 static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64])
439 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
440 HPERM_OP (c, tt, 2, 0xcccc0000);
441 HPERM_OP (d, tt, 2, 0xcccc0000);
442 PERM_OP (d, c, tt, 1, 0x55555555);
443 PERM_OP (c, d, tt, 8, 0x00ff00ff);
444 PERM_OP (d, c, tt, 1, 0x55555555);
446 d = ((d & 0x000000ff) << 16)
447 | ((d & 0x0000ff00) << 0)
448 | ((d & 0x00ff0000) >> 16)
449 | ((c & 0xf0000000) >> 4);
454 for (u32 i = 0; i < 16; i++)
456 c = c >> shifts3s0[i] | c << shifts3s1[i];
457 d = d >> shifts3s0[i] | d << shifts3s1[i];
462 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
463 | BOX ((((c >> 6) & 0x03)
464 | ((c >> 7) & 0x3c)), 1, s_skb)
465 | BOX ((((c >> 13) & 0x0f)
466 | ((c >> 14) & 0x30)), 2, s_skb)
467 | BOX ((((c >> 20) & 0x01)
469 | ((c >> 22) & 0x38)), 3, s_skb);
471 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
472 | BOX ((((d >> 7) & 0x03)
473 | ((d >> 8) & 0x3c)), 5, s_skb)
474 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
475 | BOX ((((d >> 21) & 0x0f)
476 | ((d >> 22) & 0x30)), 7, s_skb);
478 #if defined cl_amd_media_ops
479 Kc[i] = amd_bytealign (t, s << 16, 2);
480 Kd[i] = amd_bytealign (t >> 16, s, 2);
482 Kc[i] = ((t << 16) | (s & 0x0000ffff));
483 Kd[i] = ((s >> 16) | (t & 0xffff0000));
486 Kc[i] = rotl32 (Kc[i], 2u);
487 Kd[i] = rotl32 (Kd[i], 2u);
491 static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len)
493 #if defined cl_amd_media_ops
498 case 1: sw[0] = amd_bytealign (w0, sw[0] << 24, 3);
499 sw[1] = amd_bytealign (sw[1] >> 8, w0, 3);
501 case 2: sw[0] = amd_bytealign (w0, sw[0] << 16, 2);
502 sw[1] = amd_bytealign (sw[1] >> 16, w0, 2);
504 case 3: sw[0] = amd_bytealign (w0, sw[0] << 8, 1);
505 sw[1] = amd_bytealign (sw[1] >> 24, w0, 1);
509 case 5: sw[1] = amd_bytealign (w0, sw[1] << 24, 3);
510 sw[2] = amd_bytealign (sw[2] >> 8, w0, 3);
512 case 6: sw[1] = amd_bytealign (w0, sw[1] << 16, 2);
513 sw[2] = amd_bytealign (sw[2] >> 16, w0, 2);
515 case 7: sw[1] = amd_bytealign (w0, sw[1] << 8, 1);
516 sw[2] = amd_bytealign (sw[2] >> 24, w0, 1);
520 case 9: sw[2] = amd_bytealign (w0, sw[2] << 24, 3);
521 sw[3] = amd_bytealign (sw[3] >> 8, w0, 3);
523 case 10: sw[2] = amd_bytealign (w0, sw[2] << 16, 2);
524 sw[3] = amd_bytealign (sw[3] >> 16, w0, 2);
526 case 11: sw[2] = amd_bytealign (w0, sw[2] << 8, 1);
527 sw[3] = amd_bytealign (sw[3] >> 24, w0, 1);
531 case 13: sw[3] = amd_bytealign (w0, sw[3] << 24, 3);
532 sw[4] = amd_bytealign (sw[4] >> 8, w0, 3);
534 case 14: sw[3] = amd_bytealign (w0, sw[3] << 16, 2);
535 sw[4] = amd_bytealign (sw[4] >> 16, w0, 2);
537 case 15: sw[3] = amd_bytealign (w0, sw[3] << 8, 1);
538 sw[4] = amd_bytealign (sw[4] >> 24, w0, 1);
542 case 17: sw[4] = amd_bytealign (w0, sw[4] << 24, 3);
543 sw[5] = amd_bytealign (sw[5] >> 8, w0, 3);
545 case 18: sw[4] = amd_bytealign (w0, sw[4] << 16, 2);
546 sw[5] = amd_bytealign (sw[5] >> 16, w0, 2);
548 case 19: sw[4] = amd_bytealign (w0, sw[4] << 8, 1);
549 sw[5] = amd_bytealign (sw[5] >> 24, w0, 1);
553 case 21: sw[5] = amd_bytealign (w0, sw[5] << 24, 3);
554 sw[6] = amd_bytealign (sw[6] >> 8, w0, 3);
556 case 22: sw[5] = amd_bytealign (w0, sw[5] << 16, 2);
557 sw[6] = amd_bytealign (sw[6] >> 16, w0, 2);
559 case 23: sw[5] = amd_bytealign (w0, sw[5] << 8, 1);
560 sw[6] = amd_bytealign (sw[6] >> 24, w0, 1);
564 case 25: sw[6] = amd_bytealign (w0, sw[6] << 24, 3);
565 sw[7] = amd_bytealign (sw[7] >> 8, w0, 3);
567 case 26: sw[6] = amd_bytealign (w0, sw[6] << 16, 2);
568 sw[7] = amd_bytealign (sw[7] >> 16, w0, 2);
570 case 27: sw[6] = amd_bytealign (w0, sw[6] << 8, 1);
571 sw[7] = amd_bytealign (sw[7] >> 24, w0, 1);
575 case 29: sw[7] = amd_bytealign (w0, sw[7] << 24, 3);
576 sw[8] = amd_bytealign (sw[8] >> 8, w0, 3);
578 case 30: sw[7] = amd_bytealign (w0, sw[7] << 16, 2);
579 sw[8] = amd_bytealign (sw[8] >> 16, w0, 2);
581 case 31: sw[7] = amd_bytealign (w0, sw[7] << 8, 1);
582 sw[8] = amd_bytealign (sw[8] >> 24, w0, 1);
590 case 1: sw[0] = (sw[0] & 0x000000ff) | (w0 << 8);
591 sw[1] = (sw[1] & 0xffffff00) | (w0 >> 24);
593 case 2: sw[0] = (sw[0] & 0x0000ffff) | (w0 << 16);
594 sw[1] = (sw[1] & 0xffff0000) | (w0 >> 16);
596 case 3: sw[0] = (sw[0] & 0x00ffffff) | (w0 << 24);
597 sw[1] = (sw[1] & 0xff000000) | (w0 >> 8);
601 case 5: sw[1] = (sw[1] & 0x000000ff) | (w0 << 8);
602 sw[2] = (sw[2] & 0xffffff00) | (w0 >> 24);
604 case 6: sw[1] = (sw[1] & 0x0000ffff) | (w0 << 16);
605 sw[2] = (sw[2] & 0xffff0000) | (w0 >> 16);
607 case 7: sw[1] = (sw[1] & 0x00ffffff) | (w0 << 24);
608 sw[2] = (sw[2] & 0xff000000) | (w0 >> 8);
612 case 9: sw[2] = (sw[2] & 0x000000ff) | (w0 << 8);
613 sw[3] = (sw[3] & 0xffffff00) | (w0 >> 24);
615 case 10: sw[2] = (sw[2] & 0x0000ffff) | (w0 << 16);
616 sw[3] = (sw[3] & 0xffff0000) | (w0 >> 16);
618 case 11: sw[2] = (sw[2] & 0x00ffffff) | (w0 << 24);
619 sw[3] = (sw[3] & 0xff000000) | (w0 >> 8);
623 case 13: sw[3] = (sw[3] & 0x000000ff) | (w0 << 8);
624 sw[4] = (sw[4] & 0xffffff00) | (w0 >> 24);
626 case 14: sw[3] = (sw[3] & 0x0000ffff) | (w0 << 16);
627 sw[4] = (sw[4] & 0xffff0000) | (w0 >> 16);
629 case 15: sw[3] = (sw[3] & 0x00ffffff) | (w0 << 24);
630 sw[4] = (sw[4] & 0xff000000) | (w0 >> 8);
634 case 17: sw[4] = (sw[4] & 0x000000ff) | (w0 << 8);
635 sw[5] = (sw[5] & 0xffffff00) | (w0 >> 24);
637 case 18: sw[4] = (sw[4] & 0x0000ffff) | (w0 << 16);
638 sw[5] = (sw[5] & 0xffff0000) | (w0 >> 16);
640 case 19: sw[4] = (sw[4] & 0x00ffffff) | (w0 << 24);
641 sw[5] = (sw[5] & 0xff000000) | (w0 >> 8);
645 case 21: sw[5] = (sw[5] & 0x000000ff) | (w0 << 8);
646 sw[6] = (sw[6] & 0xffffff00) | (w0 >> 24);
648 case 22: sw[5] = (sw[5] & 0x0000ffff) | (w0 << 16);
649 sw[6] = (sw[6] & 0xffff0000) | (w0 >> 16);
651 case 23: sw[5] = (sw[5] & 0x00ffffff) | (w0 << 24);
652 sw[6] = (sw[6] & 0xff000000) | (w0 >> 8);
656 case 25: sw[6] = (sw[6] & 0x000000ff) | (w0 << 8);
657 sw[7] = (sw[7] & 0xffffff00) | (w0 >> 24);
659 case 26: sw[6] = (sw[6] & 0x0000ffff) | (w0 << 16);
660 sw[7] = (sw[7] & 0xffff0000) | (w0 >> 16);
662 case 27: sw[6] = (sw[6] & 0x00ffffff) | (w0 << 24);
663 sw[7] = (sw[7] & 0xff000000) | (w0 >> 8);
667 case 29: sw[7] = (sw[7] & 0x000000ff) | (w0 << 8);
668 sw[8] = (sw[8] & 0xffffff00) | (w0 >> 24);
670 case 30: sw[7] = (sw[7] & 0x0000ffff) | (w0 << 16);
671 sw[8] = (sw[8] & 0xffff0000) | (w0 >> 16);
673 case 31: sw[7] = (sw[7] & 0x00ffffff) | (w0 << 24);
674 sw[8] = (sw[8] & 0xff000000) | (w0 >> 8);
680 static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
686 const u32 gid = get_global_id (0);
687 const u32 lid = get_local_id (0);
695 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
696 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
697 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
698 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
702 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
703 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
704 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
705 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
714 const u32 salt_len = salt_bufs[salt_pos].salt_len;
716 const u32 salt_word_len = (salt_len + pw_len) * 2;
744 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
746 w0_t[0] |= salt_buf0[0];
747 w0_t[1] |= salt_buf0[1];
748 w0_t[2] |= salt_buf0[2];
749 w0_t[3] |= salt_buf0[3];
750 w1_t[0] |= salt_buf1[0];
751 w1_t[1] |= salt_buf1[1];
752 w1_t[2] |= salt_buf1[2];
753 w1_t[3] |= salt_buf1[3];
754 w2_t[0] |= salt_buf2[0];
755 w2_t[1] |= salt_buf2[1];
756 w2_t[2] |= salt_buf2[2];
757 w2_t[3] |= salt_buf2[3];
786 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
790 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
792 const u32x w0r = words_buf_r[il_pos];
794 const u32x w0 = w0l | w0r;
796 overwrite_at (dst, w0, salt_len);
799 * precompute key1 since key is static: 0x0123456789abcdef
800 * plus LEFT_ROTATE by 2
842 * key1 (generate key)
850 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
854 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
855 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
860 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
864 * key2 (generate hash)
867 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
872 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
876 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
877 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
882 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
889 const u32x r0 = iv[0];
890 const u32x r1 = iv[1];
894 #include VECT_COMPARE_M
898 static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
904 const u32 gid = get_global_id (0);
905 const u32 lid = get_local_id (0);
913 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
914 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
915 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
916 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
920 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
921 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
922 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
923 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
932 const u32 salt_len = salt_bufs[salt_pos].salt_len;
934 const u32 salt_word_len = (salt_len + pw_len) * 2;
962 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
964 w0_t[0] |= salt_buf0[0];
965 w0_t[1] |= salt_buf0[1];
966 w0_t[2] |= salt_buf0[2];
967 w0_t[3] |= salt_buf0[3];
968 w1_t[0] |= salt_buf1[0];
969 w1_t[1] |= salt_buf1[1];
970 w1_t[2] |= salt_buf1[2];
971 w1_t[3] |= salt_buf1[3];
972 w2_t[0] |= salt_buf2[0];
973 w2_t[1] |= salt_buf2[1];
974 w2_t[2] |= salt_buf2[2];
975 w2_t[3] |= salt_buf2[3];
1004 const u32 search[4] =
1006 digests_buf[digests_offset].digest_buf[DGST_R0],
1007 digests_buf[digests_offset].digest_buf[DGST_R1],
1008 digests_buf[digests_offset].digest_buf[DGST_R2],
1009 digests_buf[digests_offset].digest_buf[DGST_R3]
1016 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
1020 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
1022 const u32x w0r = words_buf_r[il_pos];
1024 const u32x w0 = w0l | w0r;
1026 overwrite_at (dst, w0, salt_len);
1029 * precompute key1 since key is static: 0x0123456789abcdef
1030 * plus LEFT_ROTATE by 2
1035 Kc[ 0] = 0x64649040;
1036 Kc[ 1] = 0x14909858;
1037 Kc[ 2] = 0xc4b44888;
1038 Kc[ 3] = 0x9094e438;
1039 Kc[ 4] = 0xd8a004f0;
1040 Kc[ 5] = 0xa8f02810;
1041 Kc[ 6] = 0xc84048d8;
1042 Kc[ 7] = 0x68d804a8;
1043 Kc[ 8] = 0x0490e40c;
1044 Kc[ 9] = 0xac183024;
1045 Kc[10] = 0x24c07c10;
1046 Kc[11] = 0x8c88c038;
1047 Kc[12] = 0xc048c824;
1048 Kc[13] = 0x4c0470a8;
1049 Kc[14] = 0x584020b4;
1050 Kc[15] = 0x00742c4c;
1054 Kd[ 0] = 0xa42ce40c;
1055 Kd[ 1] = 0x64689858;
1056 Kd[ 2] = 0x484050b8;
1057 Kd[ 3] = 0xe8184814;
1058 Kd[ 4] = 0x405cc070;
1059 Kd[ 5] = 0xa010784c;
1060 Kd[ 6] = 0x6074a800;
1061 Kd[ 7] = 0x80701c1c;
1062 Kd[ 8] = 0x9cd49430;
1063 Kd[ 9] = 0x4c8ce078;
1064 Kd[10] = 0x5c18c088;
1065 Kd[11] = 0x28a8a4c8;
1066 Kd[12] = 0x3c180838;
1067 Kd[13] = 0xb0b86c20;
1068 Kd[14] = 0xac84a094;
1069 Kd[15] = 0x4ce0c0c4;
1072 * key1 (generate key)
1080 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1084 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1085 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1090 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1094 * key2 (generate hash)
1097 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1102 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1106 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1107 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1112 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1119 const u32x r0 = iv[0];
1120 const u32x r1 = iv[1];
1124 #include VECT_COMPARE_S
1128 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1130 __local u32 s_SPtrans[8][64];
1132 __local u32 s_skb[8][64];
1138 const u32 gid = get_global_id (0);
1139 const u32 lid = get_local_id (0);
1143 w[ 0] = pws[gid].i[ 0];
1144 w[ 1] = pws[gid].i[ 1];
1145 w[ 2] = pws[gid].i[ 2];
1146 w[ 3] = pws[gid].i[ 3];
1160 const u32 pw_len = pws[gid].pw_len;
1166 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1167 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1168 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1169 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1170 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1171 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1172 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1173 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1175 s_skb[0][lid] = c_skb[0][lid];
1176 s_skb[1][lid] = c_skb[1][lid];
1177 s_skb[2][lid] = c_skb[2][lid];
1178 s_skb[3][lid] = c_skb[3][lid];
1179 s_skb[4][lid] = c_skb[4][lid];
1180 s_skb[5][lid] = c_skb[5][lid];
1181 s_skb[6][lid] = c_skb[6][lid];
1182 s_skb[7][lid] = c_skb[7][lid];
1184 barrier (CLK_LOCAL_MEM_FENCE);
1186 if (gid >= gid_max) return;
1192 m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1195 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1197 __local u32 s_SPtrans[8][64];
1199 __local u32 s_skb[8][64];
1205 const u32 gid = get_global_id (0);
1206 const u32 lid = get_local_id (0);
1210 w[ 0] = pws[gid].i[ 0];
1211 w[ 1] = pws[gid].i[ 1];
1212 w[ 2] = pws[gid].i[ 2];
1213 w[ 3] = pws[gid].i[ 3];
1214 w[ 4] = pws[gid].i[ 4];
1215 w[ 5] = pws[gid].i[ 5];
1216 w[ 6] = pws[gid].i[ 6];
1217 w[ 7] = pws[gid].i[ 7];
1227 const u32 pw_len = pws[gid].pw_len;
1233 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1234 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1235 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1236 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1237 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1238 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1239 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1240 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1242 s_skb[0][lid] = c_skb[0][lid];
1243 s_skb[1][lid] = c_skb[1][lid];
1244 s_skb[2][lid] = c_skb[2][lid];
1245 s_skb[3][lid] = c_skb[3][lid];
1246 s_skb[4][lid] = c_skb[4][lid];
1247 s_skb[5][lid] = c_skb[5][lid];
1248 s_skb[6][lid] = c_skb[6][lid];
1249 s_skb[7][lid] = c_skb[7][lid];
1251 barrier (CLK_LOCAL_MEM_FENCE);
1253 if (gid >= gid_max) return;
1259 m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1262 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1266 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1268 __local u32 s_SPtrans[8][64];
1270 __local u32 s_skb[8][64];
1276 const u32 gid = get_global_id (0);
1277 const u32 lid = get_local_id (0);
1281 w[ 0] = pws[gid].i[ 0];
1282 w[ 1] = pws[gid].i[ 1];
1283 w[ 2] = pws[gid].i[ 2];
1284 w[ 3] = pws[gid].i[ 3];
1298 const u32 pw_len = pws[gid].pw_len;
1304 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1305 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1306 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1307 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1308 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1309 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1310 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1311 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1313 s_skb[0][lid] = c_skb[0][lid];
1314 s_skb[1][lid] = c_skb[1][lid];
1315 s_skb[2][lid] = c_skb[2][lid];
1316 s_skb[3][lid] = c_skb[3][lid];
1317 s_skb[4][lid] = c_skb[4][lid];
1318 s_skb[5][lid] = c_skb[5][lid];
1319 s_skb[6][lid] = c_skb[6][lid];
1320 s_skb[7][lid] = c_skb[7][lid];
1322 barrier (CLK_LOCAL_MEM_FENCE);
1324 if (gid >= gid_max) return;
1330 m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1333 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1335 __local u32 s_SPtrans[8][64];
1337 __local u32 s_skb[8][64];
1343 const u32 gid = get_global_id (0);
1344 const u32 lid = get_local_id (0);
1348 w[ 0] = pws[gid].i[ 0];
1349 w[ 1] = pws[gid].i[ 1];
1350 w[ 2] = pws[gid].i[ 2];
1351 w[ 3] = pws[gid].i[ 3];
1352 w[ 4] = pws[gid].i[ 4];
1353 w[ 5] = pws[gid].i[ 5];
1354 w[ 6] = pws[gid].i[ 6];
1355 w[ 7] = pws[gid].i[ 7];
1365 const u32 pw_len = pws[gid].pw_len;
1371 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1372 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1373 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1374 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1375 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1376 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1377 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1378 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1380 s_skb[0][lid] = c_skb[0][lid];
1381 s_skb[1][lid] = c_skb[1][lid];
1382 s_skb[2][lid] = c_skb[2][lid];
1383 s_skb[3][lid] = c_skb[3][lid];
1384 s_skb[4][lid] = c_skb[4][lid];
1385 s_skb[5][lid] = c_skb[5][lid];
1386 s_skb[6][lid] = c_skb[6][lid];
1387 s_skb[7][lid] = c_skb[7][lid];
1389 barrier (CLK_LOCAL_MEM_FENCE);
1391 if (gid >= gid_max) return;
1397 m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1400 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)