2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 #define PERM_OP(a,b,tt,n,m) \
33 #define HPERM_OP(a,tt,n,m) \
39 tt = tt >> (16 + n); \
45 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
46 PERM_OP (l, r, tt, 16, 0x0000ffff); \
47 PERM_OP (r, l, tt, 2, 0x33333333); \
48 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
49 PERM_OP (r, l, tt, 1, 0x55555555); \
54 PERM_OP (l, r, tt, 1, 0x55555555); \
55 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
56 PERM_OP (l, r, tt, 2, 0x33333333); \
57 PERM_OP (r, l, tt, 16, 0x0000ffff); \
58 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
61 __constant u32 c_SPtrans[8][64] =
64 0x02080800, 0x00080000, 0x02000002, 0x02080802,
65 0x02000000, 0x00080802, 0x00080002, 0x02000002,
66 0x00080802, 0x02080800, 0x02080000, 0x00000802,
67 0x02000802, 0x02000000, 0x00000000, 0x00080002,
68 0x00080000, 0x00000002, 0x02000800, 0x00080800,
69 0x02080802, 0x02080000, 0x00000802, 0x02000800,
70 0x00000002, 0x00000800, 0x00080800, 0x02080002,
71 0x00000800, 0x02000802, 0x02080002, 0x00000000,
72 0x00000000, 0x02080802, 0x02000800, 0x00080002,
73 0x02080800, 0x00080000, 0x00000802, 0x02000800,
74 0x02080002, 0x00000800, 0x00080800, 0x02000002,
75 0x00080802, 0x00000002, 0x02000002, 0x02080000,
76 0x02080802, 0x00080800, 0x02080000, 0x02000802,
77 0x02000000, 0x00000802, 0x00080002, 0x00000000,
78 0x00080000, 0x02000000, 0x02000802, 0x02080800,
79 0x00000002, 0x02080002, 0x00000800, 0x00080802,
81 0x40108010, 0x00000000, 0x00108000, 0x40100000,
82 0x40000010, 0x00008010, 0x40008000, 0x00108000,
83 0x00008000, 0x40100010, 0x00000010, 0x40008000,
84 0x00100010, 0x40108000, 0x40100000, 0x00000010,
85 0x00100000, 0x40008010, 0x40100010, 0x00008000,
86 0x00108010, 0x40000000, 0x00000000, 0x00100010,
87 0x40008010, 0x00108010, 0x40108000, 0x40000010,
88 0x40000000, 0x00100000, 0x00008010, 0x40108010,
89 0x00100010, 0x40108000, 0x40008000, 0x00108010,
90 0x40108010, 0x00100010, 0x40000010, 0x00000000,
91 0x40000000, 0x00008010, 0x00100000, 0x40100010,
92 0x00008000, 0x40000000, 0x00108010, 0x40008010,
93 0x40108000, 0x00008000, 0x00000000, 0x40000010,
94 0x00000010, 0x40108010, 0x00108000, 0x40100000,
95 0x40100010, 0x00100000, 0x00008010, 0x40008000,
96 0x40008010, 0x00000010, 0x40100000, 0x00108000,
98 0x04000001, 0x04040100, 0x00000100, 0x04000101,
99 0x00040001, 0x04000000, 0x04000101, 0x00040100,
100 0x04000100, 0x00040000, 0x04040000, 0x00000001,
101 0x04040101, 0x00000101, 0x00000001, 0x04040001,
102 0x00000000, 0x00040001, 0x04040100, 0x00000100,
103 0x00000101, 0x04040101, 0x00040000, 0x04000001,
104 0x04040001, 0x04000100, 0x00040101, 0x04040000,
105 0x00040100, 0x00000000, 0x04000000, 0x00040101,
106 0x04040100, 0x00000100, 0x00000001, 0x00040000,
107 0x00000101, 0x00040001, 0x04040000, 0x04000101,
108 0x00000000, 0x04040100, 0x00040100, 0x04040001,
109 0x00040001, 0x04000000, 0x04040101, 0x00000001,
110 0x00040101, 0x04000001, 0x04000000, 0x04040101,
111 0x00040000, 0x04000100, 0x04000101, 0x00040100,
112 0x04000100, 0x00000000, 0x04040001, 0x00000101,
113 0x04000001, 0x00040101, 0x00000100, 0x04040000,
115 0x00401008, 0x10001000, 0x00000008, 0x10401008,
116 0x00000000, 0x10400000, 0x10001008, 0x00400008,
117 0x10401000, 0x10000008, 0x10000000, 0x00001008,
118 0x10000008, 0x00401008, 0x00400000, 0x10000000,
119 0x10400008, 0x00401000, 0x00001000, 0x00000008,
120 0x00401000, 0x10001008, 0x10400000, 0x00001000,
121 0x00001008, 0x00000000, 0x00400008, 0x10401000,
122 0x10001000, 0x10400008, 0x10401008, 0x00400000,
123 0x10400008, 0x00001008, 0x00400000, 0x10000008,
124 0x00401000, 0x10001000, 0x00000008, 0x10400000,
125 0x10001008, 0x00000000, 0x00001000, 0x00400008,
126 0x00000000, 0x10400008, 0x10401000, 0x00001000,
127 0x10000000, 0x10401008, 0x00401008, 0x00400000,
128 0x10401008, 0x00000008, 0x10001000, 0x00401008,
129 0x00400008, 0x00401000, 0x10400000, 0x10001008,
130 0x00001008, 0x10000000, 0x10000008, 0x10401000,
132 0x08000000, 0x00010000, 0x00000400, 0x08010420,
133 0x08010020, 0x08000400, 0x00010420, 0x08010000,
134 0x00010000, 0x00000020, 0x08000020, 0x00010400,
135 0x08000420, 0x08010020, 0x08010400, 0x00000000,
136 0x00010400, 0x08000000, 0x00010020, 0x00000420,
137 0x08000400, 0x00010420, 0x00000000, 0x08000020,
138 0x00000020, 0x08000420, 0x08010420, 0x00010020,
139 0x08010000, 0x00000400, 0x00000420, 0x08010400,
140 0x08010400, 0x08000420, 0x00010020, 0x08010000,
141 0x00010000, 0x00000020, 0x08000020, 0x08000400,
142 0x08000000, 0x00010400, 0x08010420, 0x00000000,
143 0x00010420, 0x08000000, 0x00000400, 0x00010020,
144 0x08000420, 0x00000400, 0x00000000, 0x08010420,
145 0x08010020, 0x08010400, 0x00000420, 0x00010000,
146 0x00010400, 0x08010020, 0x08000400, 0x00000420,
147 0x00000020, 0x00010420, 0x08010000, 0x08000020,
149 0x80000040, 0x00200040, 0x00000000, 0x80202000,
150 0x00200040, 0x00002000, 0x80002040, 0x00200000,
151 0x00002040, 0x80202040, 0x00202000, 0x80000000,
152 0x80002000, 0x80000040, 0x80200000, 0x00202040,
153 0x00200000, 0x80002040, 0x80200040, 0x00000000,
154 0x00002000, 0x00000040, 0x80202000, 0x80200040,
155 0x80202040, 0x80200000, 0x80000000, 0x00002040,
156 0x00000040, 0x00202000, 0x00202040, 0x80002000,
157 0x00002040, 0x80000000, 0x80002000, 0x00202040,
158 0x80202000, 0x00200040, 0x00000000, 0x80002000,
159 0x80000000, 0x00002000, 0x80200040, 0x00200000,
160 0x00200040, 0x80202040, 0x00202000, 0x00000040,
161 0x80202040, 0x00202000, 0x00200000, 0x80002040,
162 0x80000040, 0x80200000, 0x00202040, 0x00000000,
163 0x00002000, 0x80000040, 0x80002040, 0x80202000,
164 0x80200000, 0x00002040, 0x00000040, 0x80200040,
166 0x00004000, 0x00000200, 0x01000200, 0x01000004,
167 0x01004204, 0x00004004, 0x00004200, 0x00000000,
168 0x01000000, 0x01000204, 0x00000204, 0x01004000,
169 0x00000004, 0x01004200, 0x01004000, 0x00000204,
170 0x01000204, 0x00004000, 0x00004004, 0x01004204,
171 0x00000000, 0x01000200, 0x01000004, 0x00004200,
172 0x01004004, 0x00004204, 0x01004200, 0x00000004,
173 0x00004204, 0x01004004, 0x00000200, 0x01000000,
174 0x00004204, 0x01004000, 0x01004004, 0x00000204,
175 0x00004000, 0x00000200, 0x01000000, 0x01004004,
176 0x01000204, 0x00004204, 0x00004200, 0x00000000,
177 0x00000200, 0x01000004, 0x00000004, 0x01000200,
178 0x00000000, 0x01000204, 0x01000200, 0x00004200,
179 0x00000204, 0x00004000, 0x01004204, 0x01000000,
180 0x01004200, 0x00000004, 0x00004004, 0x01004204,
181 0x01000004, 0x01004200, 0x01004000, 0x00004004,
183 0x20800080, 0x20820000, 0x00020080, 0x00000000,
184 0x20020000, 0x00800080, 0x20800000, 0x20820080,
185 0x00000080, 0x20000000, 0x00820000, 0x00020080,
186 0x00820080, 0x20020080, 0x20000080, 0x20800000,
187 0x00020000, 0x00820080, 0x00800080, 0x20020000,
188 0x20820080, 0x20000080, 0x00000000, 0x00820000,
189 0x20000000, 0x00800000, 0x20020080, 0x20800080,
190 0x00800000, 0x00020000, 0x20820000, 0x00000080,
191 0x00800000, 0x00020000, 0x20000080, 0x20820080,
192 0x00020080, 0x20000000, 0x00000000, 0x00820000,
193 0x20800080, 0x20020080, 0x20020000, 0x00800080,
194 0x20820000, 0x00000080, 0x00800080, 0x20020000,
195 0x20820080, 0x00800000, 0x20800000, 0x20000080,
196 0x00820000, 0x00020080, 0x20020080, 0x20800000,
197 0x00000080, 0x20820000, 0x00820080, 0x00000000,
198 0x20000000, 0x20800080, 0x00020000, 0x00820080,
201 __constant u32 c_skb[8][64] =
203 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
204 0x00000000, 0x00000010, 0x20000000, 0x20000010,
205 0x00010000, 0x00010010, 0x20010000, 0x20010010,
206 0x00000800, 0x00000810, 0x20000800, 0x20000810,
207 0x00010800, 0x00010810, 0x20010800, 0x20010810,
208 0x00000020, 0x00000030, 0x20000020, 0x20000030,
209 0x00010020, 0x00010030, 0x20010020, 0x20010030,
210 0x00000820, 0x00000830, 0x20000820, 0x20000830,
211 0x00010820, 0x00010830, 0x20010820, 0x20010830,
212 0x00080000, 0x00080010, 0x20080000, 0x20080010,
213 0x00090000, 0x00090010, 0x20090000, 0x20090010,
214 0x00080800, 0x00080810, 0x20080800, 0x20080810,
215 0x00090800, 0x00090810, 0x20090800, 0x20090810,
216 0x00080020, 0x00080030, 0x20080020, 0x20080030,
217 0x00090020, 0x00090030, 0x20090020, 0x20090030,
218 0x00080820, 0x00080830, 0x20080820, 0x20080830,
219 0x00090820, 0x00090830, 0x20090820, 0x20090830,
220 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
221 0x00000000, 0x02000000, 0x00002000, 0x02002000,
222 0x00200000, 0x02200000, 0x00202000, 0x02202000,
223 0x00000004, 0x02000004, 0x00002004, 0x02002004,
224 0x00200004, 0x02200004, 0x00202004, 0x02202004,
225 0x00000400, 0x02000400, 0x00002400, 0x02002400,
226 0x00200400, 0x02200400, 0x00202400, 0x02202400,
227 0x00000404, 0x02000404, 0x00002404, 0x02002404,
228 0x00200404, 0x02200404, 0x00202404, 0x02202404,
229 0x10000000, 0x12000000, 0x10002000, 0x12002000,
230 0x10200000, 0x12200000, 0x10202000, 0x12202000,
231 0x10000004, 0x12000004, 0x10002004, 0x12002004,
232 0x10200004, 0x12200004, 0x10202004, 0x12202004,
233 0x10000400, 0x12000400, 0x10002400, 0x12002400,
234 0x10200400, 0x12200400, 0x10202400, 0x12202400,
235 0x10000404, 0x12000404, 0x10002404, 0x12002404,
236 0x10200404, 0x12200404, 0x10202404, 0x12202404,
237 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
238 0x00000000, 0x00000001, 0x00040000, 0x00040001,
239 0x01000000, 0x01000001, 0x01040000, 0x01040001,
240 0x00000002, 0x00000003, 0x00040002, 0x00040003,
241 0x01000002, 0x01000003, 0x01040002, 0x01040003,
242 0x00000200, 0x00000201, 0x00040200, 0x00040201,
243 0x01000200, 0x01000201, 0x01040200, 0x01040201,
244 0x00000202, 0x00000203, 0x00040202, 0x00040203,
245 0x01000202, 0x01000203, 0x01040202, 0x01040203,
246 0x08000000, 0x08000001, 0x08040000, 0x08040001,
247 0x09000000, 0x09000001, 0x09040000, 0x09040001,
248 0x08000002, 0x08000003, 0x08040002, 0x08040003,
249 0x09000002, 0x09000003, 0x09040002, 0x09040003,
250 0x08000200, 0x08000201, 0x08040200, 0x08040201,
251 0x09000200, 0x09000201, 0x09040200, 0x09040201,
252 0x08000202, 0x08000203, 0x08040202, 0x08040203,
253 0x09000202, 0x09000203, 0x09040202, 0x09040203,
254 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
255 0x00000000, 0x00100000, 0x00000100, 0x00100100,
256 0x00000008, 0x00100008, 0x00000108, 0x00100108,
257 0x00001000, 0x00101000, 0x00001100, 0x00101100,
258 0x00001008, 0x00101008, 0x00001108, 0x00101108,
259 0x04000000, 0x04100000, 0x04000100, 0x04100100,
260 0x04000008, 0x04100008, 0x04000108, 0x04100108,
261 0x04001000, 0x04101000, 0x04001100, 0x04101100,
262 0x04001008, 0x04101008, 0x04001108, 0x04101108,
263 0x00020000, 0x00120000, 0x00020100, 0x00120100,
264 0x00020008, 0x00120008, 0x00020108, 0x00120108,
265 0x00021000, 0x00121000, 0x00021100, 0x00121100,
266 0x00021008, 0x00121008, 0x00021108, 0x00121108,
267 0x04020000, 0x04120000, 0x04020100, 0x04120100,
268 0x04020008, 0x04120008, 0x04020108, 0x04120108,
269 0x04021000, 0x04121000, 0x04021100, 0x04121100,
270 0x04021008, 0x04121008, 0x04021108, 0x04121108,
271 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
272 0x00000000, 0x10000000, 0x00010000, 0x10010000,
273 0x00000004, 0x10000004, 0x00010004, 0x10010004,
274 0x20000000, 0x30000000, 0x20010000, 0x30010000,
275 0x20000004, 0x30000004, 0x20010004, 0x30010004,
276 0x00100000, 0x10100000, 0x00110000, 0x10110000,
277 0x00100004, 0x10100004, 0x00110004, 0x10110004,
278 0x20100000, 0x30100000, 0x20110000, 0x30110000,
279 0x20100004, 0x30100004, 0x20110004, 0x30110004,
280 0x00001000, 0x10001000, 0x00011000, 0x10011000,
281 0x00001004, 0x10001004, 0x00011004, 0x10011004,
282 0x20001000, 0x30001000, 0x20011000, 0x30011000,
283 0x20001004, 0x30001004, 0x20011004, 0x30011004,
284 0x00101000, 0x10101000, 0x00111000, 0x10111000,
285 0x00101004, 0x10101004, 0x00111004, 0x10111004,
286 0x20101000, 0x30101000, 0x20111000, 0x30111000,
287 0x20101004, 0x30101004, 0x20111004, 0x30111004,
288 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
289 0x00000000, 0x08000000, 0x00000008, 0x08000008,
290 0x00000400, 0x08000400, 0x00000408, 0x08000408,
291 0x00020000, 0x08020000, 0x00020008, 0x08020008,
292 0x00020400, 0x08020400, 0x00020408, 0x08020408,
293 0x00000001, 0x08000001, 0x00000009, 0x08000009,
294 0x00000401, 0x08000401, 0x00000409, 0x08000409,
295 0x00020001, 0x08020001, 0x00020009, 0x08020009,
296 0x00020401, 0x08020401, 0x00020409, 0x08020409,
297 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
298 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
299 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
300 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
301 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
302 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
303 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
304 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
305 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
306 0x00000000, 0x00000100, 0x00080000, 0x00080100,
307 0x01000000, 0x01000100, 0x01080000, 0x01080100,
308 0x00000010, 0x00000110, 0x00080010, 0x00080110,
309 0x01000010, 0x01000110, 0x01080010, 0x01080110,
310 0x00200000, 0x00200100, 0x00280000, 0x00280100,
311 0x01200000, 0x01200100, 0x01280000, 0x01280100,
312 0x00200010, 0x00200110, 0x00280010, 0x00280110,
313 0x01200010, 0x01200110, 0x01280010, 0x01280110,
314 0x00000200, 0x00000300, 0x00080200, 0x00080300,
315 0x01000200, 0x01000300, 0x01080200, 0x01080300,
316 0x00000210, 0x00000310, 0x00080210, 0x00080310,
317 0x01000210, 0x01000310, 0x01080210, 0x01080310,
318 0x00200200, 0x00200300, 0x00280200, 0x00280300,
319 0x01200200, 0x01200300, 0x01280200, 0x01280300,
320 0x00200210, 0x00200310, 0x00280210, 0x00280310,
321 0x01200210, 0x01200310, 0x01280210, 0x01280310,
322 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
323 0x00000000, 0x04000000, 0x00040000, 0x04040000,
324 0x00000002, 0x04000002, 0x00040002, 0x04040002,
325 0x00002000, 0x04002000, 0x00042000, 0x04042000,
326 0x00002002, 0x04002002, 0x00042002, 0x04042002,
327 0x00000020, 0x04000020, 0x00040020, 0x04040020,
328 0x00000022, 0x04000022, 0x00040022, 0x04040022,
329 0x00002020, 0x04002020, 0x00042020, 0x04042020,
330 0x00002022, 0x04002022, 0x00042022, 0x04042022,
331 0x00000800, 0x04000800, 0x00040800, 0x04040800,
332 0x00000802, 0x04000802, 0x00040802, 0x04040802,
333 0x00002800, 0x04002800, 0x00042800, 0x04042800,
334 0x00002802, 0x04002802, 0x00042802, 0x04042802,
335 0x00000820, 0x04000820, 0x00040820, 0x04040820,
336 0x00000822, 0x04000822, 0x00040822, 0x04040822,
337 0x00002820, 0x04002820, 0x00042820, 0x04042820,
338 0x00002822, 0x04002822, 0x00042822, 0x04042822
341 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
342 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
344 #define BOX(i,n,S) (S)[(n)][(i)]
346 static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64])
359 for (u32 i = 0; i < 16; i += 2)
365 t = Kd[i + 0] ^ rotl32 (r, 28u);
368 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
369 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
370 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
371 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
372 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
373 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
374 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
375 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
378 t = Kd[i + 1] ^ rotl32 (l, 28u);
381 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
382 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
383 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
384 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
385 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
386 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
387 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
388 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
400 static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64])
404 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
405 HPERM_OP (c, tt, 2, 0xcccc0000);
406 HPERM_OP (d, tt, 2, 0xcccc0000);
407 PERM_OP (d, c, tt, 1, 0x55555555);
408 PERM_OP (c, d, tt, 8, 0x00ff00ff);
409 PERM_OP (d, c, tt, 1, 0x55555555);
411 d = ((d & 0x000000ff) << 16)
412 | ((d & 0x0000ff00) << 0)
413 | ((d & 0x00ff0000) >> 16)
414 | ((c & 0xf0000000) >> 4);
419 for (u32 i = 0; i < 16; i++)
421 c = c >> shifts3s0[i] | c << shifts3s1[i];
422 d = d >> shifts3s0[i] | d << shifts3s1[i];
427 u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
428 | BOX ((((c >> 6) & 0x03)
429 | ((c >> 7) & 0x3c)), 1, s_skb)
430 | BOX ((((c >> 13) & 0x0f)
431 | ((c >> 14) & 0x30)), 2, s_skb)
432 | BOX ((((c >> 20) & 0x01)
434 | ((c >> 22) & 0x38)), 3, s_skb);
436 u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
437 | BOX ((((d >> 7) & 0x03)
438 | ((d >> 8) & 0x3c)), 5, s_skb)
439 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
440 | BOX ((((d >> 21) & 0x0f)
441 | ((d >> 22) & 0x30)), 7, s_skb);
443 #if defined cl_amd_media_ops
444 Kc[i] = amd_bytealign (t, s << 16, 2);
445 Kd[i] = amd_bytealign (t >> 16, s, 2);
447 Kc[i] = ((t << 16) | (s & 0x0000ffff));
448 Kd[i] = ((s >> 16) | (t & 0xffff0000));
451 Kc[i] = rotl32 (Kc[i], 2u);
452 Kd[i] = rotl32 (Kd[i], 2u);
456 static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len)
458 #if defined cl_amd_media_ops
463 case 1: sw[0] = amd_bytealign (w0, sw[0] << 24, 3);
464 sw[1] = amd_bytealign (sw[1] >> 8, w0, 3);
466 case 2: sw[0] = amd_bytealign (w0, sw[0] << 16, 2);
467 sw[1] = amd_bytealign (sw[1] >> 16, w0, 2);
469 case 3: sw[0] = amd_bytealign (w0, sw[0] << 8, 1);
470 sw[1] = amd_bytealign (sw[1] >> 24, w0, 1);
474 case 5: sw[1] = amd_bytealign (w0, sw[1] << 24, 3);
475 sw[2] = amd_bytealign (sw[2] >> 8, w0, 3);
477 case 6: sw[1] = amd_bytealign (w0, sw[1] << 16, 2);
478 sw[2] = amd_bytealign (sw[2] >> 16, w0, 2);
480 case 7: sw[1] = amd_bytealign (w0, sw[1] << 8, 1);
481 sw[2] = amd_bytealign (sw[2] >> 24, w0, 1);
485 case 9: sw[2] = amd_bytealign (w0, sw[2] << 24, 3);
486 sw[3] = amd_bytealign (sw[3] >> 8, w0, 3);
488 case 10: sw[2] = amd_bytealign (w0, sw[2] << 16, 2);
489 sw[3] = amd_bytealign (sw[3] >> 16, w0, 2);
491 case 11: sw[2] = amd_bytealign (w0, sw[2] << 8, 1);
492 sw[3] = amd_bytealign (sw[3] >> 24, w0, 1);
496 case 13: sw[3] = amd_bytealign (w0, sw[3] << 24, 3);
497 sw[4] = amd_bytealign (sw[4] >> 8, w0, 3);
499 case 14: sw[3] = amd_bytealign (w0, sw[3] << 16, 2);
500 sw[4] = amd_bytealign (sw[4] >> 16, w0, 2);
502 case 15: sw[3] = amd_bytealign (w0, sw[3] << 8, 1);
503 sw[4] = amd_bytealign (sw[4] >> 24, w0, 1);
507 case 17: sw[4] = amd_bytealign (w0, sw[4] << 24, 3);
508 sw[5] = amd_bytealign (sw[5] >> 8, w0, 3);
510 case 18: sw[4] = amd_bytealign (w0, sw[4] << 16, 2);
511 sw[5] = amd_bytealign (sw[5] >> 16, w0, 2);
513 case 19: sw[4] = amd_bytealign (w0, sw[4] << 8, 1);
514 sw[5] = amd_bytealign (sw[5] >> 24, w0, 1);
518 case 21: sw[5] = amd_bytealign (w0, sw[5] << 24, 3);
519 sw[6] = amd_bytealign (sw[6] >> 8, w0, 3);
521 case 22: sw[5] = amd_bytealign (w0, sw[5] << 16, 2);
522 sw[6] = amd_bytealign (sw[6] >> 16, w0, 2);
524 case 23: sw[5] = amd_bytealign (w0, sw[5] << 8, 1);
525 sw[6] = amd_bytealign (sw[6] >> 24, w0, 1);
529 case 25: sw[6] = amd_bytealign (w0, sw[6] << 24, 3);
530 sw[7] = amd_bytealign (sw[7] >> 8, w0, 3);
532 case 26: sw[6] = amd_bytealign (w0, sw[6] << 16, 2);
533 sw[7] = amd_bytealign (sw[7] >> 16, w0, 2);
535 case 27: sw[6] = amd_bytealign (w0, sw[6] << 8, 1);
536 sw[7] = amd_bytealign (sw[7] >> 24, w0, 1);
540 case 29: sw[7] = amd_bytealign (w0, sw[7] << 24, 3);
541 sw[8] = amd_bytealign (sw[8] >> 8, w0, 3);
543 case 30: sw[7] = amd_bytealign (w0, sw[7] << 16, 2);
544 sw[8] = amd_bytealign (sw[8] >> 16, w0, 2);
546 case 31: sw[7] = amd_bytealign (w0, sw[7] << 8, 1);
547 sw[8] = amd_bytealign (sw[8] >> 24, w0, 1);
555 case 1: sw[0] = (sw[0] & 0x000000ff) | (w0 << 8);
556 sw[1] = (sw[1] & 0xffffff00) | (w0 >> 24);
558 case 2: sw[0] = (sw[0] & 0x0000ffff) | (w0 << 16);
559 sw[1] = (sw[1] & 0xffff0000) | (w0 >> 16);
561 case 3: sw[0] = (sw[0] & 0x00ffffff) | (w0 << 24);
562 sw[1] = (sw[1] & 0xff000000) | (w0 >> 8);
566 case 5: sw[1] = (sw[1] & 0x000000ff) | (w0 << 8);
567 sw[2] = (sw[2] & 0xffffff00) | (w0 >> 24);
569 case 6: sw[1] = (sw[1] & 0x0000ffff) | (w0 << 16);
570 sw[2] = (sw[2] & 0xffff0000) | (w0 >> 16);
572 case 7: sw[1] = (sw[1] & 0x00ffffff) | (w0 << 24);
573 sw[2] = (sw[2] & 0xff000000) | (w0 >> 8);
577 case 9: sw[2] = (sw[2] & 0x000000ff) | (w0 << 8);
578 sw[3] = (sw[3] & 0xffffff00) | (w0 >> 24);
580 case 10: sw[2] = (sw[2] & 0x0000ffff) | (w0 << 16);
581 sw[3] = (sw[3] & 0xffff0000) | (w0 >> 16);
583 case 11: sw[2] = (sw[2] & 0x00ffffff) | (w0 << 24);
584 sw[3] = (sw[3] & 0xff000000) | (w0 >> 8);
588 case 13: sw[3] = (sw[3] & 0x000000ff) | (w0 << 8);
589 sw[4] = (sw[4] & 0xffffff00) | (w0 >> 24);
591 case 14: sw[3] = (sw[3] & 0x0000ffff) | (w0 << 16);
592 sw[4] = (sw[4] & 0xffff0000) | (w0 >> 16);
594 case 15: sw[3] = (sw[3] & 0x00ffffff) | (w0 << 24);
595 sw[4] = (sw[4] & 0xff000000) | (w0 >> 8);
599 case 17: sw[4] = (sw[4] & 0x000000ff) | (w0 << 8);
600 sw[5] = (sw[5] & 0xffffff00) | (w0 >> 24);
602 case 18: sw[4] = (sw[4] & 0x0000ffff) | (w0 << 16);
603 sw[5] = (sw[5] & 0xffff0000) | (w0 >> 16);
605 case 19: sw[4] = (sw[4] & 0x00ffffff) | (w0 << 24);
606 sw[5] = (sw[5] & 0xff000000) | (w0 >> 8);
610 case 21: sw[5] = (sw[5] & 0x000000ff) | (w0 << 8);
611 sw[6] = (sw[6] & 0xffffff00) | (w0 >> 24);
613 case 22: sw[5] = (sw[5] & 0x0000ffff) | (w0 << 16);
614 sw[6] = (sw[6] & 0xffff0000) | (w0 >> 16);
616 case 23: sw[5] = (sw[5] & 0x00ffffff) | (w0 << 24);
617 sw[6] = (sw[6] & 0xff000000) | (w0 >> 8);
621 case 25: sw[6] = (sw[6] & 0x000000ff) | (w0 << 8);
622 sw[7] = (sw[7] & 0xffffff00) | (w0 >> 24);
624 case 26: sw[6] = (sw[6] & 0x0000ffff) | (w0 << 16);
625 sw[7] = (sw[7] & 0xffff0000) | (w0 >> 16);
627 case 27: sw[6] = (sw[6] & 0x00ffffff) | (w0 << 24);
628 sw[7] = (sw[7] & 0xff000000) | (w0 >> 8);
632 case 29: sw[7] = (sw[7] & 0x000000ff) | (w0 << 8);
633 sw[8] = (sw[8] & 0xffffff00) | (w0 >> 24);
635 case 30: sw[7] = (sw[7] & 0x0000ffff) | (w0 << 16);
636 sw[8] = (sw[8] & 0xffff0000) | (w0 >> 16);
638 case 31: sw[7] = (sw[7] & 0x00ffffff) | (w0 << 24);
639 sw[8] = (sw[8] & 0xff000000) | (w0 >> 8);
645 static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
651 const u32 gid = get_global_id (0);
652 const u32 lid = get_local_id (0);
660 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
661 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
662 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
663 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
667 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
668 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
669 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
670 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
679 const u32 salt_len = salt_bufs[salt_pos].salt_len;
681 const u32 salt_word_len = (salt_len + pw_len) * 2;
709 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
711 w0_t[0] |= salt_buf0[0];
712 w0_t[1] |= salt_buf0[1];
713 w0_t[2] |= salt_buf0[2];
714 w0_t[3] |= salt_buf0[3];
715 w1_t[0] |= salt_buf1[0];
716 w1_t[1] |= salt_buf1[1];
717 w1_t[2] |= salt_buf1[2];
718 w1_t[3] |= salt_buf1[3];
719 w2_t[0] |= salt_buf2[0];
720 w2_t[1] |= salt_buf2[1];
721 w2_t[2] |= salt_buf2[2];
722 w2_t[3] |= salt_buf2[3];
753 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
755 const u32 w0r = words_buf_r[il_pos];
757 const u32 w0 = w0l | w0r;
759 overwrite_at (dst, w0, salt_len);
762 * precompute key1 since key is static: 0x0123456789abcdef
763 * plus LEFT_ROTATE by 2
805 * key1 (generate key)
813 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
817 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
818 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
823 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
827 * key2 (generate hash)
830 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
835 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
839 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
840 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
845 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
852 const u32 r0 = iv[0];
853 const u32 r1 = iv[1];
861 static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
867 const u32 gid = get_global_id (0);
868 const u32 lid = get_local_id (0);
876 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
877 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
878 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
879 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
883 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
884 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
885 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
886 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
895 const u32 salt_len = salt_bufs[salt_pos].salt_len;
897 const u32 salt_word_len = (salt_len + pw_len) * 2;
925 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
927 w0_t[0] |= salt_buf0[0];
928 w0_t[1] |= salt_buf0[1];
929 w0_t[2] |= salt_buf0[2];
930 w0_t[3] |= salt_buf0[3];
931 w1_t[0] |= salt_buf1[0];
932 w1_t[1] |= salt_buf1[1];
933 w1_t[2] |= salt_buf1[2];
934 w1_t[3] |= salt_buf1[3];
935 w2_t[0] |= salt_buf2[0];
936 w2_t[1] |= salt_buf2[1];
937 w2_t[2] |= salt_buf2[2];
938 w2_t[3] |= salt_buf2[3];
967 const u32 search[4] =
969 digests_buf[digests_offset].digest_buf[DGST_R0],
970 digests_buf[digests_offset].digest_buf[DGST_R1],
971 digests_buf[digests_offset].digest_buf[DGST_R2],
972 digests_buf[digests_offset].digest_buf[DGST_R3]
981 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
983 const u32 w0r = words_buf_r[il_pos];
985 const u32 w0 = w0l | w0r;
987 overwrite_at (dst, w0, salt_len);
990 * precompute key1 since key is static: 0x0123456789abcdef
991 * plus LEFT_ROTATE by 2
1000 Kc[ 4] = 0xd8a004f0;
1001 Kc[ 5] = 0xa8f02810;
1002 Kc[ 6] = 0xc84048d8;
1003 Kc[ 7] = 0x68d804a8;
1004 Kc[ 8] = 0x0490e40c;
1005 Kc[ 9] = 0xac183024;
1006 Kc[10] = 0x24c07c10;
1007 Kc[11] = 0x8c88c038;
1008 Kc[12] = 0xc048c824;
1009 Kc[13] = 0x4c0470a8;
1010 Kc[14] = 0x584020b4;
1011 Kc[15] = 0x00742c4c;
1015 Kd[ 0] = 0xa42ce40c;
1016 Kd[ 1] = 0x64689858;
1017 Kd[ 2] = 0x484050b8;
1018 Kd[ 3] = 0xe8184814;
1019 Kd[ 4] = 0x405cc070;
1020 Kd[ 5] = 0xa010784c;
1021 Kd[ 6] = 0x6074a800;
1022 Kd[ 7] = 0x80701c1c;
1023 Kd[ 8] = 0x9cd49430;
1024 Kd[ 9] = 0x4c8ce078;
1025 Kd[10] = 0x5c18c088;
1026 Kd[11] = 0x28a8a4c8;
1027 Kd[12] = 0x3c180838;
1028 Kd[13] = 0xb0b86c20;
1029 Kd[14] = 0xac84a094;
1030 Kd[15] = 0x4ce0c0c4;
1033 * key1 (generate key)
1041 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1045 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1046 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1051 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1055 * key2 (generate hash)
1058 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1063 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1067 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1068 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1073 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1080 const u32 r0 = iv[0];
1081 const u32 r1 = iv[1];
1089 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1091 __local u32 s_SPtrans[8][64];
1093 __local u32 s_skb[8][64];
1099 const u32 gid = get_global_id (0);
1100 const u32 lid = get_local_id (0);
1104 w[ 0] = pws[gid].i[ 0];
1105 w[ 1] = pws[gid].i[ 1];
1106 w[ 2] = pws[gid].i[ 2];
1107 w[ 3] = pws[gid].i[ 3];
1121 const u32 pw_len = pws[gid].pw_len;
1127 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1128 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1129 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1130 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1131 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1132 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1133 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1134 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1136 s_skb[0][lid] = c_skb[0][lid];
1137 s_skb[1][lid] = c_skb[1][lid];
1138 s_skb[2][lid] = c_skb[2][lid];
1139 s_skb[3][lid] = c_skb[3][lid];
1140 s_skb[4][lid] = c_skb[4][lid];
1141 s_skb[5][lid] = c_skb[5][lid];
1142 s_skb[6][lid] = c_skb[6][lid];
1143 s_skb[7][lid] = c_skb[7][lid];
1145 barrier (CLK_LOCAL_MEM_FENCE);
1147 if (gid >= gid_max) return;
1153 m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1156 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1158 __local u32 s_SPtrans[8][64];
1160 __local u32 s_skb[8][64];
1166 const u32 gid = get_global_id (0);
1167 const u32 lid = get_local_id (0);
1171 w[ 0] = pws[gid].i[ 0];
1172 w[ 1] = pws[gid].i[ 1];
1173 w[ 2] = pws[gid].i[ 2];
1174 w[ 3] = pws[gid].i[ 3];
1175 w[ 4] = pws[gid].i[ 4];
1176 w[ 5] = pws[gid].i[ 5];
1177 w[ 6] = pws[gid].i[ 6];
1178 w[ 7] = pws[gid].i[ 7];
1188 const u32 pw_len = pws[gid].pw_len;
1194 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1195 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1196 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1197 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1198 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1199 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1200 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1201 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1203 s_skb[0][lid] = c_skb[0][lid];
1204 s_skb[1][lid] = c_skb[1][lid];
1205 s_skb[2][lid] = c_skb[2][lid];
1206 s_skb[3][lid] = c_skb[3][lid];
1207 s_skb[4][lid] = c_skb[4][lid];
1208 s_skb[5][lid] = c_skb[5][lid];
1209 s_skb[6][lid] = c_skb[6][lid];
1210 s_skb[7][lid] = c_skb[7][lid];
1212 barrier (CLK_LOCAL_MEM_FENCE);
1214 if (gid >= gid_max) return;
1220 m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1223 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1227 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1229 __local u32 s_SPtrans[8][64];
1231 __local u32 s_skb[8][64];
1237 const u32 gid = get_global_id (0);
1238 const u32 lid = get_local_id (0);
1242 w[ 0] = pws[gid].i[ 0];
1243 w[ 1] = pws[gid].i[ 1];
1244 w[ 2] = pws[gid].i[ 2];
1245 w[ 3] = pws[gid].i[ 3];
1259 const u32 pw_len = pws[gid].pw_len;
1265 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1266 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1267 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1268 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1269 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1270 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1271 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1272 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1274 s_skb[0][lid] = c_skb[0][lid];
1275 s_skb[1][lid] = c_skb[1][lid];
1276 s_skb[2][lid] = c_skb[2][lid];
1277 s_skb[3][lid] = c_skb[3][lid];
1278 s_skb[4][lid] = c_skb[4][lid];
1279 s_skb[5][lid] = c_skb[5][lid];
1280 s_skb[6][lid] = c_skb[6][lid];
1281 s_skb[7][lid] = c_skb[7][lid];
1283 barrier (CLK_LOCAL_MEM_FENCE);
1285 if (gid >= gid_max) return;
1291 m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1294 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1296 __local u32 s_SPtrans[8][64];
1298 __local u32 s_skb[8][64];
1304 const u32 gid = get_global_id (0);
1305 const u32 lid = get_local_id (0);
1309 w[ 0] = pws[gid].i[ 0];
1310 w[ 1] = pws[gid].i[ 1];
1311 w[ 2] = pws[gid].i[ 2];
1312 w[ 3] = pws[gid].i[ 3];
1313 w[ 4] = pws[gid].i[ 4];
1314 w[ 5] = pws[gid].i[ 5];
1315 w[ 6] = pws[gid].i[ 6];
1316 w[ 7] = pws[gid].i[ 7];
1326 const u32 pw_len = pws[gid].pw_len;
1332 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1333 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1334 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1335 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1336 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1337 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1338 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1339 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1341 s_skb[0][lid] = c_skb[0][lid];
1342 s_skb[1][lid] = c_skb[1][lid];
1343 s_skb[2][lid] = c_skb[2][lid];
1344 s_skb[3][lid] = c_skb[3][lid];
1345 s_skb[4][lid] = c_skb[4][lid];
1346 s_skb[5][lid] = c_skb[5][lid];
1347 s_skb[6][lid] = c_skb[6][lid];
1348 s_skb[7][lid] = c_skb[7][lid];
1350 barrier (CLK_LOCAL_MEM_FENCE);
1352 if (gid >= gid_max) return;
1358 m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1361 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)