2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 #define PERM_OP(a,b,tt,n,m) \
33 #define HPERM_OP(a,tt,n,m) \
39 tt = tt >> (16 + n); \
45 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
46 PERM_OP (l, r, tt, 16, 0x0000ffff); \
47 PERM_OP (r, l, tt, 2, 0x33333333); \
48 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
49 PERM_OP (r, l, tt, 1, 0x55555555); \
54 PERM_OP (l, r, tt, 1, 0x55555555); \
55 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
56 PERM_OP (l, r, tt, 2, 0x33333333); \
57 PERM_OP (r, l, tt, 16, 0x0000ffff); \
58 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
61 __constant u32 c_SPtrans[8][64] =
64 0x02080800, 0x00080000, 0x02000002, 0x02080802,
65 0x02000000, 0x00080802, 0x00080002, 0x02000002,
66 0x00080802, 0x02080800, 0x02080000, 0x00000802,
67 0x02000802, 0x02000000, 0x00000000, 0x00080002,
68 0x00080000, 0x00000002, 0x02000800, 0x00080800,
69 0x02080802, 0x02080000, 0x00000802, 0x02000800,
70 0x00000002, 0x00000800, 0x00080800, 0x02080002,
71 0x00000800, 0x02000802, 0x02080002, 0x00000000,
72 0x00000000, 0x02080802, 0x02000800, 0x00080002,
73 0x02080800, 0x00080000, 0x00000802, 0x02000800,
74 0x02080002, 0x00000800, 0x00080800, 0x02000002,
75 0x00080802, 0x00000002, 0x02000002, 0x02080000,
76 0x02080802, 0x00080800, 0x02080000, 0x02000802,
77 0x02000000, 0x00000802, 0x00080002, 0x00000000,
78 0x00080000, 0x02000000, 0x02000802, 0x02080800,
79 0x00000002, 0x02080002, 0x00000800, 0x00080802,
81 0x40108010, 0x00000000, 0x00108000, 0x40100000,
82 0x40000010, 0x00008010, 0x40008000, 0x00108000,
83 0x00008000, 0x40100010, 0x00000010, 0x40008000,
84 0x00100010, 0x40108000, 0x40100000, 0x00000010,
85 0x00100000, 0x40008010, 0x40100010, 0x00008000,
86 0x00108010, 0x40000000, 0x00000000, 0x00100010,
87 0x40008010, 0x00108010, 0x40108000, 0x40000010,
88 0x40000000, 0x00100000, 0x00008010, 0x40108010,
89 0x00100010, 0x40108000, 0x40008000, 0x00108010,
90 0x40108010, 0x00100010, 0x40000010, 0x00000000,
91 0x40000000, 0x00008010, 0x00100000, 0x40100010,
92 0x00008000, 0x40000000, 0x00108010, 0x40008010,
93 0x40108000, 0x00008000, 0x00000000, 0x40000010,
94 0x00000010, 0x40108010, 0x00108000, 0x40100000,
95 0x40100010, 0x00100000, 0x00008010, 0x40008000,
96 0x40008010, 0x00000010, 0x40100000, 0x00108000,
98 0x04000001, 0x04040100, 0x00000100, 0x04000101,
99 0x00040001, 0x04000000, 0x04000101, 0x00040100,
100 0x04000100, 0x00040000, 0x04040000, 0x00000001,
101 0x04040101, 0x00000101, 0x00000001, 0x04040001,
102 0x00000000, 0x00040001, 0x04040100, 0x00000100,
103 0x00000101, 0x04040101, 0x00040000, 0x04000001,
104 0x04040001, 0x04000100, 0x00040101, 0x04040000,
105 0x00040100, 0x00000000, 0x04000000, 0x00040101,
106 0x04040100, 0x00000100, 0x00000001, 0x00040000,
107 0x00000101, 0x00040001, 0x04040000, 0x04000101,
108 0x00000000, 0x04040100, 0x00040100, 0x04040001,
109 0x00040001, 0x04000000, 0x04040101, 0x00000001,
110 0x00040101, 0x04000001, 0x04000000, 0x04040101,
111 0x00040000, 0x04000100, 0x04000101, 0x00040100,
112 0x04000100, 0x00000000, 0x04040001, 0x00000101,
113 0x04000001, 0x00040101, 0x00000100, 0x04040000,
115 0x00401008, 0x10001000, 0x00000008, 0x10401008,
116 0x00000000, 0x10400000, 0x10001008, 0x00400008,
117 0x10401000, 0x10000008, 0x10000000, 0x00001008,
118 0x10000008, 0x00401008, 0x00400000, 0x10000000,
119 0x10400008, 0x00401000, 0x00001000, 0x00000008,
120 0x00401000, 0x10001008, 0x10400000, 0x00001000,
121 0x00001008, 0x00000000, 0x00400008, 0x10401000,
122 0x10001000, 0x10400008, 0x10401008, 0x00400000,
123 0x10400008, 0x00001008, 0x00400000, 0x10000008,
124 0x00401000, 0x10001000, 0x00000008, 0x10400000,
125 0x10001008, 0x00000000, 0x00001000, 0x00400008,
126 0x00000000, 0x10400008, 0x10401000, 0x00001000,
127 0x10000000, 0x10401008, 0x00401008, 0x00400000,
128 0x10401008, 0x00000008, 0x10001000, 0x00401008,
129 0x00400008, 0x00401000, 0x10400000, 0x10001008,
130 0x00001008, 0x10000000, 0x10000008, 0x10401000,
132 0x08000000, 0x00010000, 0x00000400, 0x08010420,
133 0x08010020, 0x08000400, 0x00010420, 0x08010000,
134 0x00010000, 0x00000020, 0x08000020, 0x00010400,
135 0x08000420, 0x08010020, 0x08010400, 0x00000000,
136 0x00010400, 0x08000000, 0x00010020, 0x00000420,
137 0x08000400, 0x00010420, 0x00000000, 0x08000020,
138 0x00000020, 0x08000420, 0x08010420, 0x00010020,
139 0x08010000, 0x00000400, 0x00000420, 0x08010400,
140 0x08010400, 0x08000420, 0x00010020, 0x08010000,
141 0x00010000, 0x00000020, 0x08000020, 0x08000400,
142 0x08000000, 0x00010400, 0x08010420, 0x00000000,
143 0x00010420, 0x08000000, 0x00000400, 0x00010020,
144 0x08000420, 0x00000400, 0x00000000, 0x08010420,
145 0x08010020, 0x08010400, 0x00000420, 0x00010000,
146 0x00010400, 0x08010020, 0x08000400, 0x00000420,
147 0x00000020, 0x00010420, 0x08010000, 0x08000020,
149 0x80000040, 0x00200040, 0x00000000, 0x80202000,
150 0x00200040, 0x00002000, 0x80002040, 0x00200000,
151 0x00002040, 0x80202040, 0x00202000, 0x80000000,
152 0x80002000, 0x80000040, 0x80200000, 0x00202040,
153 0x00200000, 0x80002040, 0x80200040, 0x00000000,
154 0x00002000, 0x00000040, 0x80202000, 0x80200040,
155 0x80202040, 0x80200000, 0x80000000, 0x00002040,
156 0x00000040, 0x00202000, 0x00202040, 0x80002000,
157 0x00002040, 0x80000000, 0x80002000, 0x00202040,
158 0x80202000, 0x00200040, 0x00000000, 0x80002000,
159 0x80000000, 0x00002000, 0x80200040, 0x00200000,
160 0x00200040, 0x80202040, 0x00202000, 0x00000040,
161 0x80202040, 0x00202000, 0x00200000, 0x80002040,
162 0x80000040, 0x80200000, 0x00202040, 0x00000000,
163 0x00002000, 0x80000040, 0x80002040, 0x80202000,
164 0x80200000, 0x00002040, 0x00000040, 0x80200040,
166 0x00004000, 0x00000200, 0x01000200, 0x01000004,
167 0x01004204, 0x00004004, 0x00004200, 0x00000000,
168 0x01000000, 0x01000204, 0x00000204, 0x01004000,
169 0x00000004, 0x01004200, 0x01004000, 0x00000204,
170 0x01000204, 0x00004000, 0x00004004, 0x01004204,
171 0x00000000, 0x01000200, 0x01000004, 0x00004200,
172 0x01004004, 0x00004204, 0x01004200, 0x00000004,
173 0x00004204, 0x01004004, 0x00000200, 0x01000000,
174 0x00004204, 0x01004000, 0x01004004, 0x00000204,
175 0x00004000, 0x00000200, 0x01000000, 0x01004004,
176 0x01000204, 0x00004204, 0x00004200, 0x00000000,
177 0x00000200, 0x01000004, 0x00000004, 0x01000200,
178 0x00000000, 0x01000204, 0x01000200, 0x00004200,
179 0x00000204, 0x00004000, 0x01004204, 0x01000000,
180 0x01004200, 0x00000004, 0x00004004, 0x01004204,
181 0x01000004, 0x01004200, 0x01004000, 0x00004004,
183 0x20800080, 0x20820000, 0x00020080, 0x00000000,
184 0x20020000, 0x00800080, 0x20800000, 0x20820080,
185 0x00000080, 0x20000000, 0x00820000, 0x00020080,
186 0x00820080, 0x20020080, 0x20000080, 0x20800000,
187 0x00020000, 0x00820080, 0x00800080, 0x20020000,
188 0x20820080, 0x20000080, 0x00000000, 0x00820000,
189 0x20000000, 0x00800000, 0x20020080, 0x20800080,
190 0x00800000, 0x00020000, 0x20820000, 0x00000080,
191 0x00800000, 0x00020000, 0x20000080, 0x20820080,
192 0x00020080, 0x20000000, 0x00000000, 0x00820000,
193 0x20800080, 0x20020080, 0x20020000, 0x00800080,
194 0x20820000, 0x00000080, 0x00800080, 0x20020000,
195 0x20820080, 0x00800000, 0x20800000, 0x20000080,
196 0x00820000, 0x00020080, 0x20020080, 0x20800000,
197 0x00000080, 0x20820000, 0x00820080, 0x00000000,
198 0x20000000, 0x20800080, 0x00020000, 0x00820080,
201 __constant u32 c_skb[8][64] =
203 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
204 0x00000000, 0x00000010, 0x20000000, 0x20000010,
205 0x00010000, 0x00010010, 0x20010000, 0x20010010,
206 0x00000800, 0x00000810, 0x20000800, 0x20000810,
207 0x00010800, 0x00010810, 0x20010800, 0x20010810,
208 0x00000020, 0x00000030, 0x20000020, 0x20000030,
209 0x00010020, 0x00010030, 0x20010020, 0x20010030,
210 0x00000820, 0x00000830, 0x20000820, 0x20000830,
211 0x00010820, 0x00010830, 0x20010820, 0x20010830,
212 0x00080000, 0x00080010, 0x20080000, 0x20080010,
213 0x00090000, 0x00090010, 0x20090000, 0x20090010,
214 0x00080800, 0x00080810, 0x20080800, 0x20080810,
215 0x00090800, 0x00090810, 0x20090800, 0x20090810,
216 0x00080020, 0x00080030, 0x20080020, 0x20080030,
217 0x00090020, 0x00090030, 0x20090020, 0x20090030,
218 0x00080820, 0x00080830, 0x20080820, 0x20080830,
219 0x00090820, 0x00090830, 0x20090820, 0x20090830,
220 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
221 0x00000000, 0x02000000, 0x00002000, 0x02002000,
222 0x00200000, 0x02200000, 0x00202000, 0x02202000,
223 0x00000004, 0x02000004, 0x00002004, 0x02002004,
224 0x00200004, 0x02200004, 0x00202004, 0x02202004,
225 0x00000400, 0x02000400, 0x00002400, 0x02002400,
226 0x00200400, 0x02200400, 0x00202400, 0x02202400,
227 0x00000404, 0x02000404, 0x00002404, 0x02002404,
228 0x00200404, 0x02200404, 0x00202404, 0x02202404,
229 0x10000000, 0x12000000, 0x10002000, 0x12002000,
230 0x10200000, 0x12200000, 0x10202000, 0x12202000,
231 0x10000004, 0x12000004, 0x10002004, 0x12002004,
232 0x10200004, 0x12200004, 0x10202004, 0x12202004,
233 0x10000400, 0x12000400, 0x10002400, 0x12002400,
234 0x10200400, 0x12200400, 0x10202400, 0x12202400,
235 0x10000404, 0x12000404, 0x10002404, 0x12002404,
236 0x10200404, 0x12200404, 0x10202404, 0x12202404,
237 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
238 0x00000000, 0x00000001, 0x00040000, 0x00040001,
239 0x01000000, 0x01000001, 0x01040000, 0x01040001,
240 0x00000002, 0x00000003, 0x00040002, 0x00040003,
241 0x01000002, 0x01000003, 0x01040002, 0x01040003,
242 0x00000200, 0x00000201, 0x00040200, 0x00040201,
243 0x01000200, 0x01000201, 0x01040200, 0x01040201,
244 0x00000202, 0x00000203, 0x00040202, 0x00040203,
245 0x01000202, 0x01000203, 0x01040202, 0x01040203,
246 0x08000000, 0x08000001, 0x08040000, 0x08040001,
247 0x09000000, 0x09000001, 0x09040000, 0x09040001,
248 0x08000002, 0x08000003, 0x08040002, 0x08040003,
249 0x09000002, 0x09000003, 0x09040002, 0x09040003,
250 0x08000200, 0x08000201, 0x08040200, 0x08040201,
251 0x09000200, 0x09000201, 0x09040200, 0x09040201,
252 0x08000202, 0x08000203, 0x08040202, 0x08040203,
253 0x09000202, 0x09000203, 0x09040202, 0x09040203,
254 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
255 0x00000000, 0x00100000, 0x00000100, 0x00100100,
256 0x00000008, 0x00100008, 0x00000108, 0x00100108,
257 0x00001000, 0x00101000, 0x00001100, 0x00101100,
258 0x00001008, 0x00101008, 0x00001108, 0x00101108,
259 0x04000000, 0x04100000, 0x04000100, 0x04100100,
260 0x04000008, 0x04100008, 0x04000108, 0x04100108,
261 0x04001000, 0x04101000, 0x04001100, 0x04101100,
262 0x04001008, 0x04101008, 0x04001108, 0x04101108,
263 0x00020000, 0x00120000, 0x00020100, 0x00120100,
264 0x00020008, 0x00120008, 0x00020108, 0x00120108,
265 0x00021000, 0x00121000, 0x00021100, 0x00121100,
266 0x00021008, 0x00121008, 0x00021108, 0x00121108,
267 0x04020000, 0x04120000, 0x04020100, 0x04120100,
268 0x04020008, 0x04120008, 0x04020108, 0x04120108,
269 0x04021000, 0x04121000, 0x04021100, 0x04121100,
270 0x04021008, 0x04121008, 0x04021108, 0x04121108,
271 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
272 0x00000000, 0x10000000, 0x00010000, 0x10010000,
273 0x00000004, 0x10000004, 0x00010004, 0x10010004,
274 0x20000000, 0x30000000, 0x20010000, 0x30010000,
275 0x20000004, 0x30000004, 0x20010004, 0x30010004,
276 0x00100000, 0x10100000, 0x00110000, 0x10110000,
277 0x00100004, 0x10100004, 0x00110004, 0x10110004,
278 0x20100000, 0x30100000, 0x20110000, 0x30110000,
279 0x20100004, 0x30100004, 0x20110004, 0x30110004,
280 0x00001000, 0x10001000, 0x00011000, 0x10011000,
281 0x00001004, 0x10001004, 0x00011004, 0x10011004,
282 0x20001000, 0x30001000, 0x20011000, 0x30011000,
283 0x20001004, 0x30001004, 0x20011004, 0x30011004,
284 0x00101000, 0x10101000, 0x00111000, 0x10111000,
285 0x00101004, 0x10101004, 0x00111004, 0x10111004,
286 0x20101000, 0x30101000, 0x20111000, 0x30111000,
287 0x20101004, 0x30101004, 0x20111004, 0x30111004,
288 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
289 0x00000000, 0x08000000, 0x00000008, 0x08000008,
290 0x00000400, 0x08000400, 0x00000408, 0x08000408,
291 0x00020000, 0x08020000, 0x00020008, 0x08020008,
292 0x00020400, 0x08020400, 0x00020408, 0x08020408,
293 0x00000001, 0x08000001, 0x00000009, 0x08000009,
294 0x00000401, 0x08000401, 0x00000409, 0x08000409,
295 0x00020001, 0x08020001, 0x00020009, 0x08020009,
296 0x00020401, 0x08020401, 0x00020409, 0x08020409,
297 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
298 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
299 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
300 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
301 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
302 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
303 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
304 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
305 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
306 0x00000000, 0x00000100, 0x00080000, 0x00080100,
307 0x01000000, 0x01000100, 0x01080000, 0x01080100,
308 0x00000010, 0x00000110, 0x00080010, 0x00080110,
309 0x01000010, 0x01000110, 0x01080010, 0x01080110,
310 0x00200000, 0x00200100, 0x00280000, 0x00280100,
311 0x01200000, 0x01200100, 0x01280000, 0x01280100,
312 0x00200010, 0x00200110, 0x00280010, 0x00280110,
313 0x01200010, 0x01200110, 0x01280010, 0x01280110,
314 0x00000200, 0x00000300, 0x00080200, 0x00080300,
315 0x01000200, 0x01000300, 0x01080200, 0x01080300,
316 0x00000210, 0x00000310, 0x00080210, 0x00080310,
317 0x01000210, 0x01000310, 0x01080210, 0x01080310,
318 0x00200200, 0x00200300, 0x00280200, 0x00280300,
319 0x01200200, 0x01200300, 0x01280200, 0x01280300,
320 0x00200210, 0x00200310, 0x00280210, 0x00280310,
321 0x01200210, 0x01200310, 0x01280210, 0x01280310,
322 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
323 0x00000000, 0x04000000, 0x00040000, 0x04040000,
324 0x00000002, 0x04000002, 0x00040002, 0x04040002,
325 0x00002000, 0x04002000, 0x00042000, 0x04042000,
326 0x00002002, 0x04002002, 0x00042002, 0x04042002,
327 0x00000020, 0x04000020, 0x00040020, 0x04040020,
328 0x00000022, 0x04000022, 0x00040022, 0x04040022,
329 0x00002020, 0x04002020, 0x00042020, 0x04042020,
330 0x00002022, 0x04002022, 0x00042022, 0x04042022,
331 0x00000800, 0x04000800, 0x00040800, 0x04040800,
332 0x00000802, 0x04000802, 0x00040802, 0x04040802,
333 0x00002800, 0x04002800, 0x00042800, 0x04042800,
334 0x00002802, 0x04002802, 0x00042802, 0x04042802,
335 0x00000820, 0x04000820, 0x00040820, 0x04040820,
336 0x00000822, 0x04000822, 0x00040822, 0x04040822,
337 0x00002820, 0x04002820, 0x00042820, 0x04042820,
338 0x00002822, 0x04002822, 0x00042822, 0x04042822
341 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
342 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
345 #define BOX(i,n,S) u32 ((S)[(n)][(i)])
349 #define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
353 #define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
356 static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64])
369 for (u32 i = 0; i < 16; i += 2)
375 t = Kd[i + 0] ^ rotl32 (r, 28u);
378 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
379 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
380 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
381 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
382 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
383 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
384 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
385 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
388 t = Kd[i + 1] ^ rotl32 (l, 28u);
391 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
392 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
393 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
394 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
395 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
396 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
397 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
398 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
410 static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64])
414 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
415 HPERM_OP (c, tt, 2, 0xcccc0000);
416 HPERM_OP (d, tt, 2, 0xcccc0000);
417 PERM_OP (d, c, tt, 1, 0x55555555);
418 PERM_OP (c, d, tt, 8, 0x00ff00ff);
419 PERM_OP (d, c, tt, 1, 0x55555555);
421 d = ((d & 0x000000ff) << 16)
422 | ((d & 0x0000ff00) << 0)
423 | ((d & 0x00ff0000) >> 16)
424 | ((c & 0xf0000000) >> 4);
429 for (u32 i = 0; i < 16; i++)
431 c = c >> shifts3s0[i] | c << shifts3s1[i];
432 d = d >> shifts3s0[i] | d << shifts3s1[i];
437 u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
438 | BOX ((((c >> 6) & 0x03)
439 | ((c >> 7) & 0x3c)), 1, s_skb)
440 | BOX ((((c >> 13) & 0x0f)
441 | ((c >> 14) & 0x30)), 2, s_skb)
442 | BOX ((((c >> 20) & 0x01)
444 | ((c >> 22) & 0x38)), 3, s_skb);
446 u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
447 | BOX ((((d >> 7) & 0x03)
448 | ((d >> 8) & 0x3c)), 5, s_skb)
449 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
450 | BOX ((((d >> 21) & 0x0f)
451 | ((d >> 22) & 0x30)), 7, s_skb);
453 #if defined cl_amd_media_ops
454 Kc[i] = amd_bytealign (t, s << 16, 2);
455 Kd[i] = amd_bytealign (t >> 16, s, 2);
457 Kc[i] = ((t << 16) | (s & 0x0000ffff));
458 Kd[i] = ((s >> 16) | (t & 0xffff0000));
461 Kc[i] = rotl32 (Kc[i], 2u);
462 Kd[i] = rotl32 (Kd[i], 2u);
466 static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len)
468 #if defined cl_amd_media_ops
473 case 1: sw[0] = amd_bytealign (w0, sw[0] << 24, 3);
474 sw[1] = amd_bytealign (sw[1] >> 8, w0, 3);
476 case 2: sw[0] = amd_bytealign (w0, sw[0] << 16, 2);
477 sw[1] = amd_bytealign (sw[1] >> 16, w0, 2);
479 case 3: sw[0] = amd_bytealign (w0, sw[0] << 8, 1);
480 sw[1] = amd_bytealign (sw[1] >> 24, w0, 1);
484 case 5: sw[1] = amd_bytealign (w0, sw[1] << 24, 3);
485 sw[2] = amd_bytealign (sw[2] >> 8, w0, 3);
487 case 6: sw[1] = amd_bytealign (w0, sw[1] << 16, 2);
488 sw[2] = amd_bytealign (sw[2] >> 16, w0, 2);
490 case 7: sw[1] = amd_bytealign (w0, sw[1] << 8, 1);
491 sw[2] = amd_bytealign (sw[2] >> 24, w0, 1);
495 case 9: sw[2] = amd_bytealign (w0, sw[2] << 24, 3);
496 sw[3] = amd_bytealign (sw[3] >> 8, w0, 3);
498 case 10: sw[2] = amd_bytealign (w0, sw[2] << 16, 2);
499 sw[3] = amd_bytealign (sw[3] >> 16, w0, 2);
501 case 11: sw[2] = amd_bytealign (w0, sw[2] << 8, 1);
502 sw[3] = amd_bytealign (sw[3] >> 24, w0, 1);
506 case 13: sw[3] = amd_bytealign (w0, sw[3] << 24, 3);
507 sw[4] = amd_bytealign (sw[4] >> 8, w0, 3);
509 case 14: sw[3] = amd_bytealign (w0, sw[3] << 16, 2);
510 sw[4] = amd_bytealign (sw[4] >> 16, w0, 2);
512 case 15: sw[3] = amd_bytealign (w0, sw[3] << 8, 1);
513 sw[4] = amd_bytealign (sw[4] >> 24, w0, 1);
517 case 17: sw[4] = amd_bytealign (w0, sw[4] << 24, 3);
518 sw[5] = amd_bytealign (sw[5] >> 8, w0, 3);
520 case 18: sw[4] = amd_bytealign (w0, sw[4] << 16, 2);
521 sw[5] = amd_bytealign (sw[5] >> 16, w0, 2);
523 case 19: sw[4] = amd_bytealign (w0, sw[4] << 8, 1);
524 sw[5] = amd_bytealign (sw[5] >> 24, w0, 1);
528 case 21: sw[5] = amd_bytealign (w0, sw[5] << 24, 3);
529 sw[6] = amd_bytealign (sw[6] >> 8, w0, 3);
531 case 22: sw[5] = amd_bytealign (w0, sw[5] << 16, 2);
532 sw[6] = amd_bytealign (sw[6] >> 16, w0, 2);
534 case 23: sw[5] = amd_bytealign (w0, sw[5] << 8, 1);
535 sw[6] = amd_bytealign (sw[6] >> 24, w0, 1);
539 case 25: sw[6] = amd_bytealign (w0, sw[6] << 24, 3);
540 sw[7] = amd_bytealign (sw[7] >> 8, w0, 3);
542 case 26: sw[6] = amd_bytealign (w0, sw[6] << 16, 2);
543 sw[7] = amd_bytealign (sw[7] >> 16, w0, 2);
545 case 27: sw[6] = amd_bytealign (w0, sw[6] << 8, 1);
546 sw[7] = amd_bytealign (sw[7] >> 24, w0, 1);
550 case 29: sw[7] = amd_bytealign (w0, sw[7] << 24, 3);
551 sw[8] = amd_bytealign (sw[8] >> 8, w0, 3);
553 case 30: sw[7] = amd_bytealign (w0, sw[7] << 16, 2);
554 sw[8] = amd_bytealign (sw[8] >> 16, w0, 2);
556 case 31: sw[7] = amd_bytealign (w0, sw[7] << 8, 1);
557 sw[8] = amd_bytealign (sw[8] >> 24, w0, 1);
565 case 1: sw[0] = (sw[0] & 0x000000ff) | (w0 << 8);
566 sw[1] = (sw[1] & 0xffffff00) | (w0 >> 24);
568 case 2: sw[0] = (sw[0] & 0x0000ffff) | (w0 << 16);
569 sw[1] = (sw[1] & 0xffff0000) | (w0 >> 16);
571 case 3: sw[0] = (sw[0] & 0x00ffffff) | (w0 << 24);
572 sw[1] = (sw[1] & 0xff000000) | (w0 >> 8);
576 case 5: sw[1] = (sw[1] & 0x000000ff) | (w0 << 8);
577 sw[2] = (sw[2] & 0xffffff00) | (w0 >> 24);
579 case 6: sw[1] = (sw[1] & 0x0000ffff) | (w0 << 16);
580 sw[2] = (sw[2] & 0xffff0000) | (w0 >> 16);
582 case 7: sw[1] = (sw[1] & 0x00ffffff) | (w0 << 24);
583 sw[2] = (sw[2] & 0xff000000) | (w0 >> 8);
587 case 9: sw[2] = (sw[2] & 0x000000ff) | (w0 << 8);
588 sw[3] = (sw[3] & 0xffffff00) | (w0 >> 24);
590 case 10: sw[2] = (sw[2] & 0x0000ffff) | (w0 << 16);
591 sw[3] = (sw[3] & 0xffff0000) | (w0 >> 16);
593 case 11: sw[2] = (sw[2] & 0x00ffffff) | (w0 << 24);
594 sw[3] = (sw[3] & 0xff000000) | (w0 >> 8);
598 case 13: sw[3] = (sw[3] & 0x000000ff) | (w0 << 8);
599 sw[4] = (sw[4] & 0xffffff00) | (w0 >> 24);
601 case 14: sw[3] = (sw[3] & 0x0000ffff) | (w0 << 16);
602 sw[4] = (sw[4] & 0xffff0000) | (w0 >> 16);
604 case 15: sw[3] = (sw[3] & 0x00ffffff) | (w0 << 24);
605 sw[4] = (sw[4] & 0xff000000) | (w0 >> 8);
609 case 17: sw[4] = (sw[4] & 0x000000ff) | (w0 << 8);
610 sw[5] = (sw[5] & 0xffffff00) | (w0 >> 24);
612 case 18: sw[4] = (sw[4] & 0x0000ffff) | (w0 << 16);
613 sw[5] = (sw[5] & 0xffff0000) | (w0 >> 16);
615 case 19: sw[4] = (sw[4] & 0x00ffffff) | (w0 << 24);
616 sw[5] = (sw[5] & 0xff000000) | (w0 >> 8);
620 case 21: sw[5] = (sw[5] & 0x000000ff) | (w0 << 8);
621 sw[6] = (sw[6] & 0xffffff00) | (w0 >> 24);
623 case 22: sw[5] = (sw[5] & 0x0000ffff) | (w0 << 16);
624 sw[6] = (sw[6] & 0xffff0000) | (w0 >> 16);
626 case 23: sw[5] = (sw[5] & 0x00ffffff) | (w0 << 24);
627 sw[6] = (sw[6] & 0xff000000) | (w0 >> 8);
631 case 25: sw[6] = (sw[6] & 0x000000ff) | (w0 << 8);
632 sw[7] = (sw[7] & 0xffffff00) | (w0 >> 24);
634 case 26: sw[6] = (sw[6] & 0x0000ffff) | (w0 << 16);
635 sw[7] = (sw[7] & 0xffff0000) | (w0 >> 16);
637 case 27: sw[6] = (sw[6] & 0x00ffffff) | (w0 << 24);
638 sw[7] = (sw[7] & 0xff000000) | (w0 >> 8);
642 case 29: sw[7] = (sw[7] & 0x000000ff) | (w0 << 8);
643 sw[8] = (sw[8] & 0xffffff00) | (w0 >> 24);
645 case 30: sw[7] = (sw[7] & 0x0000ffff) | (w0 << 16);
646 sw[8] = (sw[8] & 0xffff0000) | (w0 >> 16);
648 case 31: sw[7] = (sw[7] & 0x00ffffff) | (w0 << 24);
649 sw[8] = (sw[8] & 0xff000000) | (w0 >> 8);
655 static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
661 const u32 gid = get_global_id (0);
662 const u32 lid = get_local_id (0);
670 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
671 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
672 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
673 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
677 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
678 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
679 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
680 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
689 const u32 salt_len = salt_bufs[salt_pos].salt_len;
691 const u32 salt_word_len = (salt_len + pw_len) * 2;
719 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
721 w0_t[0] |= salt_buf0[0];
722 w0_t[1] |= salt_buf0[1];
723 w0_t[2] |= salt_buf0[2];
724 w0_t[3] |= salt_buf0[3];
725 w1_t[0] |= salt_buf1[0];
726 w1_t[1] |= salt_buf1[1];
727 w1_t[2] |= salt_buf1[2];
728 w1_t[3] |= salt_buf1[3];
729 w2_t[0] |= salt_buf2[0];
730 w2_t[1] |= salt_buf2[1];
731 w2_t[2] |= salt_buf2[2];
732 w2_t[3] |= salt_buf2[3];
763 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
765 const u32 w0r = words_buf_r[il_pos];
767 const u32 w0 = w0l | w0r;
769 overwrite_at (dst, w0, salt_len);
772 * precompute key1 since key is static: 0x0123456789abcdef
773 * plus LEFT_ROTATE by 2
815 * key1 (generate key)
823 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
827 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
828 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
833 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
837 * key2 (generate hash)
840 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
845 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
849 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
850 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
855 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
862 const u32 r0 = iv[0];
863 const u32 r1 = iv[1];
871 static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
877 const u32 gid = get_global_id (0);
878 const u32 lid = get_local_id (0);
886 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
887 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
888 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
889 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
893 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
894 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
895 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
896 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
905 const u32 salt_len = salt_bufs[salt_pos].salt_len;
907 const u32 salt_word_len = (salt_len + pw_len) * 2;
935 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
937 w0_t[0] |= salt_buf0[0];
938 w0_t[1] |= salt_buf0[1];
939 w0_t[2] |= salt_buf0[2];
940 w0_t[3] |= salt_buf0[3];
941 w1_t[0] |= salt_buf1[0];
942 w1_t[1] |= salt_buf1[1];
943 w1_t[2] |= salt_buf1[2];
944 w1_t[3] |= salt_buf1[3];
945 w2_t[0] |= salt_buf2[0];
946 w2_t[1] |= salt_buf2[1];
947 w2_t[2] |= salt_buf2[2];
948 w2_t[3] |= salt_buf2[3];
977 const u32 search[4] =
979 digests_buf[digests_offset].digest_buf[DGST_R0],
980 digests_buf[digests_offset].digest_buf[DGST_R1],
981 digests_buf[digests_offset].digest_buf[DGST_R2],
982 digests_buf[digests_offset].digest_buf[DGST_R3]
991 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
993 const u32 w0r = words_buf_r[il_pos];
995 const u32 w0 = w0l | w0r;
997 overwrite_at (dst, w0, salt_len);
1000 * precompute key1 since key is static: 0x0123456789abcdef
1001 * plus LEFT_ROTATE by 2
1006 Kc[ 0] = 0x64649040;
1007 Kc[ 1] = 0x14909858;
1008 Kc[ 2] = 0xc4b44888;
1009 Kc[ 3] = 0x9094e438;
1010 Kc[ 4] = 0xd8a004f0;
1011 Kc[ 5] = 0xa8f02810;
1012 Kc[ 6] = 0xc84048d8;
1013 Kc[ 7] = 0x68d804a8;
1014 Kc[ 8] = 0x0490e40c;
1015 Kc[ 9] = 0xac183024;
1016 Kc[10] = 0x24c07c10;
1017 Kc[11] = 0x8c88c038;
1018 Kc[12] = 0xc048c824;
1019 Kc[13] = 0x4c0470a8;
1020 Kc[14] = 0x584020b4;
1021 Kc[15] = 0x00742c4c;
1025 Kd[ 0] = 0xa42ce40c;
1026 Kd[ 1] = 0x64689858;
1027 Kd[ 2] = 0x484050b8;
1028 Kd[ 3] = 0xe8184814;
1029 Kd[ 4] = 0x405cc070;
1030 Kd[ 5] = 0xa010784c;
1031 Kd[ 6] = 0x6074a800;
1032 Kd[ 7] = 0x80701c1c;
1033 Kd[ 8] = 0x9cd49430;
1034 Kd[ 9] = 0x4c8ce078;
1035 Kd[10] = 0x5c18c088;
1036 Kd[11] = 0x28a8a4c8;
1037 Kd[12] = 0x3c180838;
1038 Kd[13] = 0xb0b86c20;
1039 Kd[14] = 0xac84a094;
1040 Kd[15] = 0x4ce0c0c4;
1043 * key1 (generate key)
1051 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1055 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1056 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1061 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1065 * key2 (generate hash)
1068 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1073 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1077 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1078 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1083 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1090 const u32 r0 = iv[0];
1091 const u32 r1 = iv[1];
1099 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1101 __local u32 s_SPtrans[8][64];
1103 __local u32 s_skb[8][64];
1109 const u32 gid = get_global_id (0);
1110 const u32 lid = get_local_id (0);
1114 w[ 0] = pws[gid].i[ 0];
1115 w[ 1] = pws[gid].i[ 1];
1116 w[ 2] = pws[gid].i[ 2];
1117 w[ 3] = pws[gid].i[ 3];
1131 const u32 pw_len = pws[gid].pw_len;
1137 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1138 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1139 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1140 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1141 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1142 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1143 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1144 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1146 s_skb[0][lid] = c_skb[0][lid];
1147 s_skb[1][lid] = c_skb[1][lid];
1148 s_skb[2][lid] = c_skb[2][lid];
1149 s_skb[3][lid] = c_skb[3][lid];
1150 s_skb[4][lid] = c_skb[4][lid];
1151 s_skb[5][lid] = c_skb[5][lid];
1152 s_skb[6][lid] = c_skb[6][lid];
1153 s_skb[7][lid] = c_skb[7][lid];
1155 barrier (CLK_LOCAL_MEM_FENCE);
1157 if (gid >= gid_max) return;
1163 m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1166 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1168 __local u32 s_SPtrans[8][64];
1170 __local u32 s_skb[8][64];
1176 const u32 gid = get_global_id (0);
1177 const u32 lid = get_local_id (0);
1181 w[ 0] = pws[gid].i[ 0];
1182 w[ 1] = pws[gid].i[ 1];
1183 w[ 2] = pws[gid].i[ 2];
1184 w[ 3] = pws[gid].i[ 3];
1185 w[ 4] = pws[gid].i[ 4];
1186 w[ 5] = pws[gid].i[ 5];
1187 w[ 6] = pws[gid].i[ 6];
1188 w[ 7] = pws[gid].i[ 7];
1198 const u32 pw_len = pws[gid].pw_len;
1204 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1205 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1206 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1207 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1208 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1209 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1210 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1211 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1213 s_skb[0][lid] = c_skb[0][lid];
1214 s_skb[1][lid] = c_skb[1][lid];
1215 s_skb[2][lid] = c_skb[2][lid];
1216 s_skb[3][lid] = c_skb[3][lid];
1217 s_skb[4][lid] = c_skb[4][lid];
1218 s_skb[5][lid] = c_skb[5][lid];
1219 s_skb[6][lid] = c_skb[6][lid];
1220 s_skb[7][lid] = c_skb[7][lid];
1222 barrier (CLK_LOCAL_MEM_FENCE);
1224 if (gid >= gid_max) return;
1230 m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1233 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1237 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1239 __local u32 s_SPtrans[8][64];
1241 __local u32 s_skb[8][64];
1247 const u32 gid = get_global_id (0);
1248 const u32 lid = get_local_id (0);
1252 w[ 0] = pws[gid].i[ 0];
1253 w[ 1] = pws[gid].i[ 1];
1254 w[ 2] = pws[gid].i[ 2];
1255 w[ 3] = pws[gid].i[ 3];
1269 const u32 pw_len = pws[gid].pw_len;
1275 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1276 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1277 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1278 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1279 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1280 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1281 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1282 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1284 s_skb[0][lid] = c_skb[0][lid];
1285 s_skb[1][lid] = c_skb[1][lid];
1286 s_skb[2][lid] = c_skb[2][lid];
1287 s_skb[3][lid] = c_skb[3][lid];
1288 s_skb[4][lid] = c_skb[4][lid];
1289 s_skb[5][lid] = c_skb[5][lid];
1290 s_skb[6][lid] = c_skb[6][lid];
1291 s_skb[7][lid] = c_skb[7][lid];
1293 barrier (CLK_LOCAL_MEM_FENCE);
1295 if (gid >= gid_max) return;
1301 m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1304 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1306 __local u32 s_SPtrans[8][64];
1308 __local u32 s_skb[8][64];
1314 const u32 gid = get_global_id (0);
1315 const u32 lid = get_local_id (0);
1319 w[ 0] = pws[gid].i[ 0];
1320 w[ 1] = pws[gid].i[ 1];
1321 w[ 2] = pws[gid].i[ 2];
1322 w[ 3] = pws[gid].i[ 3];
1323 w[ 4] = pws[gid].i[ 4];
1324 w[ 5] = pws[gid].i[ 5];
1325 w[ 6] = pws[gid].i[ 6];
1326 w[ 7] = pws[gid].i[ 7];
1336 const u32 pw_len = pws[gid].pw_len;
1342 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1343 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1344 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1345 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1346 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1347 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1348 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1349 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1351 s_skb[0][lid] = c_skb[0][lid];
1352 s_skb[1][lid] = c_skb[1][lid];
1353 s_skb[2][lid] = c_skb[2][lid];
1354 s_skb[3][lid] = c_skb[3][lid];
1355 s_skb[4][lid] = c_skb[4][lid];
1356 s_skb[5][lid] = c_skb[5][lid];
1357 s_skb[6][lid] = c_skb[6][lid];
1358 s_skb[7][lid] = c_skb[7][lid];
1360 barrier (CLK_LOCAL_MEM_FENCE);
1362 if (gid >= gid_max) return;
1368 m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1371 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)