2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
31 #include "include/rp_gpu.h"
35 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
36 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
40 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
45 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
46 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
49 #define PERM_OP(a,b,tt,n,m) \
59 #define HPERM_OP(a,tt,n,m) \
65 tt = tt >> (16 + n); \
71 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
72 PERM_OP (l, r, tt, 16, 0x0000ffff); \
73 PERM_OP (r, l, tt, 2, 0x33333333); \
74 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
75 PERM_OP (r, l, tt, 1, 0x55555555); \
80 PERM_OP (l, r, tt, 1, 0x55555555); \
81 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
82 PERM_OP (l, r, tt, 2, 0x33333333); \
83 PERM_OP (r, l, tt, 16, 0x0000ffff); \
84 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
87 __constant u32 c_SPtrans[8][64] =
90 0x02080800, 0x00080000, 0x02000002, 0x02080802,
91 0x02000000, 0x00080802, 0x00080002, 0x02000002,
92 0x00080802, 0x02080800, 0x02080000, 0x00000802,
93 0x02000802, 0x02000000, 0x00000000, 0x00080002,
94 0x00080000, 0x00000002, 0x02000800, 0x00080800,
95 0x02080802, 0x02080000, 0x00000802, 0x02000800,
96 0x00000002, 0x00000800, 0x00080800, 0x02080002,
97 0x00000800, 0x02000802, 0x02080002, 0x00000000,
98 0x00000000, 0x02080802, 0x02000800, 0x00080002,
99 0x02080800, 0x00080000, 0x00000802, 0x02000800,
100 0x02080002, 0x00000800, 0x00080800, 0x02000002,
101 0x00080802, 0x00000002, 0x02000002, 0x02080000,
102 0x02080802, 0x00080800, 0x02080000, 0x02000802,
103 0x02000000, 0x00000802, 0x00080002, 0x00000000,
104 0x00080000, 0x02000000, 0x02000802, 0x02080800,
105 0x00000002, 0x02080002, 0x00000800, 0x00080802,
107 0x40108010, 0x00000000, 0x00108000, 0x40100000,
108 0x40000010, 0x00008010, 0x40008000, 0x00108000,
109 0x00008000, 0x40100010, 0x00000010, 0x40008000,
110 0x00100010, 0x40108000, 0x40100000, 0x00000010,
111 0x00100000, 0x40008010, 0x40100010, 0x00008000,
112 0x00108010, 0x40000000, 0x00000000, 0x00100010,
113 0x40008010, 0x00108010, 0x40108000, 0x40000010,
114 0x40000000, 0x00100000, 0x00008010, 0x40108010,
115 0x00100010, 0x40108000, 0x40008000, 0x00108010,
116 0x40108010, 0x00100010, 0x40000010, 0x00000000,
117 0x40000000, 0x00008010, 0x00100000, 0x40100010,
118 0x00008000, 0x40000000, 0x00108010, 0x40008010,
119 0x40108000, 0x00008000, 0x00000000, 0x40000010,
120 0x00000010, 0x40108010, 0x00108000, 0x40100000,
121 0x40100010, 0x00100000, 0x00008010, 0x40008000,
122 0x40008010, 0x00000010, 0x40100000, 0x00108000,
124 0x04000001, 0x04040100, 0x00000100, 0x04000101,
125 0x00040001, 0x04000000, 0x04000101, 0x00040100,
126 0x04000100, 0x00040000, 0x04040000, 0x00000001,
127 0x04040101, 0x00000101, 0x00000001, 0x04040001,
128 0x00000000, 0x00040001, 0x04040100, 0x00000100,
129 0x00000101, 0x04040101, 0x00040000, 0x04000001,
130 0x04040001, 0x04000100, 0x00040101, 0x04040000,
131 0x00040100, 0x00000000, 0x04000000, 0x00040101,
132 0x04040100, 0x00000100, 0x00000001, 0x00040000,
133 0x00000101, 0x00040001, 0x04040000, 0x04000101,
134 0x00000000, 0x04040100, 0x00040100, 0x04040001,
135 0x00040001, 0x04000000, 0x04040101, 0x00000001,
136 0x00040101, 0x04000001, 0x04000000, 0x04040101,
137 0x00040000, 0x04000100, 0x04000101, 0x00040100,
138 0x04000100, 0x00000000, 0x04040001, 0x00000101,
139 0x04000001, 0x00040101, 0x00000100, 0x04040000,
141 0x00401008, 0x10001000, 0x00000008, 0x10401008,
142 0x00000000, 0x10400000, 0x10001008, 0x00400008,
143 0x10401000, 0x10000008, 0x10000000, 0x00001008,
144 0x10000008, 0x00401008, 0x00400000, 0x10000000,
145 0x10400008, 0x00401000, 0x00001000, 0x00000008,
146 0x00401000, 0x10001008, 0x10400000, 0x00001000,
147 0x00001008, 0x00000000, 0x00400008, 0x10401000,
148 0x10001000, 0x10400008, 0x10401008, 0x00400000,
149 0x10400008, 0x00001008, 0x00400000, 0x10000008,
150 0x00401000, 0x10001000, 0x00000008, 0x10400000,
151 0x10001008, 0x00000000, 0x00001000, 0x00400008,
152 0x00000000, 0x10400008, 0x10401000, 0x00001000,
153 0x10000000, 0x10401008, 0x00401008, 0x00400000,
154 0x10401008, 0x00000008, 0x10001000, 0x00401008,
155 0x00400008, 0x00401000, 0x10400000, 0x10001008,
156 0x00001008, 0x10000000, 0x10000008, 0x10401000,
158 0x08000000, 0x00010000, 0x00000400, 0x08010420,
159 0x08010020, 0x08000400, 0x00010420, 0x08010000,
160 0x00010000, 0x00000020, 0x08000020, 0x00010400,
161 0x08000420, 0x08010020, 0x08010400, 0x00000000,
162 0x00010400, 0x08000000, 0x00010020, 0x00000420,
163 0x08000400, 0x00010420, 0x00000000, 0x08000020,
164 0x00000020, 0x08000420, 0x08010420, 0x00010020,
165 0x08010000, 0x00000400, 0x00000420, 0x08010400,
166 0x08010400, 0x08000420, 0x00010020, 0x08010000,
167 0x00010000, 0x00000020, 0x08000020, 0x08000400,
168 0x08000000, 0x00010400, 0x08010420, 0x00000000,
169 0x00010420, 0x08000000, 0x00000400, 0x00010020,
170 0x08000420, 0x00000400, 0x00000000, 0x08010420,
171 0x08010020, 0x08010400, 0x00000420, 0x00010000,
172 0x00010400, 0x08010020, 0x08000400, 0x00000420,
173 0x00000020, 0x00010420, 0x08010000, 0x08000020,
175 0x80000040, 0x00200040, 0x00000000, 0x80202000,
176 0x00200040, 0x00002000, 0x80002040, 0x00200000,
177 0x00002040, 0x80202040, 0x00202000, 0x80000000,
178 0x80002000, 0x80000040, 0x80200000, 0x00202040,
179 0x00200000, 0x80002040, 0x80200040, 0x00000000,
180 0x00002000, 0x00000040, 0x80202000, 0x80200040,
181 0x80202040, 0x80200000, 0x80000000, 0x00002040,
182 0x00000040, 0x00202000, 0x00202040, 0x80002000,
183 0x00002040, 0x80000000, 0x80002000, 0x00202040,
184 0x80202000, 0x00200040, 0x00000000, 0x80002000,
185 0x80000000, 0x00002000, 0x80200040, 0x00200000,
186 0x00200040, 0x80202040, 0x00202000, 0x00000040,
187 0x80202040, 0x00202000, 0x00200000, 0x80002040,
188 0x80000040, 0x80200000, 0x00202040, 0x00000000,
189 0x00002000, 0x80000040, 0x80002040, 0x80202000,
190 0x80200000, 0x00002040, 0x00000040, 0x80200040,
192 0x00004000, 0x00000200, 0x01000200, 0x01000004,
193 0x01004204, 0x00004004, 0x00004200, 0x00000000,
194 0x01000000, 0x01000204, 0x00000204, 0x01004000,
195 0x00000004, 0x01004200, 0x01004000, 0x00000204,
196 0x01000204, 0x00004000, 0x00004004, 0x01004204,
197 0x00000000, 0x01000200, 0x01000004, 0x00004200,
198 0x01004004, 0x00004204, 0x01004200, 0x00000004,
199 0x00004204, 0x01004004, 0x00000200, 0x01000000,
200 0x00004204, 0x01004000, 0x01004004, 0x00000204,
201 0x00004000, 0x00000200, 0x01000000, 0x01004004,
202 0x01000204, 0x00004204, 0x00004200, 0x00000000,
203 0x00000200, 0x01000004, 0x00000004, 0x01000200,
204 0x00000000, 0x01000204, 0x01000200, 0x00004200,
205 0x00000204, 0x00004000, 0x01004204, 0x01000000,
206 0x01004200, 0x00000004, 0x00004004, 0x01004204,
207 0x01000004, 0x01004200, 0x01004000, 0x00004004,
209 0x20800080, 0x20820000, 0x00020080, 0x00000000,
210 0x20020000, 0x00800080, 0x20800000, 0x20820080,
211 0x00000080, 0x20000000, 0x00820000, 0x00020080,
212 0x00820080, 0x20020080, 0x20000080, 0x20800000,
213 0x00020000, 0x00820080, 0x00800080, 0x20020000,
214 0x20820080, 0x20000080, 0x00000000, 0x00820000,
215 0x20000000, 0x00800000, 0x20020080, 0x20800080,
216 0x00800000, 0x00020000, 0x20820000, 0x00000080,
217 0x00800000, 0x00020000, 0x20000080, 0x20820080,
218 0x00020080, 0x20000000, 0x00000000, 0x00820000,
219 0x20800080, 0x20020080, 0x20020000, 0x00800080,
220 0x20820000, 0x00000080, 0x00800080, 0x20020000,
221 0x20820080, 0x00800000, 0x20800000, 0x20000080,
222 0x00820000, 0x00020080, 0x20020080, 0x20800000,
223 0x00000080, 0x20820000, 0x00820080, 0x00000000,
224 0x20000000, 0x20800080, 0x00020000, 0x00820080,
227 __constant u32 c_skb[8][64] =
229 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
230 0x00000000, 0x00000010, 0x20000000, 0x20000010,
231 0x00010000, 0x00010010, 0x20010000, 0x20010010,
232 0x00000800, 0x00000810, 0x20000800, 0x20000810,
233 0x00010800, 0x00010810, 0x20010800, 0x20010810,
234 0x00000020, 0x00000030, 0x20000020, 0x20000030,
235 0x00010020, 0x00010030, 0x20010020, 0x20010030,
236 0x00000820, 0x00000830, 0x20000820, 0x20000830,
237 0x00010820, 0x00010830, 0x20010820, 0x20010830,
238 0x00080000, 0x00080010, 0x20080000, 0x20080010,
239 0x00090000, 0x00090010, 0x20090000, 0x20090010,
240 0x00080800, 0x00080810, 0x20080800, 0x20080810,
241 0x00090800, 0x00090810, 0x20090800, 0x20090810,
242 0x00080020, 0x00080030, 0x20080020, 0x20080030,
243 0x00090020, 0x00090030, 0x20090020, 0x20090030,
244 0x00080820, 0x00080830, 0x20080820, 0x20080830,
245 0x00090820, 0x00090830, 0x20090820, 0x20090830,
246 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
247 0x00000000, 0x02000000, 0x00002000, 0x02002000,
248 0x00200000, 0x02200000, 0x00202000, 0x02202000,
249 0x00000004, 0x02000004, 0x00002004, 0x02002004,
250 0x00200004, 0x02200004, 0x00202004, 0x02202004,
251 0x00000400, 0x02000400, 0x00002400, 0x02002400,
252 0x00200400, 0x02200400, 0x00202400, 0x02202400,
253 0x00000404, 0x02000404, 0x00002404, 0x02002404,
254 0x00200404, 0x02200404, 0x00202404, 0x02202404,
255 0x10000000, 0x12000000, 0x10002000, 0x12002000,
256 0x10200000, 0x12200000, 0x10202000, 0x12202000,
257 0x10000004, 0x12000004, 0x10002004, 0x12002004,
258 0x10200004, 0x12200004, 0x10202004, 0x12202004,
259 0x10000400, 0x12000400, 0x10002400, 0x12002400,
260 0x10200400, 0x12200400, 0x10202400, 0x12202400,
261 0x10000404, 0x12000404, 0x10002404, 0x12002404,
262 0x10200404, 0x12200404, 0x10202404, 0x12202404,
263 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
264 0x00000000, 0x00000001, 0x00040000, 0x00040001,
265 0x01000000, 0x01000001, 0x01040000, 0x01040001,
266 0x00000002, 0x00000003, 0x00040002, 0x00040003,
267 0x01000002, 0x01000003, 0x01040002, 0x01040003,
268 0x00000200, 0x00000201, 0x00040200, 0x00040201,
269 0x01000200, 0x01000201, 0x01040200, 0x01040201,
270 0x00000202, 0x00000203, 0x00040202, 0x00040203,
271 0x01000202, 0x01000203, 0x01040202, 0x01040203,
272 0x08000000, 0x08000001, 0x08040000, 0x08040001,
273 0x09000000, 0x09000001, 0x09040000, 0x09040001,
274 0x08000002, 0x08000003, 0x08040002, 0x08040003,
275 0x09000002, 0x09000003, 0x09040002, 0x09040003,
276 0x08000200, 0x08000201, 0x08040200, 0x08040201,
277 0x09000200, 0x09000201, 0x09040200, 0x09040201,
278 0x08000202, 0x08000203, 0x08040202, 0x08040203,
279 0x09000202, 0x09000203, 0x09040202, 0x09040203,
280 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
281 0x00000000, 0x00100000, 0x00000100, 0x00100100,
282 0x00000008, 0x00100008, 0x00000108, 0x00100108,
283 0x00001000, 0x00101000, 0x00001100, 0x00101100,
284 0x00001008, 0x00101008, 0x00001108, 0x00101108,
285 0x04000000, 0x04100000, 0x04000100, 0x04100100,
286 0x04000008, 0x04100008, 0x04000108, 0x04100108,
287 0x04001000, 0x04101000, 0x04001100, 0x04101100,
288 0x04001008, 0x04101008, 0x04001108, 0x04101108,
289 0x00020000, 0x00120000, 0x00020100, 0x00120100,
290 0x00020008, 0x00120008, 0x00020108, 0x00120108,
291 0x00021000, 0x00121000, 0x00021100, 0x00121100,
292 0x00021008, 0x00121008, 0x00021108, 0x00121108,
293 0x04020000, 0x04120000, 0x04020100, 0x04120100,
294 0x04020008, 0x04120008, 0x04020108, 0x04120108,
295 0x04021000, 0x04121000, 0x04021100, 0x04121100,
296 0x04021008, 0x04121008, 0x04021108, 0x04121108,
297 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
298 0x00000000, 0x10000000, 0x00010000, 0x10010000,
299 0x00000004, 0x10000004, 0x00010004, 0x10010004,
300 0x20000000, 0x30000000, 0x20010000, 0x30010000,
301 0x20000004, 0x30000004, 0x20010004, 0x30010004,
302 0x00100000, 0x10100000, 0x00110000, 0x10110000,
303 0x00100004, 0x10100004, 0x00110004, 0x10110004,
304 0x20100000, 0x30100000, 0x20110000, 0x30110000,
305 0x20100004, 0x30100004, 0x20110004, 0x30110004,
306 0x00001000, 0x10001000, 0x00011000, 0x10011000,
307 0x00001004, 0x10001004, 0x00011004, 0x10011004,
308 0x20001000, 0x30001000, 0x20011000, 0x30011000,
309 0x20001004, 0x30001004, 0x20011004, 0x30011004,
310 0x00101000, 0x10101000, 0x00111000, 0x10111000,
311 0x00101004, 0x10101004, 0x00111004, 0x10111004,
312 0x20101000, 0x30101000, 0x20111000, 0x30111000,
313 0x20101004, 0x30101004, 0x20111004, 0x30111004,
314 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
315 0x00000000, 0x08000000, 0x00000008, 0x08000008,
316 0x00000400, 0x08000400, 0x00000408, 0x08000408,
317 0x00020000, 0x08020000, 0x00020008, 0x08020008,
318 0x00020400, 0x08020400, 0x00020408, 0x08020408,
319 0x00000001, 0x08000001, 0x00000009, 0x08000009,
320 0x00000401, 0x08000401, 0x00000409, 0x08000409,
321 0x00020001, 0x08020001, 0x00020009, 0x08020009,
322 0x00020401, 0x08020401, 0x00020409, 0x08020409,
323 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
324 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
325 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
326 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
327 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
328 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
329 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
330 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
331 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
332 0x00000000, 0x00000100, 0x00080000, 0x00080100,
333 0x01000000, 0x01000100, 0x01080000, 0x01080100,
334 0x00000010, 0x00000110, 0x00080010, 0x00080110,
335 0x01000010, 0x01000110, 0x01080010, 0x01080110,
336 0x00200000, 0x00200100, 0x00280000, 0x00280100,
337 0x01200000, 0x01200100, 0x01280000, 0x01280100,
338 0x00200010, 0x00200110, 0x00280010, 0x00280110,
339 0x01200010, 0x01200110, 0x01280010, 0x01280110,
340 0x00000200, 0x00000300, 0x00080200, 0x00080300,
341 0x01000200, 0x01000300, 0x01080200, 0x01080300,
342 0x00000210, 0x00000310, 0x00080210, 0x00080310,
343 0x01000210, 0x01000310, 0x01080210, 0x01080310,
344 0x00200200, 0x00200300, 0x00280200, 0x00280300,
345 0x01200200, 0x01200300, 0x01280200, 0x01280300,
346 0x00200210, 0x00200310, 0x00280210, 0x00280310,
347 0x01200210, 0x01200310, 0x01280210, 0x01280310,
348 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
349 0x00000000, 0x04000000, 0x00040000, 0x04040000,
350 0x00000002, 0x04000002, 0x00040002, 0x04040002,
351 0x00002000, 0x04002000, 0x00042000, 0x04042000,
352 0x00002002, 0x04002002, 0x00042002, 0x04042002,
353 0x00000020, 0x04000020, 0x00040020, 0x04040020,
354 0x00000022, 0x04000022, 0x00040022, 0x04040022,
355 0x00002020, 0x04002020, 0x00042020, 0x04042020,
356 0x00002022, 0x04002022, 0x00042022, 0x04042022,
357 0x00000800, 0x04000800, 0x00040800, 0x04040800,
358 0x00000802, 0x04000802, 0x00040802, 0x04040802,
359 0x00002800, 0x04002800, 0x00042800, 0x04042800,
360 0x00002802, 0x04002802, 0x00042802, 0x04042802,
361 0x00000820, 0x04000820, 0x00040820, 0x04040820,
362 0x00000822, 0x04000822, 0x00040822, 0x04040822,
363 0x00002820, 0x04002820, 0x00042820, 0x04042820,
364 0x00002822, 0x04002822, 0x00042822, 0x04042822
367 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
368 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
371 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
375 #define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
379 #define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
382 static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64])
395 for (u32 i = 0; i < 16; i += 2)
401 t = Kd[i + 0] ^ rotl32 (r, 28u);
404 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
405 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
406 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
407 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
408 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
409 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
410 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
411 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
414 t = Kd[i + 1] ^ rotl32 (l, 28u);
417 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
418 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
419 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
420 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
421 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
422 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
423 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
424 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
436 static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64])
440 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
441 HPERM_OP (c, tt, 2, 0xcccc0000);
442 HPERM_OP (d, tt, 2, 0xcccc0000);
443 PERM_OP (d, c, tt, 1, 0x55555555);
444 PERM_OP (c, d, tt, 8, 0x00ff00ff);
445 PERM_OP (d, c, tt, 1, 0x55555555);
447 d = ((d & 0x000000ff) << 16)
448 | ((d & 0x0000ff00) << 0)
449 | ((d & 0x00ff0000) >> 16)
450 | ((c & 0xf0000000) >> 4);
455 for (u32 i = 0; i < 16; i++)
457 c = c >> shifts3s0[i] | c << shifts3s1[i];
458 d = d >> shifts3s0[i] | d << shifts3s1[i];
463 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
464 | BOX ((((c >> 6) & 0x03)
465 | ((c >> 7) & 0x3c)), 1, s_skb)
466 | BOX ((((c >> 13) & 0x0f)
467 | ((c >> 14) & 0x30)), 2, s_skb)
468 | BOX ((((c >> 20) & 0x01)
470 | ((c >> 22) & 0x38)), 3, s_skb);
472 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
473 | BOX ((((d >> 7) & 0x03)
474 | ((d >> 8) & 0x3c)), 5, s_skb)
475 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
476 | BOX ((((d >> 21) & 0x0f)
477 | ((d >> 22) & 0x30)), 7, s_skb);
479 #if defined cl_amd_media_ops
480 Kc[i] = amd_bytealign (t, s << 16, 2);
481 Kd[i] = amd_bytealign (t >> 16, s, 2);
483 Kc[i] = ((t << 16) | (s & 0x0000ffff));
484 Kd[i] = ((s >> 16) | (t & 0xffff0000));
487 Kc[i] = rotl32 (Kc[i], 2u);
488 Kd[i] = rotl32 (Kd[i], 2u);
492 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
498 const u32 lid = get_local_id (0);
504 const u32 gid = get_global_id (0);
508 pw_buf0[0] = pws[gid].i[ 0];
509 pw_buf0[1] = pws[gid].i[ 1];
510 pw_buf0[2] = pws[gid].i[ 2];
511 pw_buf0[3] = pws[gid].i[ 3];
515 pw_buf1[0] = pws[gid].i[ 4];
516 pw_buf1[1] = pws[gid].i[ 5];
517 pw_buf1[2] = pws[gid].i[ 6];
518 pw_buf1[3] = pws[gid].i[ 7];
520 const u32 pw_len = pws[gid].pw_len;
526 __local u32 s_SPtrans[8][64];
527 __local u32 s_skb[8][64];
529 s_SPtrans[0][lid] = c_SPtrans[0][lid];
530 s_SPtrans[1][lid] = c_SPtrans[1][lid];
531 s_SPtrans[2][lid] = c_SPtrans[2][lid];
532 s_SPtrans[3][lid] = c_SPtrans[3][lid];
533 s_SPtrans[4][lid] = c_SPtrans[4][lid];
534 s_SPtrans[5][lid] = c_SPtrans[5][lid];
535 s_SPtrans[6][lid] = c_SPtrans[6][lid];
536 s_SPtrans[7][lid] = c_SPtrans[7][lid];
538 s_skb[0][lid] = c_skb[0][lid];
539 s_skb[1][lid] = c_skb[1][lid];
540 s_skb[2][lid] = c_skb[2][lid];
541 s_skb[3][lid] = c_skb[3][lid];
542 s_skb[4][lid] = c_skb[4][lid];
543 s_skb[5][lid] = c_skb[5][lid];
544 s_skb[6][lid] = c_skb[6][lid];
545 s_skb[7][lid] = c_skb[7][lid];
547 barrier (CLK_LOCAL_MEM_FENCE);
549 if (gid >= gid_max) return;
557 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
558 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
559 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
560 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
564 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
565 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
566 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
567 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
569 const u32 salt_len = salt_bufs[salt_pos].salt_len;
575 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
605 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
607 const u32 salt_word_len = (salt_len + out_len) * 2;
641 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
643 w0_t[0] |= salt_buf0[0];
644 w0_t[1] |= salt_buf0[1];
645 w0_t[2] |= salt_buf0[2];
646 w0_t[3] |= salt_buf0[3];
647 w1_t[0] |= salt_buf1[0];
648 w1_t[1] |= salt_buf1[1];
649 w1_t[2] |= salt_buf1[2];
650 w1_t[3] |= salt_buf1[3];
672 * precompute key1 since key is static: 0x0123456789abcdef
673 * plus LEFT_ROTATE by 2
715 * key1 (generate key)
723 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
727 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
728 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
733 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
737 * key2 (generate hash)
740 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
745 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
749 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
750 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
755 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
762 const u32x r0 = iv[0];
763 const u32x r1 = iv[1];
767 #include VECT_COMPARE_M
771 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
775 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
779 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
785 const u32 lid = get_local_id (0);
791 const u32 gid = get_global_id (0);
795 pw_buf0[0] = pws[gid].i[ 0];
796 pw_buf0[1] = pws[gid].i[ 1];
797 pw_buf0[2] = pws[gid].i[ 2];
798 pw_buf0[3] = pws[gid].i[ 3];
802 pw_buf1[0] = pws[gid].i[ 4];
803 pw_buf1[1] = pws[gid].i[ 5];
804 pw_buf1[2] = pws[gid].i[ 6];
805 pw_buf1[3] = pws[gid].i[ 7];
807 const u32 pw_len = pws[gid].pw_len;
813 __local u32 s_SPtrans[8][64];
814 __local u32 s_skb[8][64];
816 s_SPtrans[0][lid] = c_SPtrans[0][lid];
817 s_SPtrans[1][lid] = c_SPtrans[1][lid];
818 s_SPtrans[2][lid] = c_SPtrans[2][lid];
819 s_SPtrans[3][lid] = c_SPtrans[3][lid];
820 s_SPtrans[4][lid] = c_SPtrans[4][lid];
821 s_SPtrans[5][lid] = c_SPtrans[5][lid];
822 s_SPtrans[6][lid] = c_SPtrans[6][lid];
823 s_SPtrans[7][lid] = c_SPtrans[7][lid];
825 s_skb[0][lid] = c_skb[0][lid];
826 s_skb[1][lid] = c_skb[1][lid];
827 s_skb[2][lid] = c_skb[2][lid];
828 s_skb[3][lid] = c_skb[3][lid];
829 s_skb[4][lid] = c_skb[4][lid];
830 s_skb[5][lid] = c_skb[5][lid];
831 s_skb[6][lid] = c_skb[6][lid];
832 s_skb[7][lid] = c_skb[7][lid];
834 barrier (CLK_LOCAL_MEM_FENCE);
836 if (gid >= gid_max) return;
844 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
845 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
846 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
847 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
851 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
852 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
853 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
854 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
856 const u32 salt_len = salt_bufs[salt_pos].salt_len;
862 const u32 search[4] =
864 digests_buf[digests_offset].digest_buf[DGST_R0],
865 digests_buf[digests_offset].digest_buf[DGST_R1],
866 digests_buf[digests_offset].digest_buf[DGST_R2],
867 digests_buf[digests_offset].digest_buf[DGST_R3]
874 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
904 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
906 const u32 salt_word_len = (salt_len + out_len) * 2;
940 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
942 w0_t[0] |= salt_buf0[0];
943 w0_t[1] |= salt_buf0[1];
944 w0_t[2] |= salt_buf0[2];
945 w0_t[3] |= salt_buf0[3];
946 w1_t[0] |= salt_buf1[0];
947 w1_t[1] |= salt_buf1[1];
948 w1_t[2] |= salt_buf1[2];
949 w1_t[3] |= salt_buf1[3];
971 * precompute key1 since key is static: 0x0123456789abcdef
972 * plus LEFT_ROTATE by 2
1000 Kd[ 4] = 0x405cc070;
1001 Kd[ 5] = 0xa010784c;
1002 Kd[ 6] = 0x6074a800;
1003 Kd[ 7] = 0x80701c1c;
1004 Kd[ 8] = 0x9cd49430;
1005 Kd[ 9] = 0x4c8ce078;
1006 Kd[10] = 0x5c18c088;
1007 Kd[11] = 0x28a8a4c8;
1008 Kd[12] = 0x3c180838;
1009 Kd[13] = 0xb0b86c20;
1010 Kd[14] = 0xac84a094;
1011 Kd[15] = 0x4ce0c0c4;
1014 * key1 (generate key)
1022 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1026 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1027 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1032 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1036 * key2 (generate hash)
1039 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1044 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1048 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1049 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1054 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1061 const u32x r0 = iv[0];
1062 const u32x r1 = iv[1];
1066 #include VECT_COMPARE_S
1070 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1074 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)