2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
/*
 * File names of the comparison snippets that the kernels pull in through
 * "#include VECT_COMPARE_S" / "#include VECT_COMPARE_M" once r0 and r1 hold
 * the final cipher words.  Three alternative pairs are listed (vect1, vect2,
 * vect4).  The conditional directives that choose between them are not
 * visible in this view -- presumably one pair per supported vector width.
 */
33 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
34 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
38 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
39 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
44 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
/*
 * Bit-shuffling helper macros.  Only part of each definition is visible
 * here, so these notes describe just what the shown lines establish:
 *  - PERM_OP (a, b, tt, n, m): two operands, a scratch value tt, a shift
 *    count n and a mask m.
 *  - HPERM_OP (a, tt, n, m): single-operand variant; one of its steps
 *    shifts the scratch value right by (16 + n).
 *  - The two five-step PERM_OP sequences that follow use the shift/mask
 *    pairs 4/0x0f0f0f0f, 16/0x0000ffff, 2/0x33333333, 8/0x00ff00ff and
 *    1/0x55555555.  The second sequence applies the same pairs in reverse
 *    order with the l and r operands swapped.
 * NOTE(review): the two sequences look like the DES initial and final
 * permutations -- confirm against the macro names, which are outside this
 * view.
 */
47 #define PERM_OP(a,b,tt,n,m) \
57 #define HPERM_OP(a,tt,n,m) \
63 tt = tt >> (16 + n); \
69 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
70 PERM_OP (l, r, tt, 16, 0x0000ffff); \
71 PERM_OP (r, l, tt, 2, 0x33333333); \
72 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
73 PERM_OP (r, l, tt, 1, 0x55555555); \
78 PERM_OP (l, r, tt, 1, 0x55555555); \
79 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
80 PERM_OP (l, r, tt, 2, 0x33333333); \
81 PERM_OP (r, l, tt, 16, 0x0000ffff); \
82 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
/*
 * c_SPtrans: 8 rows of 64 32-bit words kept in __constant memory (each run
 * of sixteen lines below supplies one 64-entry row).  The kernels in this
 * file copy it into the __local array s_SPtrans, and _des_crypt_encrypt
 * indexes row n with a 6-bit field taken from the round input.
 * NOTE(review): by its name this is presumably the combined DES S-box /
 * P-permutation table -- confirm.
 */
85 __constant u32 c_SPtrans[8][64] =
88 0x02080800, 0x00080000, 0x02000002, 0x02080802,
89 0x02000000, 0x00080802, 0x00080002, 0x02000002,
90 0x00080802, 0x02080800, 0x02080000, 0x00000802,
91 0x02000802, 0x02000000, 0x00000000, 0x00080002,
92 0x00080000, 0x00000002, 0x02000800, 0x00080800,
93 0x02080802, 0x02080000, 0x00000802, 0x02000800,
94 0x00000002, 0x00000800, 0x00080800, 0x02080002,
95 0x00000800, 0x02000802, 0x02080002, 0x00000000,
96 0x00000000, 0x02080802, 0x02000800, 0x00080002,
97 0x02080800, 0x00080000, 0x00000802, 0x02000800,
98 0x02080002, 0x00000800, 0x00080800, 0x02000002,
99 0x00080802, 0x00000002, 0x02000002, 0x02080000,
100 0x02080802, 0x00080800, 0x02080000, 0x02000802,
101 0x02000000, 0x00000802, 0x00080002, 0x00000000,
102 0x00080000, 0x02000000, 0x02000802, 0x02080800,
103 0x00000002, 0x02080002, 0x00000800, 0x00080802,
105 0x40108010, 0x00000000, 0x00108000, 0x40100000,
106 0x40000010, 0x00008010, 0x40008000, 0x00108000,
107 0x00008000, 0x40100010, 0x00000010, 0x40008000,
108 0x00100010, 0x40108000, 0x40100000, 0x00000010,
109 0x00100000, 0x40008010, 0x40100010, 0x00008000,
110 0x00108010, 0x40000000, 0x00000000, 0x00100010,
111 0x40008010, 0x00108010, 0x40108000, 0x40000010,
112 0x40000000, 0x00100000, 0x00008010, 0x40108010,
113 0x00100010, 0x40108000, 0x40008000, 0x00108010,
114 0x40108010, 0x00100010, 0x40000010, 0x00000000,
115 0x40000000, 0x00008010, 0x00100000, 0x40100010,
116 0x00008000, 0x40000000, 0x00108010, 0x40008010,
117 0x40108000, 0x00008000, 0x00000000, 0x40000010,
118 0x00000010, 0x40108010, 0x00108000, 0x40100000,
119 0x40100010, 0x00100000, 0x00008010, 0x40008000,
120 0x40008010, 0x00000010, 0x40100000, 0x00108000,
122 0x04000001, 0x04040100, 0x00000100, 0x04000101,
123 0x00040001, 0x04000000, 0x04000101, 0x00040100,
124 0x04000100, 0x00040000, 0x04040000, 0x00000001,
125 0x04040101, 0x00000101, 0x00000001, 0x04040001,
126 0x00000000, 0x00040001, 0x04040100, 0x00000100,
127 0x00000101, 0x04040101, 0x00040000, 0x04000001,
128 0x04040001, 0x04000100, 0x00040101, 0x04040000,
129 0x00040100, 0x00000000, 0x04000000, 0x00040101,
130 0x04040100, 0x00000100, 0x00000001, 0x00040000,
131 0x00000101, 0x00040001, 0x04040000, 0x04000101,
132 0x00000000, 0x04040100, 0x00040100, 0x04040001,
133 0x00040001, 0x04000000, 0x04040101, 0x00000001,
134 0x00040101, 0x04000001, 0x04000000, 0x04040101,
135 0x00040000, 0x04000100, 0x04000101, 0x00040100,
136 0x04000100, 0x00000000, 0x04040001, 0x00000101,
137 0x04000001, 0x00040101, 0x00000100, 0x04040000,
139 0x00401008, 0x10001000, 0x00000008, 0x10401008,
140 0x00000000, 0x10400000, 0x10001008, 0x00400008,
141 0x10401000, 0x10000008, 0x10000000, 0x00001008,
142 0x10000008, 0x00401008, 0x00400000, 0x10000000,
143 0x10400008, 0x00401000, 0x00001000, 0x00000008,
144 0x00401000, 0x10001008, 0x10400000, 0x00001000,
145 0x00001008, 0x00000000, 0x00400008, 0x10401000,
146 0x10001000, 0x10400008, 0x10401008, 0x00400000,
147 0x10400008, 0x00001008, 0x00400000, 0x10000008,
148 0x00401000, 0x10001000, 0x00000008, 0x10400000,
149 0x10001008, 0x00000000, 0x00001000, 0x00400008,
150 0x00000000, 0x10400008, 0x10401000, 0x00001000,
151 0x10000000, 0x10401008, 0x00401008, 0x00400000,
152 0x10401008, 0x00000008, 0x10001000, 0x00401008,
153 0x00400008, 0x00401000, 0x10400000, 0x10001008,
154 0x00001008, 0x10000000, 0x10000008, 0x10401000,
156 0x08000000, 0x00010000, 0x00000400, 0x08010420,
157 0x08010020, 0x08000400, 0x00010420, 0x08010000,
158 0x00010000, 0x00000020, 0x08000020, 0x00010400,
159 0x08000420, 0x08010020, 0x08010400, 0x00000000,
160 0x00010400, 0x08000000, 0x00010020, 0x00000420,
161 0x08000400, 0x00010420, 0x00000000, 0x08000020,
162 0x00000020, 0x08000420, 0x08010420, 0x00010020,
163 0x08010000, 0x00000400, 0x00000420, 0x08010400,
164 0x08010400, 0x08000420, 0x00010020, 0x08010000,
165 0x00010000, 0x00000020, 0x08000020, 0x08000400,
166 0x08000000, 0x00010400, 0x08010420, 0x00000000,
167 0x00010420, 0x08000000, 0x00000400, 0x00010020,
168 0x08000420, 0x00000400, 0x00000000, 0x08010420,
169 0x08010020, 0x08010400, 0x00000420, 0x00010000,
170 0x00010400, 0x08010020, 0x08000400, 0x00000420,
171 0x00000020, 0x00010420, 0x08010000, 0x08000020,
173 0x80000040, 0x00200040, 0x00000000, 0x80202000,
174 0x00200040, 0x00002000, 0x80002040, 0x00200000,
175 0x00002040, 0x80202040, 0x00202000, 0x80000000,
176 0x80002000, 0x80000040, 0x80200000, 0x00202040,
177 0x00200000, 0x80002040, 0x80200040, 0x00000000,
178 0x00002000, 0x00000040, 0x80202000, 0x80200040,
179 0x80202040, 0x80200000, 0x80000000, 0x00002040,
180 0x00000040, 0x00202000, 0x00202040, 0x80002000,
181 0x00002040, 0x80000000, 0x80002000, 0x00202040,
182 0x80202000, 0x00200040, 0x00000000, 0x80002000,
183 0x80000000, 0x00002000, 0x80200040, 0x00200000,
184 0x00200040, 0x80202040, 0x00202000, 0x00000040,
185 0x80202040, 0x00202000, 0x00200000, 0x80002040,
186 0x80000040, 0x80200000, 0x00202040, 0x00000000,
187 0x00002000, 0x80000040, 0x80002040, 0x80202000,
188 0x80200000, 0x00002040, 0x00000040, 0x80200040,
190 0x00004000, 0x00000200, 0x01000200, 0x01000004,
191 0x01004204, 0x00004004, 0x00004200, 0x00000000,
192 0x01000000, 0x01000204, 0x00000204, 0x01004000,
193 0x00000004, 0x01004200, 0x01004000, 0x00000204,
194 0x01000204, 0x00004000, 0x00004004, 0x01004204,
195 0x00000000, 0x01000200, 0x01000004, 0x00004200,
196 0x01004004, 0x00004204, 0x01004200, 0x00000004,
197 0x00004204, 0x01004004, 0x00000200, 0x01000000,
198 0x00004204, 0x01004000, 0x01004004, 0x00000204,
199 0x00004000, 0x00000200, 0x01000000, 0x01004004,
200 0x01000204, 0x00004204, 0x00004200, 0x00000000,
201 0x00000200, 0x01000004, 0x00000004, 0x01000200,
202 0x00000000, 0x01000204, 0x01000200, 0x00004200,
203 0x00000204, 0x00004000, 0x01004204, 0x01000000,
204 0x01004200, 0x00000004, 0x00004004, 0x01004204,
205 0x01000004, 0x01004200, 0x01004000, 0x00004004,
207 0x20800080, 0x20820000, 0x00020080, 0x00000000,
208 0x20020000, 0x00800080, 0x20800000, 0x20820080,
209 0x00000080, 0x20000000, 0x00820000, 0x00020080,
210 0x00820080, 0x20020080, 0x20000080, 0x20800000,
211 0x00020000, 0x00820080, 0x00800080, 0x20020000,
212 0x20820080, 0x20000080, 0x00000000, 0x00820000,
213 0x20000000, 0x00800000, 0x20020080, 0x20800080,
214 0x00800000, 0x00020000, 0x20820000, 0x00000080,
215 0x00800000, 0x00020000, 0x20000080, 0x20820080,
216 0x00020080, 0x20000000, 0x00000000, 0x00820000,
217 0x20800080, 0x20020080, 0x20020000, 0x00800080,
218 0x20820000, 0x00000080, 0x00800080, 0x20020000,
219 0x20820080, 0x00800000, 0x20800000, 0x20000080,
220 0x00820000, 0x00020080, 0x20020080, 0x20800000,
221 0x00000080, 0x20820000, 0x00820080, 0x00000000,
222 0x20000000, 0x20800080, 0x00020000, 0x00820080,
/*
 * c_skb: 8 rows of 64 32-bit words in __constant memory, one row per group
 * of six key bits as labelled by the per-row comments below (rows 0-3 for
 * the C half, rows 4-7 for the D half).  The kernels copy it into the
 * __local array s_skb; _des_crypt_keysetup ORs together one entry from each
 * of rows 0-3 and one entry from each of rows 4-7 to assemble the round-key
 * words.
 */
225 __constant u32 c_skb[8][64] =
227 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
228 0x00000000, 0x00000010, 0x20000000, 0x20000010,
229 0x00010000, 0x00010010, 0x20010000, 0x20010010,
230 0x00000800, 0x00000810, 0x20000800, 0x20000810,
231 0x00010800, 0x00010810, 0x20010800, 0x20010810,
232 0x00000020, 0x00000030, 0x20000020, 0x20000030,
233 0x00010020, 0x00010030, 0x20010020, 0x20010030,
234 0x00000820, 0x00000830, 0x20000820, 0x20000830,
235 0x00010820, 0x00010830, 0x20010820, 0x20010830,
236 0x00080000, 0x00080010, 0x20080000, 0x20080010,
237 0x00090000, 0x00090010, 0x20090000, 0x20090010,
238 0x00080800, 0x00080810, 0x20080800, 0x20080810,
239 0x00090800, 0x00090810, 0x20090800, 0x20090810,
240 0x00080020, 0x00080030, 0x20080020, 0x20080030,
241 0x00090020, 0x00090030, 0x20090020, 0x20090030,
242 0x00080820, 0x00080830, 0x20080820, 0x20080830,
243 0x00090820, 0x00090830, 0x20090820, 0x20090830,
244 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
245 0x00000000, 0x02000000, 0x00002000, 0x02002000,
246 0x00200000, 0x02200000, 0x00202000, 0x02202000,
247 0x00000004, 0x02000004, 0x00002004, 0x02002004,
248 0x00200004, 0x02200004, 0x00202004, 0x02202004,
249 0x00000400, 0x02000400, 0x00002400, 0x02002400,
250 0x00200400, 0x02200400, 0x00202400, 0x02202400,
251 0x00000404, 0x02000404, 0x00002404, 0x02002404,
252 0x00200404, 0x02200404, 0x00202404, 0x02202404,
253 0x10000000, 0x12000000, 0x10002000, 0x12002000,
254 0x10200000, 0x12200000, 0x10202000, 0x12202000,
255 0x10000004, 0x12000004, 0x10002004, 0x12002004,
256 0x10200004, 0x12200004, 0x10202004, 0x12202004,
257 0x10000400, 0x12000400, 0x10002400, 0x12002400,
258 0x10200400, 0x12200400, 0x10202400, 0x12202400,
259 0x10000404, 0x12000404, 0x10002404, 0x12002404,
260 0x10200404, 0x12200404, 0x10202404, 0x12202404,
261 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
262 0x00000000, 0x00000001, 0x00040000, 0x00040001,
263 0x01000000, 0x01000001, 0x01040000, 0x01040001,
264 0x00000002, 0x00000003, 0x00040002, 0x00040003,
265 0x01000002, 0x01000003, 0x01040002, 0x01040003,
266 0x00000200, 0x00000201, 0x00040200, 0x00040201,
267 0x01000200, 0x01000201, 0x01040200, 0x01040201,
268 0x00000202, 0x00000203, 0x00040202, 0x00040203,
269 0x01000202, 0x01000203, 0x01040202, 0x01040203,
270 0x08000000, 0x08000001, 0x08040000, 0x08040001,
271 0x09000000, 0x09000001, 0x09040000, 0x09040001,
272 0x08000002, 0x08000003, 0x08040002, 0x08040003,
273 0x09000002, 0x09000003, 0x09040002, 0x09040003,
274 0x08000200, 0x08000201, 0x08040200, 0x08040201,
275 0x09000200, 0x09000201, 0x09040200, 0x09040201,
276 0x08000202, 0x08000203, 0x08040202, 0x08040203,
277 0x09000202, 0x09000203, 0x09040202, 0x09040203,
278 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
279 0x00000000, 0x00100000, 0x00000100, 0x00100100,
280 0x00000008, 0x00100008, 0x00000108, 0x00100108,
281 0x00001000, 0x00101000, 0x00001100, 0x00101100,
282 0x00001008, 0x00101008, 0x00001108, 0x00101108,
283 0x04000000, 0x04100000, 0x04000100, 0x04100100,
284 0x04000008, 0x04100008, 0x04000108, 0x04100108,
285 0x04001000, 0x04101000, 0x04001100, 0x04101100,
286 0x04001008, 0x04101008, 0x04001108, 0x04101108,
287 0x00020000, 0x00120000, 0x00020100, 0x00120100,
288 0x00020008, 0x00120008, 0x00020108, 0x00120108,
289 0x00021000, 0x00121000, 0x00021100, 0x00121100,
290 0x00021008, 0x00121008, 0x00021108, 0x00121108,
291 0x04020000, 0x04120000, 0x04020100, 0x04120100,
292 0x04020008, 0x04120008, 0x04020108, 0x04120108,
293 0x04021000, 0x04121000, 0x04021100, 0x04121100,
294 0x04021008, 0x04121008, 0x04021108, 0x04121108,
295 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
296 0x00000000, 0x10000000, 0x00010000, 0x10010000,
297 0x00000004, 0x10000004, 0x00010004, 0x10010004,
298 0x20000000, 0x30000000, 0x20010000, 0x30010000,
299 0x20000004, 0x30000004, 0x20010004, 0x30010004,
300 0x00100000, 0x10100000, 0x00110000, 0x10110000,
301 0x00100004, 0x10100004, 0x00110004, 0x10110004,
302 0x20100000, 0x30100000, 0x20110000, 0x30110000,
303 0x20100004, 0x30100004, 0x20110004, 0x30110004,
304 0x00001000, 0x10001000, 0x00011000, 0x10011000,
305 0x00001004, 0x10001004, 0x00011004, 0x10011004,
306 0x20001000, 0x30001000, 0x20011000, 0x30011000,
307 0x20001004, 0x30001004, 0x20011004, 0x30011004,
308 0x00101000, 0x10101000, 0x00111000, 0x10111000,
309 0x00101004, 0x10101004, 0x00111004, 0x10111004,
310 0x20101000, 0x30101000, 0x20111000, 0x30111000,
311 0x20101004, 0x30101004, 0x20111004, 0x30111004,
312 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
313 0x00000000, 0x08000000, 0x00000008, 0x08000008,
314 0x00000400, 0x08000400, 0x00000408, 0x08000408,
315 0x00020000, 0x08020000, 0x00020008, 0x08020008,
316 0x00020400, 0x08020400, 0x00020408, 0x08020408,
317 0x00000001, 0x08000001, 0x00000009, 0x08000009,
318 0x00000401, 0x08000401, 0x00000409, 0x08000409,
319 0x00020001, 0x08020001, 0x00020009, 0x08020009,
320 0x00020401, 0x08020401, 0x00020409, 0x08020409,
321 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
322 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
323 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
324 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
325 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
326 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
327 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
328 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
329 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
330 0x00000000, 0x00000100, 0x00080000, 0x00080100,
331 0x01000000, 0x01000100, 0x01080000, 0x01080100,
332 0x00000010, 0x00000110, 0x00080010, 0x00080110,
333 0x01000010, 0x01000110, 0x01080010, 0x01080110,
334 0x00200000, 0x00200100, 0x00280000, 0x00280100,
335 0x01200000, 0x01200100, 0x01280000, 0x01280100,
336 0x00200010, 0x00200110, 0x00280010, 0x00280110,
337 0x01200010, 0x01200110, 0x01280010, 0x01280110,
338 0x00000200, 0x00000300, 0x00080200, 0x00080300,
339 0x01000200, 0x01000300, 0x01080200, 0x01080300,
340 0x00000210, 0x00000310, 0x00080210, 0x00080310,
341 0x01000210, 0x01000310, 0x01080210, 0x01080310,
342 0x00200200, 0x00200300, 0x00280200, 0x00280300,
343 0x01200200, 0x01200300, 0x01280200, 0x01280300,
344 0x00200210, 0x00200310, 0x00280210, 0x00280310,
345 0x01200210, 0x01200310, 0x01280210, 0x01280310,
346 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
347 0x00000000, 0x04000000, 0x00040000, 0x04040000,
348 0x00000002, 0x04000002, 0x00040002, 0x04040002,
349 0x00002000, 0x04002000, 0x00042000, 0x04042000,
350 0x00002002, 0x04002002, 0x00042002, 0x04042002,
351 0x00000020, 0x04000020, 0x00040020, 0x04040020,
352 0x00000022, 0x04000022, 0x00040022, 0x04040022,
353 0x00002020, 0x04002020, 0x00042020, 0x04042020,
354 0x00002022, 0x04002022, 0x00042022, 0x04042022,
355 0x00000800, 0x04000800, 0x00040800, 0x04040800,
356 0x00000802, 0x04000802, 0x00040802, 0x04040802,
357 0x00002800, 0x04002800, 0x00042800, 0x04042800,
358 0x00002802, 0x04002802, 0x00042802, 0x04042802,
359 0x00000820, 0x04000820, 0x00040820, 0x04040820,
360 0x00000822, 0x04000822, 0x00040822, 0x04040822,
361 0x00002820, 0x04002820, 0x00042820, 0x04042820,
362 0x00002822, 0x04002822, 0x00042822, 0x04042822
/*
 * Per-round shift amounts used by _des_crypt_keysetup: in round i both key
 * halves are updated as (x >> shifts3s0[i]) | (x << shifts3s1[i]).  Every
 * pair sums to 28 (1 + 27 or 2 + 26), i.e. the pair describes a rotation
 * within a 28-bit field -- assuming the halves are masked to 28 bits, which
 * is not visible in this view.
 */
365 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
366 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
/*
 * BOX (i, n, S): look up S[n][i] and wrap the result in a u32x.  Three
 * alternative definitions are listed: a scalar form, a form that indexes
 * with the .s0/.s1 components of i, and one that uses .s0 through .s3.
 * The preprocessor conditions selecting between them are not visible here
 * (presumably keyed on the vector width of u32x).
 */
369 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
373 #define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
377 #define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
/*
 * _des_crypt_encrypt: runs 16 rounds, two per loop iteration, using the
 * round keys Kc[]/Kd[] and the __local lookup table s_SPtrans.
 *
 *   iv         block state; callers pass it in and afterwards read the
 *              result from iv[0] / iv[1] (how it is combined with data is
 *              outside the visible lines)
 *   data       two input words
 *   Kc, Kd     16 round-key words each
 *   s_SPtrans  8 x 64 lookup table in __local memory
 *
 * In each half-round the visible code forms t as Kd[i] XOR one half rotated
 * left by 28 bits (r in the first half-round, l in the second), then XORs
 * together eight table entries: rows 0/2/4/6 are indexed by 6-bit fields of
 * u and rows 1/3/5/7 by 6-bit fields of t, extracted with amd_bfe at bit
 * offsets 2, 10, 18 and 26.  The computation of u is not visible here.
 */
380 static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64])
/* i advances by 2: round i works from r, round i + 1 works from l */
393 for (u32 i = 0; i < 16; i += 2)
399 t = Kd[i + 0] ^ rotl32 (r, 28u);
402 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
403 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
404 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
405 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
406 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
407 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
408 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
409 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
412 t = Kd[i + 1] ^ rotl32 (l, 28u);
415 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
416 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
417 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
418 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
419 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
420 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
421 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
422 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
/*
 * _des_crypt_keysetup: expands the two key words c and d into 16 round-key
 * word pairs, written to Kc[0..15] and Kd[0..15].
 *
 *   c, d    the two input key words (passed by value, modified locally)
 *   Kc, Kd  output arrays, one entry each per round
 *   s_skb   8 x 64 lookup table in __local memory
 */
434 static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64])
/* shuffle the bits of c and d through the PERM_OP / HPERM_OP macros (tt is scratch) */
438 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
439 HPERM_OP (c, tt, 2, 0xcccc0000);
440 HPERM_OP (d, tt, 2, 0xcccc0000);
441 PERM_OP (d, c, tt, 1, 0x55555555);
442 PERM_OP (c, d, tt, 8, 0x00ff00ff);
443 PERM_OP (d, c, tt, 1, 0x55555555);
/*
 * Rebuild d: byte 0 and byte 2 of d trade places, byte 1 stays where it is,
 * and the top nibble of c (bits 28..31) is moved into bits 24..27.
 */
445 d = ((d & 0x000000ff) << 16)
446 | ((d & 0x0000ff00) << 0)
447 | ((d & 0x00ff0000) >> 16)
448 | ((c & 0xf0000000) >> 4);
/* one iteration per round key */
453 for (u32 i = 0; i < 16; i++)
/* shift both halves by this round's amounts (see shifts3s0 / shifts3s1) */
455 c = c >> shifts3s0[i] | c << shifts3s1[i];
456 d = d >> shifts3s0[i] | d << shifts3s1[i];
/* s: OR of one entry from each of rows 0-3, indexed by bit groups gathered from c */
461 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
462 | BOX ((((c >> 6) & 0x03)
463 | ((c >> 7) & 0x3c)), 1, s_skb)
464 | BOX ((((c >> 13) & 0x0f)
465 | ((c >> 14) & 0x30)), 2, s_skb)
466 | BOX ((((c >> 20) & 0x01)
468 | ((c >> 22) & 0x38)), 3, s_skb);
/* t: OR of one entry from each of rows 4-7, indexed by bit groups gathered from d */
470 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
471 | BOX ((((d >> 7) & 0x03)
472 | ((d >> 8) & 0x3c)), 5, s_skb)
473 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
474 | BOX ((((d >> 21) & 0x0f)
475 | ((d >> 22) & 0x30)), 7, s_skb);
/*
 * Pack s and t into the round-key pair: Kc[i] gets the low 16 bits of s in
 * its low half and the low 16 bits of t in its high half; Kd[i] gets the
 * high 16 bits of s in its low half and the high 16 bits of t in its high
 * half.  The amd_bytealign form (used when cl_amd_media_ops is defined) and
 * the shift/mask form below it express the same packing; the directive that
 * separates the two alternatives is not visible in this view.
 */
477 #if defined cl_amd_media_ops
478 Kc[i] = amd_bytealign (t, s << 16, 2);
479 Kd[i] = amd_bytealign (t >> 16, s, 2);
481 Kc[i] = ((t << 16) | (s & 0x0000ffff));
482 Kd[i] = ((s >> 16) | (t & 0xffff0000));
/* both round-key words are stored rotated left by 2 bits */
485 Kc[i] = rotl32 (Kc[i], 2u);
486 Kd[i] = rotl32 (Kd[i], 2u);
/*
 * m03100_m04: kernel entry point with a required work-group size of 64.
 *
 * Visible flow:
 *  1. Load the left-hand word (eight 32-bit words) and its length from
 *     pws[gid]; in COMBINATOR_MODE_BASE_RIGHT it is passed through
 *     switch_buffer_by_offset with combs_buf[0].pw_len.
 *  2. Copy c_SPtrans and c_skb into __local memory, synchronise the
 *     work-group, then drop work-items with gid >= gid_max.
 *  3. Load eight salt words and salt_len from salt_bufs[salt_pos].
 *  4. For every right-hand word in combs_buf[0 .. combs_cnt - 1]: merge it
 *     with the left-hand word (bitwise OR), offset the merged buffer by
 *     salt_len and OR the salt words in, then run two passes of
 *     _des_crypt_encrypt over the buffer.  The first pass ("key1") uses
 *     round keys prepared outside the visible lines; its iv[0] / iv[1]
 *     result is expanded by _des_crypt_keysetup into the round keys for the
 *     second pass ("key2").  The final iv[0] / iv[1] become r0 / r1 for the
 *     VECT_COMPARE_M snippet.
 *
 * Most parameters are not referenced by the visible lines; they may be used
 * by the included comparison snippet or by lines outside this view.
 */
490 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
496 const u32 lid = get_local_id (0);
502 const u32 gid = get_global_id (0);
/* left-hand candidate: first eight 32-bit words of pws[gid] */
506 wordl0[0] = pws[gid].i[ 0];
507 wordl0[1] = pws[gid].i[ 1];
508 wordl0[2] = pws[gid].i[ 2];
509 wordl0[3] = pws[gid].i[ 3];
513 wordl1[0] = pws[gid].i[ 4];
514 wordl1[1] = pws[gid].i[ 5];
515 wordl1[2] = pws[gid].i[ 6];
516 wordl1[3] = pws[gid].i[ 7];
532 const u32 pw_l_len = pws[gid].pw_len;
/*
 * NOTE(review): switch_buffer_by_offset is defined elsewhere; presumably it
 * moves the buffer contents forward by the given byte count so the other
 * word can occupy the front -- confirm.  Only combs_buf[0].pw_len is used
 * as the offset here.
 */
534 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
536 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
/* work-group local copies of the two lookup tables */
543 __local u32 s_SPtrans[8][64];
545 __local u32 s_skb[8][64];
/* each of the 64 work-items copies column lid of every row */
547 s_SPtrans[0][lid] = c_SPtrans[0][lid];
548 s_SPtrans[1][lid] = c_SPtrans[1][lid];
549 s_SPtrans[2][lid] = c_SPtrans[2][lid];
550 s_SPtrans[3][lid] = c_SPtrans[3][lid];
551 s_SPtrans[4][lid] = c_SPtrans[4][lid];
552 s_SPtrans[5][lid] = c_SPtrans[5][lid];
553 s_SPtrans[6][lid] = c_SPtrans[6][lid];
554 s_SPtrans[7][lid] = c_SPtrans[7][lid];
556 s_skb[0][lid] = c_skb[0][lid];
557 s_skb[1][lid] = c_skb[1][lid];
558 s_skb[2][lid] = c_skb[2][lid];
559 s_skb[3][lid] = c_skb[3][lid];
560 s_skb[4][lid] = c_skb[4][lid];
561 s_skb[5][lid] = c_skb[5][lid];
562 s_skb[6][lid] = c_skb[6][lid];
563 s_skb[7][lid] = c_skb[7][lid];
/*
 * The bounds check comes after the barrier, so every work-item of the group
 * takes part in the table copy and reaches the barrier before any of them
 * returns.
 */
565 barrier (CLK_LOCAL_MEM_FENCE);
567 if (gid >= gid_max) return;
/* salt: eight 32-bit words plus length, taken from salt_bufs[salt_pos] */
575 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
576 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
577 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
578 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
582 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
583 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
584 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
585 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
587 const u32 salt_len = salt_bufs[salt_pos].salt_len;
/* one candidate per right-hand word in combs_buf */
593 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
595 const u32 pw_r_len = combs_buf[il_pos].pw_len;
597 const u32 pw_len = pw_l_len + pw_r_len;
/* NOTE(review): the factor 2 presumably reflects two bytes per input character -- confirm */
599 const u32 salt_word_len = (salt_len + pw_len) * 2;
603 wordr0[0] = combs_buf[il_pos].i[0];
604 wordr0[1] = combs_buf[il_pos].i[1];
605 wordr0[2] = combs_buf[il_pos].i[2];
606 wordr0[3] = combs_buf[il_pos].i[3];
610 wordr1[0] = combs_buf[il_pos].i[4];
611 wordr1[1] = combs_buf[il_pos].i[5];
612 wordr1[2] = combs_buf[il_pos].i[6];
613 wordr1[3] = combs_buf[il_pos].i[7];
/* in BASE_LEFT mode the right-hand word is the one offset, by the left word's length */
629 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
631 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
/* merge the left and right words into w0..w3 */
636 w0[0] = wordl0[0] | wordr0[0];
637 w0[1] = wordl0[1] | wordr0[1];
638 w0[2] = wordl0[2] | wordr0[2];
639 w0[3] = wordl0[3] | wordr0[3];
643 w1[0] = wordl1[0] | wordr1[0];
644 w1[1] = wordl1[1] | wordr1[1];
645 w1[2] = wordl1[2] | wordr1[2];
646 w1[3] = wordl1[3] | wordr1[3];
650 w2[0] = wordl2[0] | wordr2[0];
651 w2[1] = wordl2[1] | wordr2[1];
652 w2[2] = wordl2[2] | wordr2[2];
653 w2[3] = wordl2[3] | wordr2[3];
657 w3[0] = wordl3[0] | wordr3[0];
658 w3[1] = wordl3[1] | wordr3[1];
659 w3[2] = wordl3[2] | wordr3[2];
660 w3[3] = wordl3[3] | wordr3[3];
/* offset the working copy by salt_len, then OR the eight salt words in */
688 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
690 w0_t[0] |= salt_buf0[0];
691 w0_t[1] |= salt_buf0[1];
692 w0_t[2] |= salt_buf0[2];
693 w0_t[3] |= salt_buf0[3];
694 w1_t[0] |= salt_buf1[0];
695 w1_t[1] |= salt_buf1[1];
696 w1_t[2] |= salt_buf1[2];
697 w1_t[3] |= salt_buf1[3];
719 * precompute key1 since key is static: 0x0123456789abcdef
720 * plus LEFT_ROTATE by 2
762 * key1 (generate key)
770 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
/*
 * Spread the four bytes of dst[k] over two words: byte 0 goes to bits 8..15
 * and byte 1 to bits 24..31 of data[0]; bytes 2 and 3 are placed the same
 * way in data[1].  All other bits are zero.
 */
774 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
775 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
780 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
784 * key2 (generate hash)
787 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
792 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
796 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
797 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
802 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
/* final state words, named r0 / r1 for the comparison snippet included next */
809 const u32x r0 = iv[0];
810 const u32x r1 = iv[1];
814 #include VECT_COMPARE_M
818 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
822 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/*
 * m03100_s04: kernel entry point with a required work-group size of 64.
 * It follows the same visible steps as m03100_m04 but additionally loads
 * the four digest words at digests_buf[digests_offset] into search[] and
 * ends with the VECT_COMPARE_S snippet.
 *
 * Visible flow:
 *  1. Load the left-hand word (eight 32-bit words) and its length from
 *     pws[gid]; in COMBINATOR_MODE_BASE_RIGHT it is passed through
 *     switch_buffer_by_offset with combs_buf[0].pw_len.
 *  2. Copy c_SPtrans and c_skb into __local memory, synchronise the
 *     work-group, then drop work-items with gid >= gid_max.
 *  3. Load eight salt words and salt_len from salt_bufs[salt_pos], and the
 *     target digest words into search[].
 *  4. For every right-hand word in combs_buf[0 .. combs_cnt - 1]: merge it
 *     with the left-hand word (bitwise OR), offset the merged buffer by
 *     salt_len and OR the salt words in, load the fixed round keys Kc / Kd
 *     (precomputed for the static key named in the comment below), and run
 *     the "key1" pass of _des_crypt_encrypt.  Its iv[0] / iv[1] result is
 *     expanded by _des_crypt_keysetup into new round keys for the "key2"
 *     pass, whose final iv[0] / iv[1] become r0 / r1.
 *
 * Most parameters are not referenced by the visible lines; they may be used
 * by the included comparison snippet or by lines outside this view.
 */
826 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
832 const u32 lid = get_local_id (0);
838 const u32 gid = get_global_id (0);
/* left-hand candidate: first eight 32-bit words of pws[gid] */
842 wordl0[0] = pws[gid].i[ 0];
843 wordl0[1] = pws[gid].i[ 1];
844 wordl0[2] = pws[gid].i[ 2];
845 wordl0[3] = pws[gid].i[ 3];
849 wordl1[0] = pws[gid].i[ 4];
850 wordl1[1] = pws[gid].i[ 5];
851 wordl1[2] = pws[gid].i[ 6];
852 wordl1[3] = pws[gid].i[ 7];
868 const u32 pw_l_len = pws[gid].pw_len;
/*
 * NOTE(review): switch_buffer_by_offset is defined elsewhere; presumably it
 * moves the buffer contents forward by the given byte count so the other
 * word can occupy the front -- confirm.  Only combs_buf[0].pw_len is used
 * as the offset here.
 */
870 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
872 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
/* work-group local copies of the two lookup tables */
879 __local u32 s_SPtrans[8][64];
881 __local u32 s_skb[8][64];
/* each of the 64 work-items copies column lid of every row */
883 s_SPtrans[0][lid] = c_SPtrans[0][lid];
884 s_SPtrans[1][lid] = c_SPtrans[1][lid];
885 s_SPtrans[2][lid] = c_SPtrans[2][lid];
886 s_SPtrans[3][lid] = c_SPtrans[3][lid];
887 s_SPtrans[4][lid] = c_SPtrans[4][lid];
888 s_SPtrans[5][lid] = c_SPtrans[5][lid];
889 s_SPtrans[6][lid] = c_SPtrans[6][lid];
890 s_SPtrans[7][lid] = c_SPtrans[7][lid];
892 s_skb[0][lid] = c_skb[0][lid];
893 s_skb[1][lid] = c_skb[1][lid];
894 s_skb[2][lid] = c_skb[2][lid];
895 s_skb[3][lid] = c_skb[3][lid];
896 s_skb[4][lid] = c_skb[4][lid];
897 s_skb[5][lid] = c_skb[5][lid];
898 s_skb[6][lid] = c_skb[6][lid];
899 s_skb[7][lid] = c_skb[7][lid];
/*
 * The bounds check comes after the barrier, so every work-item of the group
 * takes part in the table copy and reaches the barrier before any of them
 * returns.
 */
901 barrier (CLK_LOCAL_MEM_FENCE);
903 if (gid >= gid_max) return;
/* salt: eight 32-bit words plus length, taken from salt_bufs[salt_pos] */
911 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
912 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
913 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
914 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
918 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
919 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
920 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
921 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
923 const u32 salt_len = salt_bufs[salt_pos].salt_len;
/*
 * Digest words of the single target at digests_offset, in DGST_R0..DGST_R3
 * order; presumably consumed by the VECT_COMPARE_S snippet included at the
 * end of the loop body.
 */
929 const u32 search[4] =
931 digests_buf[digests_offset].digest_buf[DGST_R0],
932 digests_buf[digests_offset].digest_buf[DGST_R1],
933 digests_buf[digests_offset].digest_buf[DGST_R2],
934 digests_buf[digests_offset].digest_buf[DGST_R3]
/* one candidate per right-hand word in combs_buf */
941 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
943 const u32 pw_r_len = combs_buf[il_pos].pw_len;
945 const u32 pw_len = pw_l_len + pw_r_len;
/* NOTE(review): the factor 2 presumably reflects two bytes per input character -- confirm */
947 const u32 salt_word_len = (salt_len + pw_len) * 2;
951 wordr0[0] = combs_buf[il_pos].i[0];
952 wordr0[1] = combs_buf[il_pos].i[1];
953 wordr0[2] = combs_buf[il_pos].i[2];
954 wordr0[3] = combs_buf[il_pos].i[3];
958 wordr1[0] = combs_buf[il_pos].i[4];
959 wordr1[1] = combs_buf[il_pos].i[5];
960 wordr1[2] = combs_buf[il_pos].i[6];
961 wordr1[3] = combs_buf[il_pos].i[7];
/* in BASE_LEFT mode the right-hand word is the one offset, by the left word's length */
977 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
979 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
/* merge the left and right words into w0..w3 */
984 w0[0] = wordl0[0] | wordr0[0];
985 w0[1] = wordl0[1] | wordr0[1];
986 w0[2] = wordl0[2] | wordr0[2];
987 w0[3] = wordl0[3] | wordr0[3];
991 w1[0] = wordl1[0] | wordr1[0];
992 w1[1] = wordl1[1] | wordr1[1];
993 w1[2] = wordl1[2] | wordr1[2];
994 w1[3] = wordl1[3] | wordr1[3];
998 w2[0] = wordl2[0] | wordr2[0];
999 w2[1] = wordl2[1] | wordr2[1];
1000 w2[2] = wordl2[2] | wordr2[2];
1001 w2[3] = wordl2[3] | wordr2[3];
1005 w3[0] = wordl3[0] | wordr3[0];
1006 w3[1] = wordl3[1] | wordr3[1];
1007 w3[2] = wordl3[2] | wordr3[2];
1008 w3[3] = wordl3[3] | wordr3[3];
/* offset the working copy by salt_len, then OR the eight salt words in */
1036 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
1038 w0_t[0] |= salt_buf0[0];
1039 w0_t[1] |= salt_buf0[1];
1040 w0_t[2] |= salt_buf0[2];
1041 w0_t[3] |= salt_buf0[3];
1042 w1_t[0] |= salt_buf1[0];
1043 w1_t[1] |= salt_buf1[1];
1044 w1_t[2] |= salt_buf1[2];
1045 w1_t[3] |= salt_buf1[3];
1067 * precompute key1 since key is static: 0x0123456789abcdef
1068 * plus LEFT_ROTATE by 2
1073 Kc[ 0] = 0x64649040;
1074 Kc[ 1] = 0x14909858;
1075 Kc[ 2] = 0xc4b44888;
1076 Kc[ 3] = 0x9094e438;
1077 Kc[ 4] = 0xd8a004f0;
1078 Kc[ 5] = 0xa8f02810;
1079 Kc[ 6] = 0xc84048d8;
1080 Kc[ 7] = 0x68d804a8;
1081 Kc[ 8] = 0x0490e40c;
1082 Kc[ 9] = 0xac183024;
1083 Kc[10] = 0x24c07c10;
1084 Kc[11] = 0x8c88c038;
1085 Kc[12] = 0xc048c824;
1086 Kc[13] = 0x4c0470a8;
1087 Kc[14] = 0x584020b4;
1088 Kc[15] = 0x00742c4c;
/* second array of the fixed first-pass round keys */
1092 Kd[ 0] = 0xa42ce40c;
1093 Kd[ 1] = 0x64689858;
1094 Kd[ 2] = 0x484050b8;
1095 Kd[ 3] = 0xe8184814;
1096 Kd[ 4] = 0x405cc070;
1097 Kd[ 5] = 0xa010784c;
1098 Kd[ 6] = 0x6074a800;
1099 Kd[ 7] = 0x80701c1c;
1100 Kd[ 8] = 0x9cd49430;
1101 Kd[ 9] = 0x4c8ce078;
1102 Kd[10] = 0x5c18c088;
1103 Kd[11] = 0x28a8a4c8;
1104 Kd[12] = 0x3c180838;
1105 Kd[13] = 0xb0b86c20;
1106 Kd[14] = 0xac84a094;
1107 Kd[15] = 0x4ce0c0c4;
1110 * key1 (generate key)
1118 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
/*
 * Spread the four bytes of dst[k] over two words: byte 0 goes to bits 8..15
 * and byte 1 to bits 24..31 of data[0]; bytes 2 and 3 are placed the same
 * way in data[1].  All other bits are zero.
 */
1122 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1123 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1128 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1132 * key2 (generate hash)
1135 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1140 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1144 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1145 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1150 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
/* final state words, named r0 / r1 for the comparison snippet included next */
1157 const u32x r0 = iv[0];
1158 const u32x r1 = iv[1];
1162 #include VECT_COMPARE_S
1166 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1170 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)