2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 #define PERM_OP(a,b,tt,n,m) \
33 #define HPERM_OP(a,tt,n,m) \
39 tt = tt >> (16 + n); \
45 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
46 PERM_OP (l, r, tt, 16, 0x0000ffff); \
47 PERM_OP (r, l, tt, 2, 0x33333333); \
48 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
49 PERM_OP (r, l, tt, 1, 0x55555555); \
54 PERM_OP (l, r, tt, 1, 0x55555555); \
55 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
56 PERM_OP (l, r, tt, 2, 0x33333333); \
57 PERM_OP (r, l, tt, 16, 0x0000ffff); \
58 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
61 __constant u32 c_SPtrans[8][64] =
64 0x02080800, 0x00080000, 0x02000002, 0x02080802,
65 0x02000000, 0x00080802, 0x00080002, 0x02000002,
66 0x00080802, 0x02080800, 0x02080000, 0x00000802,
67 0x02000802, 0x02000000, 0x00000000, 0x00080002,
68 0x00080000, 0x00000002, 0x02000800, 0x00080800,
69 0x02080802, 0x02080000, 0x00000802, 0x02000800,
70 0x00000002, 0x00000800, 0x00080800, 0x02080002,
71 0x00000800, 0x02000802, 0x02080002, 0x00000000,
72 0x00000000, 0x02080802, 0x02000800, 0x00080002,
73 0x02080800, 0x00080000, 0x00000802, 0x02000800,
74 0x02080002, 0x00000800, 0x00080800, 0x02000002,
75 0x00080802, 0x00000002, 0x02000002, 0x02080000,
76 0x02080802, 0x00080800, 0x02080000, 0x02000802,
77 0x02000000, 0x00000802, 0x00080002, 0x00000000,
78 0x00080000, 0x02000000, 0x02000802, 0x02080800,
79 0x00000002, 0x02080002, 0x00000800, 0x00080802,
81 0x40108010, 0x00000000, 0x00108000, 0x40100000,
82 0x40000010, 0x00008010, 0x40008000, 0x00108000,
83 0x00008000, 0x40100010, 0x00000010, 0x40008000,
84 0x00100010, 0x40108000, 0x40100000, 0x00000010,
85 0x00100000, 0x40008010, 0x40100010, 0x00008000,
86 0x00108010, 0x40000000, 0x00000000, 0x00100010,
87 0x40008010, 0x00108010, 0x40108000, 0x40000010,
88 0x40000000, 0x00100000, 0x00008010, 0x40108010,
89 0x00100010, 0x40108000, 0x40008000, 0x00108010,
90 0x40108010, 0x00100010, 0x40000010, 0x00000000,
91 0x40000000, 0x00008010, 0x00100000, 0x40100010,
92 0x00008000, 0x40000000, 0x00108010, 0x40008010,
93 0x40108000, 0x00008000, 0x00000000, 0x40000010,
94 0x00000010, 0x40108010, 0x00108000, 0x40100000,
95 0x40100010, 0x00100000, 0x00008010, 0x40008000,
96 0x40008010, 0x00000010, 0x40100000, 0x00108000,
98 0x04000001, 0x04040100, 0x00000100, 0x04000101,
99 0x00040001, 0x04000000, 0x04000101, 0x00040100,
100 0x04000100, 0x00040000, 0x04040000, 0x00000001,
101 0x04040101, 0x00000101, 0x00000001, 0x04040001,
102 0x00000000, 0x00040001, 0x04040100, 0x00000100,
103 0x00000101, 0x04040101, 0x00040000, 0x04000001,
104 0x04040001, 0x04000100, 0x00040101, 0x04040000,
105 0x00040100, 0x00000000, 0x04000000, 0x00040101,
106 0x04040100, 0x00000100, 0x00000001, 0x00040000,
107 0x00000101, 0x00040001, 0x04040000, 0x04000101,
108 0x00000000, 0x04040100, 0x00040100, 0x04040001,
109 0x00040001, 0x04000000, 0x04040101, 0x00000001,
110 0x00040101, 0x04000001, 0x04000000, 0x04040101,
111 0x00040000, 0x04000100, 0x04000101, 0x00040100,
112 0x04000100, 0x00000000, 0x04040001, 0x00000101,
113 0x04000001, 0x00040101, 0x00000100, 0x04040000,
115 0x00401008, 0x10001000, 0x00000008, 0x10401008,
116 0x00000000, 0x10400000, 0x10001008, 0x00400008,
117 0x10401000, 0x10000008, 0x10000000, 0x00001008,
118 0x10000008, 0x00401008, 0x00400000, 0x10000000,
119 0x10400008, 0x00401000, 0x00001000, 0x00000008,
120 0x00401000, 0x10001008, 0x10400000, 0x00001000,
121 0x00001008, 0x00000000, 0x00400008, 0x10401000,
122 0x10001000, 0x10400008, 0x10401008, 0x00400000,
123 0x10400008, 0x00001008, 0x00400000, 0x10000008,
124 0x00401000, 0x10001000, 0x00000008, 0x10400000,
125 0x10001008, 0x00000000, 0x00001000, 0x00400008,
126 0x00000000, 0x10400008, 0x10401000, 0x00001000,
127 0x10000000, 0x10401008, 0x00401008, 0x00400000,
128 0x10401008, 0x00000008, 0x10001000, 0x00401008,
129 0x00400008, 0x00401000, 0x10400000, 0x10001008,
130 0x00001008, 0x10000000, 0x10000008, 0x10401000,
132 0x08000000, 0x00010000, 0x00000400, 0x08010420,
133 0x08010020, 0x08000400, 0x00010420, 0x08010000,
134 0x00010000, 0x00000020, 0x08000020, 0x00010400,
135 0x08000420, 0x08010020, 0x08010400, 0x00000000,
136 0x00010400, 0x08000000, 0x00010020, 0x00000420,
137 0x08000400, 0x00010420, 0x00000000, 0x08000020,
138 0x00000020, 0x08000420, 0x08010420, 0x00010020,
139 0x08010000, 0x00000400, 0x00000420, 0x08010400,
140 0x08010400, 0x08000420, 0x00010020, 0x08010000,
141 0x00010000, 0x00000020, 0x08000020, 0x08000400,
142 0x08000000, 0x00010400, 0x08010420, 0x00000000,
143 0x00010420, 0x08000000, 0x00000400, 0x00010020,
144 0x08000420, 0x00000400, 0x00000000, 0x08010420,
145 0x08010020, 0x08010400, 0x00000420, 0x00010000,
146 0x00010400, 0x08010020, 0x08000400, 0x00000420,
147 0x00000020, 0x00010420, 0x08010000, 0x08000020,
149 0x80000040, 0x00200040, 0x00000000, 0x80202000,
150 0x00200040, 0x00002000, 0x80002040, 0x00200000,
151 0x00002040, 0x80202040, 0x00202000, 0x80000000,
152 0x80002000, 0x80000040, 0x80200000, 0x00202040,
153 0x00200000, 0x80002040, 0x80200040, 0x00000000,
154 0x00002000, 0x00000040, 0x80202000, 0x80200040,
155 0x80202040, 0x80200000, 0x80000000, 0x00002040,
156 0x00000040, 0x00202000, 0x00202040, 0x80002000,
157 0x00002040, 0x80000000, 0x80002000, 0x00202040,
158 0x80202000, 0x00200040, 0x00000000, 0x80002000,
159 0x80000000, 0x00002000, 0x80200040, 0x00200000,
160 0x00200040, 0x80202040, 0x00202000, 0x00000040,
161 0x80202040, 0x00202000, 0x00200000, 0x80002040,
162 0x80000040, 0x80200000, 0x00202040, 0x00000000,
163 0x00002000, 0x80000040, 0x80002040, 0x80202000,
164 0x80200000, 0x00002040, 0x00000040, 0x80200040,
166 0x00004000, 0x00000200, 0x01000200, 0x01000004,
167 0x01004204, 0x00004004, 0x00004200, 0x00000000,
168 0x01000000, 0x01000204, 0x00000204, 0x01004000,
169 0x00000004, 0x01004200, 0x01004000, 0x00000204,
170 0x01000204, 0x00004000, 0x00004004, 0x01004204,
171 0x00000000, 0x01000200, 0x01000004, 0x00004200,
172 0x01004004, 0x00004204, 0x01004200, 0x00000004,
173 0x00004204, 0x01004004, 0x00000200, 0x01000000,
174 0x00004204, 0x01004000, 0x01004004, 0x00000204,
175 0x00004000, 0x00000200, 0x01000000, 0x01004004,
176 0x01000204, 0x00004204, 0x00004200, 0x00000000,
177 0x00000200, 0x01000004, 0x00000004, 0x01000200,
178 0x00000000, 0x01000204, 0x01000200, 0x00004200,
179 0x00000204, 0x00004000, 0x01004204, 0x01000000,
180 0x01004200, 0x00000004, 0x00004004, 0x01004204,
181 0x01000004, 0x01004200, 0x01004000, 0x00004004,
183 0x20800080, 0x20820000, 0x00020080, 0x00000000,
184 0x20020000, 0x00800080, 0x20800000, 0x20820080,
185 0x00000080, 0x20000000, 0x00820000, 0x00020080,
186 0x00820080, 0x20020080, 0x20000080, 0x20800000,
187 0x00020000, 0x00820080, 0x00800080, 0x20020000,
188 0x20820080, 0x20000080, 0x00000000, 0x00820000,
189 0x20000000, 0x00800000, 0x20020080, 0x20800080,
190 0x00800000, 0x00020000, 0x20820000, 0x00000080,
191 0x00800000, 0x00020000, 0x20000080, 0x20820080,
192 0x00020080, 0x20000000, 0x00000000, 0x00820000,
193 0x20800080, 0x20020080, 0x20020000, 0x00800080,
194 0x20820000, 0x00000080, 0x00800080, 0x20020000,
195 0x20820080, 0x00800000, 0x20800000, 0x20000080,
196 0x00820000, 0x00020080, 0x20020080, 0x20800000,
197 0x00000080, 0x20820000, 0x00820080, 0x00000000,
198 0x20000000, 0x20800080, 0x00020000, 0x00820080,
201 __constant u32 c_skb[8][64] =
203 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
204 0x00000000, 0x00000010, 0x20000000, 0x20000010,
205 0x00010000, 0x00010010, 0x20010000, 0x20010010,
206 0x00000800, 0x00000810, 0x20000800, 0x20000810,
207 0x00010800, 0x00010810, 0x20010800, 0x20010810,
208 0x00000020, 0x00000030, 0x20000020, 0x20000030,
209 0x00010020, 0x00010030, 0x20010020, 0x20010030,
210 0x00000820, 0x00000830, 0x20000820, 0x20000830,
211 0x00010820, 0x00010830, 0x20010820, 0x20010830,
212 0x00080000, 0x00080010, 0x20080000, 0x20080010,
213 0x00090000, 0x00090010, 0x20090000, 0x20090010,
214 0x00080800, 0x00080810, 0x20080800, 0x20080810,
215 0x00090800, 0x00090810, 0x20090800, 0x20090810,
216 0x00080020, 0x00080030, 0x20080020, 0x20080030,
217 0x00090020, 0x00090030, 0x20090020, 0x20090030,
218 0x00080820, 0x00080830, 0x20080820, 0x20080830,
219 0x00090820, 0x00090830, 0x20090820, 0x20090830,
220 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
221 0x00000000, 0x02000000, 0x00002000, 0x02002000,
222 0x00200000, 0x02200000, 0x00202000, 0x02202000,
223 0x00000004, 0x02000004, 0x00002004, 0x02002004,
224 0x00200004, 0x02200004, 0x00202004, 0x02202004,
225 0x00000400, 0x02000400, 0x00002400, 0x02002400,
226 0x00200400, 0x02200400, 0x00202400, 0x02202400,
227 0x00000404, 0x02000404, 0x00002404, 0x02002404,
228 0x00200404, 0x02200404, 0x00202404, 0x02202404,
229 0x10000000, 0x12000000, 0x10002000, 0x12002000,
230 0x10200000, 0x12200000, 0x10202000, 0x12202000,
231 0x10000004, 0x12000004, 0x10002004, 0x12002004,
232 0x10200004, 0x12200004, 0x10202004, 0x12202004,
233 0x10000400, 0x12000400, 0x10002400, 0x12002400,
234 0x10200400, 0x12200400, 0x10202400, 0x12202400,
235 0x10000404, 0x12000404, 0x10002404, 0x12002404,
236 0x10200404, 0x12200404, 0x10202404, 0x12202404,
237 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
238 0x00000000, 0x00000001, 0x00040000, 0x00040001,
239 0x01000000, 0x01000001, 0x01040000, 0x01040001,
240 0x00000002, 0x00000003, 0x00040002, 0x00040003,
241 0x01000002, 0x01000003, 0x01040002, 0x01040003,
242 0x00000200, 0x00000201, 0x00040200, 0x00040201,
243 0x01000200, 0x01000201, 0x01040200, 0x01040201,
244 0x00000202, 0x00000203, 0x00040202, 0x00040203,
245 0x01000202, 0x01000203, 0x01040202, 0x01040203,
246 0x08000000, 0x08000001, 0x08040000, 0x08040001,
247 0x09000000, 0x09000001, 0x09040000, 0x09040001,
248 0x08000002, 0x08000003, 0x08040002, 0x08040003,
249 0x09000002, 0x09000003, 0x09040002, 0x09040003,
250 0x08000200, 0x08000201, 0x08040200, 0x08040201,
251 0x09000200, 0x09000201, 0x09040200, 0x09040201,
252 0x08000202, 0x08000203, 0x08040202, 0x08040203,
253 0x09000202, 0x09000203, 0x09040202, 0x09040203,
254 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
255 0x00000000, 0x00100000, 0x00000100, 0x00100100,
256 0x00000008, 0x00100008, 0x00000108, 0x00100108,
257 0x00001000, 0x00101000, 0x00001100, 0x00101100,
258 0x00001008, 0x00101008, 0x00001108, 0x00101108,
259 0x04000000, 0x04100000, 0x04000100, 0x04100100,
260 0x04000008, 0x04100008, 0x04000108, 0x04100108,
261 0x04001000, 0x04101000, 0x04001100, 0x04101100,
262 0x04001008, 0x04101008, 0x04001108, 0x04101108,
263 0x00020000, 0x00120000, 0x00020100, 0x00120100,
264 0x00020008, 0x00120008, 0x00020108, 0x00120108,
265 0x00021000, 0x00121000, 0x00021100, 0x00121100,
266 0x00021008, 0x00121008, 0x00021108, 0x00121108,
267 0x04020000, 0x04120000, 0x04020100, 0x04120100,
268 0x04020008, 0x04120008, 0x04020108, 0x04120108,
269 0x04021000, 0x04121000, 0x04021100, 0x04121100,
270 0x04021008, 0x04121008, 0x04021108, 0x04121108,
271 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
272 0x00000000, 0x10000000, 0x00010000, 0x10010000,
273 0x00000004, 0x10000004, 0x00010004, 0x10010004,
274 0x20000000, 0x30000000, 0x20010000, 0x30010000,
275 0x20000004, 0x30000004, 0x20010004, 0x30010004,
276 0x00100000, 0x10100000, 0x00110000, 0x10110000,
277 0x00100004, 0x10100004, 0x00110004, 0x10110004,
278 0x20100000, 0x30100000, 0x20110000, 0x30110000,
279 0x20100004, 0x30100004, 0x20110004, 0x30110004,
280 0x00001000, 0x10001000, 0x00011000, 0x10011000,
281 0x00001004, 0x10001004, 0x00011004, 0x10011004,
282 0x20001000, 0x30001000, 0x20011000, 0x30011000,
283 0x20001004, 0x30001004, 0x20011004, 0x30011004,
284 0x00101000, 0x10101000, 0x00111000, 0x10111000,
285 0x00101004, 0x10101004, 0x00111004, 0x10111004,
286 0x20101000, 0x30101000, 0x20111000, 0x30111000,
287 0x20101004, 0x30101004, 0x20111004, 0x30111004,
288 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
289 0x00000000, 0x08000000, 0x00000008, 0x08000008,
290 0x00000400, 0x08000400, 0x00000408, 0x08000408,
291 0x00020000, 0x08020000, 0x00020008, 0x08020008,
292 0x00020400, 0x08020400, 0x00020408, 0x08020408,
293 0x00000001, 0x08000001, 0x00000009, 0x08000009,
294 0x00000401, 0x08000401, 0x00000409, 0x08000409,
295 0x00020001, 0x08020001, 0x00020009, 0x08020009,
296 0x00020401, 0x08020401, 0x00020409, 0x08020409,
297 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
298 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
299 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
300 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
301 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
302 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
303 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
304 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
305 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
306 0x00000000, 0x00000100, 0x00080000, 0x00080100,
307 0x01000000, 0x01000100, 0x01080000, 0x01080100,
308 0x00000010, 0x00000110, 0x00080010, 0x00080110,
309 0x01000010, 0x01000110, 0x01080010, 0x01080110,
310 0x00200000, 0x00200100, 0x00280000, 0x00280100,
311 0x01200000, 0x01200100, 0x01280000, 0x01280100,
312 0x00200010, 0x00200110, 0x00280010, 0x00280110,
313 0x01200010, 0x01200110, 0x01280010, 0x01280110,
314 0x00000200, 0x00000300, 0x00080200, 0x00080300,
315 0x01000200, 0x01000300, 0x01080200, 0x01080300,
316 0x00000210, 0x00000310, 0x00080210, 0x00080310,
317 0x01000210, 0x01000310, 0x01080210, 0x01080310,
318 0x00200200, 0x00200300, 0x00280200, 0x00280300,
319 0x01200200, 0x01200300, 0x01280200, 0x01280300,
320 0x00200210, 0x00200310, 0x00280210, 0x00280310,
321 0x01200210, 0x01200310, 0x01280210, 0x01280310,
322 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
323 0x00000000, 0x04000000, 0x00040000, 0x04040000,
324 0x00000002, 0x04000002, 0x00040002, 0x04040002,
325 0x00002000, 0x04002000, 0x00042000, 0x04042000,
326 0x00002002, 0x04002002, 0x00042002, 0x04042002,
327 0x00000020, 0x04000020, 0x00040020, 0x04040020,
328 0x00000022, 0x04000022, 0x00040022, 0x04040022,
329 0x00002020, 0x04002020, 0x00042020, 0x04042020,
330 0x00002022, 0x04002022, 0x00042022, 0x04042022,
331 0x00000800, 0x04000800, 0x00040800, 0x04040800,
332 0x00000802, 0x04000802, 0x00040802, 0x04040802,
333 0x00002800, 0x04002800, 0x00042800, 0x04042800,
334 0x00002802, 0x04002802, 0x00042802, 0x04042802,
335 0x00000820, 0x04000820, 0x00040820, 0x04040820,
336 0x00000822, 0x04000822, 0x00040822, 0x04040822,
337 0x00002820, 0x04002820, 0x00042820, 0x04042820,
338 0x00002822, 0x04002822, 0x00042822, 0x04042822
341 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
342 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
345 #define BOX(i,n,S) u32 ((S)[(n)][(i)])
349 #define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
353 #define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
356 static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64])
369 for (u32 i = 0; i < 16; i += 2)
375 t = Kd[i + 0] ^ rotl32 (r, 28u);
378 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
379 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
380 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
381 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
382 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
383 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
384 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
385 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
388 t = Kd[i + 1] ^ rotl32 (l, 28u);
391 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
392 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
393 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
394 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
395 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
396 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
397 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
398 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
410 static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64])
414 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
415 HPERM_OP (c, tt, 2, 0xcccc0000);
416 HPERM_OP (d, tt, 2, 0xcccc0000);
417 PERM_OP (d, c, tt, 1, 0x55555555);
418 PERM_OP (c, d, tt, 8, 0x00ff00ff);
419 PERM_OP (d, c, tt, 1, 0x55555555);
421 d = ((d & 0x000000ff) << 16)
422 | ((d & 0x0000ff00) << 0)
423 | ((d & 0x00ff0000) >> 16)
424 | ((c & 0xf0000000) >> 4);
429 for (u32 i = 0; i < 16; i++)
431 c = c >> shifts3s0[i] | c << shifts3s1[i];
432 d = d >> shifts3s0[i] | d << shifts3s1[i];
437 u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
438 | BOX ((((c >> 6) & 0x03)
439 | ((c >> 7) & 0x3c)), 1, s_skb)
440 | BOX ((((c >> 13) & 0x0f)
441 | ((c >> 14) & 0x30)), 2, s_skb)
442 | BOX ((((c >> 20) & 0x01)
444 | ((c >> 22) & 0x38)), 3, s_skb);
446 u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
447 | BOX ((((d >> 7) & 0x03)
448 | ((d >> 8) & 0x3c)), 5, s_skb)
449 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
450 | BOX ((((d >> 21) & 0x0f)
451 | ((d >> 22) & 0x30)), 7, s_skb);
453 #if defined cl_amd_media_ops
454 Kc[i] = amd_bytealign (t, s << 16, 2);
455 Kd[i] = amd_bytealign (t >> 16, s, 2);
457 Kc[i] = ((t << 16) | (s & 0x0000ffff));
458 Kd[i] = ((s >> 16) | (t & 0xffff0000));
461 Kc[i] = rotl32 (Kc[i], 2u);
462 Kd[i] = rotl32 (Kd[i], 2u);
466 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
472 const u32 lid = get_local_id (0);
478 const u32 gid = get_global_id (0);
482 wordl0[0] = pws[gid].i[ 0];
483 wordl0[1] = pws[gid].i[ 1];
484 wordl0[2] = pws[gid].i[ 2];
485 wordl0[3] = pws[gid].i[ 3];
489 wordl1[0] = pws[gid].i[ 4];
490 wordl1[1] = pws[gid].i[ 5];
491 wordl1[2] = pws[gid].i[ 6];
492 wordl1[3] = pws[gid].i[ 7];
508 const u32 pw_l_len = pws[gid].pw_len;
510 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
512 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
519 __local u32 s_SPtrans[8][64];
521 __local u32 s_skb[8][64];
523 s_SPtrans[0][lid] = c_SPtrans[0][lid];
524 s_SPtrans[1][lid] = c_SPtrans[1][lid];
525 s_SPtrans[2][lid] = c_SPtrans[2][lid];
526 s_SPtrans[3][lid] = c_SPtrans[3][lid];
527 s_SPtrans[4][lid] = c_SPtrans[4][lid];
528 s_SPtrans[5][lid] = c_SPtrans[5][lid];
529 s_SPtrans[6][lid] = c_SPtrans[6][lid];
530 s_SPtrans[7][lid] = c_SPtrans[7][lid];
532 s_skb[0][lid] = c_skb[0][lid];
533 s_skb[1][lid] = c_skb[1][lid];
534 s_skb[2][lid] = c_skb[2][lid];
535 s_skb[3][lid] = c_skb[3][lid];
536 s_skb[4][lid] = c_skb[4][lid];
537 s_skb[5][lid] = c_skb[5][lid];
538 s_skb[6][lid] = c_skb[6][lid];
539 s_skb[7][lid] = c_skb[7][lid];
541 barrier (CLK_LOCAL_MEM_FENCE);
543 if (gid >= gid_max) return;
551 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
552 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
553 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
554 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
558 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
559 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
560 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
561 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
563 const u32 salt_len = salt_bufs[salt_pos].salt_len;
569 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
571 const u32 pw_r_len = combs_buf[il_pos].pw_len;
573 const u32 pw_len = pw_l_len + pw_r_len;
575 const u32 salt_word_len = (salt_len + pw_len) * 2;
579 wordr0[0] = combs_buf[il_pos].i[0];
580 wordr0[1] = combs_buf[il_pos].i[1];
581 wordr0[2] = combs_buf[il_pos].i[2];
582 wordr0[3] = combs_buf[il_pos].i[3];
586 wordr1[0] = combs_buf[il_pos].i[4];
587 wordr1[1] = combs_buf[il_pos].i[5];
588 wordr1[2] = combs_buf[il_pos].i[6];
589 wordr1[3] = combs_buf[il_pos].i[7];
605 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
607 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
612 w0[0] = wordl0[0] | wordr0[0];
613 w0[1] = wordl0[1] | wordr0[1];
614 w0[2] = wordl0[2] | wordr0[2];
615 w0[3] = wordl0[3] | wordr0[3];
619 w1[0] = wordl1[0] | wordr1[0];
620 w1[1] = wordl1[1] | wordr1[1];
621 w1[2] = wordl1[2] | wordr1[2];
622 w1[3] = wordl1[3] | wordr1[3];
626 w2[0] = wordl2[0] | wordr2[0];
627 w2[1] = wordl2[1] | wordr2[1];
628 w2[2] = wordl2[2] | wordr2[2];
629 w2[3] = wordl2[3] | wordr2[3];
633 w3[0] = wordl3[0] | wordr3[0];
634 w3[1] = wordl3[1] | wordr3[1];
635 w3[2] = wordl3[2] | wordr3[2];
636 w3[3] = wordl3[3] | wordr3[3];
664 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
666 w0_t[0] |= salt_buf0[0];
667 w0_t[1] |= salt_buf0[1];
668 w0_t[2] |= salt_buf0[2];
669 w0_t[3] |= salt_buf0[3];
670 w1_t[0] |= salt_buf1[0];
671 w1_t[1] |= salt_buf1[1];
672 w1_t[2] |= salt_buf1[2];
673 w1_t[3] |= salt_buf1[3];
695 * precompute key1 since key is static: 0x0123456789abcdef
696 * plus LEFT_ROTATE by 2
738 * key1 (generate key)
746 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
750 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
751 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
756 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
760 * key2 (generate hash)
763 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
768 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
772 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
773 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
778 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
785 const u32 r0 = iv[0];
786 const u32 r1 = iv[1];
794 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
798 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
802 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
808 const u32 lid = get_local_id (0);
814 const u32 gid = get_global_id (0);
818 wordl0[0] = pws[gid].i[ 0];
819 wordl0[1] = pws[gid].i[ 1];
820 wordl0[2] = pws[gid].i[ 2];
821 wordl0[3] = pws[gid].i[ 3];
825 wordl1[0] = pws[gid].i[ 4];
826 wordl1[1] = pws[gid].i[ 5];
827 wordl1[2] = pws[gid].i[ 6];
828 wordl1[3] = pws[gid].i[ 7];
844 const u32 pw_l_len = pws[gid].pw_len;
846 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
848 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
855 __local u32 s_SPtrans[8][64];
857 __local u32 s_skb[8][64];
859 s_SPtrans[0][lid] = c_SPtrans[0][lid];
860 s_SPtrans[1][lid] = c_SPtrans[1][lid];
861 s_SPtrans[2][lid] = c_SPtrans[2][lid];
862 s_SPtrans[3][lid] = c_SPtrans[3][lid];
863 s_SPtrans[4][lid] = c_SPtrans[4][lid];
864 s_SPtrans[5][lid] = c_SPtrans[5][lid];
865 s_SPtrans[6][lid] = c_SPtrans[6][lid];
866 s_SPtrans[7][lid] = c_SPtrans[7][lid];
868 s_skb[0][lid] = c_skb[0][lid];
869 s_skb[1][lid] = c_skb[1][lid];
870 s_skb[2][lid] = c_skb[2][lid];
871 s_skb[3][lid] = c_skb[3][lid];
872 s_skb[4][lid] = c_skb[4][lid];
873 s_skb[5][lid] = c_skb[5][lid];
874 s_skb[6][lid] = c_skb[6][lid];
875 s_skb[7][lid] = c_skb[7][lid];
877 barrier (CLK_LOCAL_MEM_FENCE);
879 if (gid >= gid_max) return;
887 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
888 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
889 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
890 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
894 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
895 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
896 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
897 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
899 const u32 salt_len = salt_bufs[salt_pos].salt_len;
905 const u32 search[4] =
907 digests_buf[digests_offset].digest_buf[DGST_R0],
908 digests_buf[digests_offset].digest_buf[DGST_R1],
909 digests_buf[digests_offset].digest_buf[DGST_R2],
910 digests_buf[digests_offset].digest_buf[DGST_R3]
917 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
919 const u32 pw_r_len = combs_buf[il_pos].pw_len;
921 const u32 pw_len = pw_l_len + pw_r_len;
923 const u32 salt_word_len = (salt_len + pw_len) * 2;
927 wordr0[0] = combs_buf[il_pos].i[0];
928 wordr0[1] = combs_buf[il_pos].i[1];
929 wordr0[2] = combs_buf[il_pos].i[2];
930 wordr0[3] = combs_buf[il_pos].i[3];
934 wordr1[0] = combs_buf[il_pos].i[4];
935 wordr1[1] = combs_buf[il_pos].i[5];
936 wordr1[2] = combs_buf[il_pos].i[6];
937 wordr1[3] = combs_buf[il_pos].i[7];
953 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
955 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
960 w0[0] = wordl0[0] | wordr0[0];
961 w0[1] = wordl0[1] | wordr0[1];
962 w0[2] = wordl0[2] | wordr0[2];
963 w0[3] = wordl0[3] | wordr0[3];
967 w1[0] = wordl1[0] | wordr1[0];
968 w1[1] = wordl1[1] | wordr1[1];
969 w1[2] = wordl1[2] | wordr1[2];
970 w1[3] = wordl1[3] | wordr1[3];
974 w2[0] = wordl2[0] | wordr2[0];
975 w2[1] = wordl2[1] | wordr2[1];
976 w2[2] = wordl2[2] | wordr2[2];
977 w2[3] = wordl2[3] | wordr2[3];
981 w3[0] = wordl3[0] | wordr3[0];
982 w3[1] = wordl3[1] | wordr3[1];
983 w3[2] = wordl3[2] | wordr3[2];
984 w3[3] = wordl3[3] | wordr3[3];
1012 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
1014 w0_t[0] |= salt_buf0[0];
1015 w0_t[1] |= salt_buf0[1];
1016 w0_t[2] |= salt_buf0[2];
1017 w0_t[3] |= salt_buf0[3];
1018 w1_t[0] |= salt_buf1[0];
1019 w1_t[1] |= salt_buf1[1];
1020 w1_t[2] |= salt_buf1[2];
1021 w1_t[3] |= salt_buf1[3];
1043 * precompute key1 since key is static: 0x0123456789abcdef
1044 * plus LEFT_ROTATE by 2
1049 Kc[ 0] = 0x64649040;
1050 Kc[ 1] = 0x14909858;
1051 Kc[ 2] = 0xc4b44888;
1052 Kc[ 3] = 0x9094e438;
1053 Kc[ 4] = 0xd8a004f0;
1054 Kc[ 5] = 0xa8f02810;
1055 Kc[ 6] = 0xc84048d8;
1056 Kc[ 7] = 0x68d804a8;
1057 Kc[ 8] = 0x0490e40c;
1058 Kc[ 9] = 0xac183024;
1059 Kc[10] = 0x24c07c10;
1060 Kc[11] = 0x8c88c038;
1061 Kc[12] = 0xc048c824;
1062 Kc[13] = 0x4c0470a8;
1063 Kc[14] = 0x584020b4;
1064 Kc[15] = 0x00742c4c;
1068 Kd[ 0] = 0xa42ce40c;
1069 Kd[ 1] = 0x64689858;
1070 Kd[ 2] = 0x484050b8;
1071 Kd[ 3] = 0xe8184814;
1072 Kd[ 4] = 0x405cc070;
1073 Kd[ 5] = 0xa010784c;
1074 Kd[ 6] = 0x6074a800;
1075 Kd[ 7] = 0x80701c1c;
1076 Kd[ 8] = 0x9cd49430;
1077 Kd[ 9] = 0x4c8ce078;
1078 Kd[10] = 0x5c18c088;
1079 Kd[11] = 0x28a8a4c8;
1080 Kd[12] = 0x3c180838;
1081 Kd[13] = 0xb0b86c20;
1082 Kd[14] = 0xac84a094;
1083 Kd[15] = 0x4ce0c0c4;
1086 * key1 (generate key)
1094 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1098 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1099 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1104 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1108 * key2 (generate hash)
1111 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1116 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1120 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1121 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1126 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1133 const u32 r0 = iv[0];
1134 const u32 r1 = iv[1];
1142 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1146 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)