2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
19 #include "include/rp_gpu.h"
22 #define COMPARE_S "check_single_comp4.c"
23 #define COMPARE_M "check_multi_comp4.c"
25 #define PERM_OP(a,b,tt,n,m) \
35 #define HPERM_OP(a,tt,n,m) \
41 tt = tt >> (16 + n); \
47 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
48 PERM_OP (l, r, tt, 16, 0x0000ffff); \
49 PERM_OP (r, l, tt, 2, 0x33333333); \
50 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
51 PERM_OP (r, l, tt, 1, 0x55555555); \
56 PERM_OP (l, r, tt, 1, 0x55555555); \
57 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
58 PERM_OP (l, r, tt, 2, 0x33333333); \
59 PERM_OP (r, l, tt, 16, 0x0000ffff); \
60 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
63 __constant u32 c_SPtrans[8][64] =
66 0x02080800, 0x00080000, 0x02000002, 0x02080802,
67 0x02000000, 0x00080802, 0x00080002, 0x02000002,
68 0x00080802, 0x02080800, 0x02080000, 0x00000802,
69 0x02000802, 0x02000000, 0x00000000, 0x00080002,
70 0x00080000, 0x00000002, 0x02000800, 0x00080800,
71 0x02080802, 0x02080000, 0x00000802, 0x02000800,
72 0x00000002, 0x00000800, 0x00080800, 0x02080002,
73 0x00000800, 0x02000802, 0x02080002, 0x00000000,
74 0x00000000, 0x02080802, 0x02000800, 0x00080002,
75 0x02080800, 0x00080000, 0x00000802, 0x02000800,
76 0x02080002, 0x00000800, 0x00080800, 0x02000002,
77 0x00080802, 0x00000002, 0x02000002, 0x02080000,
78 0x02080802, 0x00080800, 0x02080000, 0x02000802,
79 0x02000000, 0x00000802, 0x00080002, 0x00000000,
80 0x00080000, 0x02000000, 0x02000802, 0x02080800,
81 0x00000002, 0x02080002, 0x00000800, 0x00080802,
83 0x40108010, 0x00000000, 0x00108000, 0x40100000,
84 0x40000010, 0x00008010, 0x40008000, 0x00108000,
85 0x00008000, 0x40100010, 0x00000010, 0x40008000,
86 0x00100010, 0x40108000, 0x40100000, 0x00000010,
87 0x00100000, 0x40008010, 0x40100010, 0x00008000,
88 0x00108010, 0x40000000, 0x00000000, 0x00100010,
89 0x40008010, 0x00108010, 0x40108000, 0x40000010,
90 0x40000000, 0x00100000, 0x00008010, 0x40108010,
91 0x00100010, 0x40108000, 0x40008000, 0x00108010,
92 0x40108010, 0x00100010, 0x40000010, 0x00000000,
93 0x40000000, 0x00008010, 0x00100000, 0x40100010,
94 0x00008000, 0x40000000, 0x00108010, 0x40008010,
95 0x40108000, 0x00008000, 0x00000000, 0x40000010,
96 0x00000010, 0x40108010, 0x00108000, 0x40100000,
97 0x40100010, 0x00100000, 0x00008010, 0x40008000,
98 0x40008010, 0x00000010, 0x40100000, 0x00108000,
100 0x04000001, 0x04040100, 0x00000100, 0x04000101,
101 0x00040001, 0x04000000, 0x04000101, 0x00040100,
102 0x04000100, 0x00040000, 0x04040000, 0x00000001,
103 0x04040101, 0x00000101, 0x00000001, 0x04040001,
104 0x00000000, 0x00040001, 0x04040100, 0x00000100,
105 0x00000101, 0x04040101, 0x00040000, 0x04000001,
106 0x04040001, 0x04000100, 0x00040101, 0x04040000,
107 0x00040100, 0x00000000, 0x04000000, 0x00040101,
108 0x04040100, 0x00000100, 0x00000001, 0x00040000,
109 0x00000101, 0x00040001, 0x04040000, 0x04000101,
110 0x00000000, 0x04040100, 0x00040100, 0x04040001,
111 0x00040001, 0x04000000, 0x04040101, 0x00000001,
112 0x00040101, 0x04000001, 0x04000000, 0x04040101,
113 0x00040000, 0x04000100, 0x04000101, 0x00040100,
114 0x04000100, 0x00000000, 0x04040001, 0x00000101,
115 0x04000001, 0x00040101, 0x00000100, 0x04040000,
117 0x00401008, 0x10001000, 0x00000008, 0x10401008,
118 0x00000000, 0x10400000, 0x10001008, 0x00400008,
119 0x10401000, 0x10000008, 0x10000000, 0x00001008,
120 0x10000008, 0x00401008, 0x00400000, 0x10000000,
121 0x10400008, 0x00401000, 0x00001000, 0x00000008,
122 0x00401000, 0x10001008, 0x10400000, 0x00001000,
123 0x00001008, 0x00000000, 0x00400008, 0x10401000,
124 0x10001000, 0x10400008, 0x10401008, 0x00400000,
125 0x10400008, 0x00001008, 0x00400000, 0x10000008,
126 0x00401000, 0x10001000, 0x00000008, 0x10400000,
127 0x10001008, 0x00000000, 0x00001000, 0x00400008,
128 0x00000000, 0x10400008, 0x10401000, 0x00001000,
129 0x10000000, 0x10401008, 0x00401008, 0x00400000,
130 0x10401008, 0x00000008, 0x10001000, 0x00401008,
131 0x00400008, 0x00401000, 0x10400000, 0x10001008,
132 0x00001008, 0x10000000, 0x10000008, 0x10401000,
134 0x08000000, 0x00010000, 0x00000400, 0x08010420,
135 0x08010020, 0x08000400, 0x00010420, 0x08010000,
136 0x00010000, 0x00000020, 0x08000020, 0x00010400,
137 0x08000420, 0x08010020, 0x08010400, 0x00000000,
138 0x00010400, 0x08000000, 0x00010020, 0x00000420,
139 0x08000400, 0x00010420, 0x00000000, 0x08000020,
140 0x00000020, 0x08000420, 0x08010420, 0x00010020,
141 0x08010000, 0x00000400, 0x00000420, 0x08010400,
142 0x08010400, 0x08000420, 0x00010020, 0x08010000,
143 0x00010000, 0x00000020, 0x08000020, 0x08000400,
144 0x08000000, 0x00010400, 0x08010420, 0x00000000,
145 0x00010420, 0x08000000, 0x00000400, 0x00010020,
146 0x08000420, 0x00000400, 0x00000000, 0x08010420,
147 0x08010020, 0x08010400, 0x00000420, 0x00010000,
148 0x00010400, 0x08010020, 0x08000400, 0x00000420,
149 0x00000020, 0x00010420, 0x08010000, 0x08000020,
151 0x80000040, 0x00200040, 0x00000000, 0x80202000,
152 0x00200040, 0x00002000, 0x80002040, 0x00200000,
153 0x00002040, 0x80202040, 0x00202000, 0x80000000,
154 0x80002000, 0x80000040, 0x80200000, 0x00202040,
155 0x00200000, 0x80002040, 0x80200040, 0x00000000,
156 0x00002000, 0x00000040, 0x80202000, 0x80200040,
157 0x80202040, 0x80200000, 0x80000000, 0x00002040,
158 0x00000040, 0x00202000, 0x00202040, 0x80002000,
159 0x00002040, 0x80000000, 0x80002000, 0x00202040,
160 0x80202000, 0x00200040, 0x00000000, 0x80002000,
161 0x80000000, 0x00002000, 0x80200040, 0x00200000,
162 0x00200040, 0x80202040, 0x00202000, 0x00000040,
163 0x80202040, 0x00202000, 0x00200000, 0x80002040,
164 0x80000040, 0x80200000, 0x00202040, 0x00000000,
165 0x00002000, 0x80000040, 0x80002040, 0x80202000,
166 0x80200000, 0x00002040, 0x00000040, 0x80200040,
168 0x00004000, 0x00000200, 0x01000200, 0x01000004,
169 0x01004204, 0x00004004, 0x00004200, 0x00000000,
170 0x01000000, 0x01000204, 0x00000204, 0x01004000,
171 0x00000004, 0x01004200, 0x01004000, 0x00000204,
172 0x01000204, 0x00004000, 0x00004004, 0x01004204,
173 0x00000000, 0x01000200, 0x01000004, 0x00004200,
174 0x01004004, 0x00004204, 0x01004200, 0x00000004,
175 0x00004204, 0x01004004, 0x00000200, 0x01000000,
176 0x00004204, 0x01004000, 0x01004004, 0x00000204,
177 0x00004000, 0x00000200, 0x01000000, 0x01004004,
178 0x01000204, 0x00004204, 0x00004200, 0x00000000,
179 0x00000200, 0x01000004, 0x00000004, 0x01000200,
180 0x00000000, 0x01000204, 0x01000200, 0x00004200,
181 0x00000204, 0x00004000, 0x01004204, 0x01000000,
182 0x01004200, 0x00000004, 0x00004004, 0x01004204,
183 0x01000004, 0x01004200, 0x01004000, 0x00004004,
185 0x20800080, 0x20820000, 0x00020080, 0x00000000,
186 0x20020000, 0x00800080, 0x20800000, 0x20820080,
187 0x00000080, 0x20000000, 0x00820000, 0x00020080,
188 0x00820080, 0x20020080, 0x20000080, 0x20800000,
189 0x00020000, 0x00820080, 0x00800080, 0x20020000,
190 0x20820080, 0x20000080, 0x00000000, 0x00820000,
191 0x20000000, 0x00800000, 0x20020080, 0x20800080,
192 0x00800000, 0x00020000, 0x20820000, 0x00000080,
193 0x00800000, 0x00020000, 0x20000080, 0x20820080,
194 0x00020080, 0x20000000, 0x00000000, 0x00820000,
195 0x20800080, 0x20020080, 0x20020000, 0x00800080,
196 0x20820000, 0x00000080, 0x00800080, 0x20020000,
197 0x20820080, 0x00800000, 0x20800000, 0x20000080,
198 0x00820000, 0x00020080, 0x20020080, 0x20800000,
199 0x00000080, 0x20820000, 0x00820080, 0x00000000,
200 0x20000000, 0x20800080, 0x00020000, 0x00820080,
203 __constant u32 c_skb[8][64] =
205 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
206 0x00000000, 0x00000010, 0x20000000, 0x20000010,
207 0x00010000, 0x00010010, 0x20010000, 0x20010010,
208 0x00000800, 0x00000810, 0x20000800, 0x20000810,
209 0x00010800, 0x00010810, 0x20010800, 0x20010810,
210 0x00000020, 0x00000030, 0x20000020, 0x20000030,
211 0x00010020, 0x00010030, 0x20010020, 0x20010030,
212 0x00000820, 0x00000830, 0x20000820, 0x20000830,
213 0x00010820, 0x00010830, 0x20010820, 0x20010830,
214 0x00080000, 0x00080010, 0x20080000, 0x20080010,
215 0x00090000, 0x00090010, 0x20090000, 0x20090010,
216 0x00080800, 0x00080810, 0x20080800, 0x20080810,
217 0x00090800, 0x00090810, 0x20090800, 0x20090810,
218 0x00080020, 0x00080030, 0x20080020, 0x20080030,
219 0x00090020, 0x00090030, 0x20090020, 0x20090030,
220 0x00080820, 0x00080830, 0x20080820, 0x20080830,
221 0x00090820, 0x00090830, 0x20090820, 0x20090830,
222 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
223 0x00000000, 0x02000000, 0x00002000, 0x02002000,
224 0x00200000, 0x02200000, 0x00202000, 0x02202000,
225 0x00000004, 0x02000004, 0x00002004, 0x02002004,
226 0x00200004, 0x02200004, 0x00202004, 0x02202004,
227 0x00000400, 0x02000400, 0x00002400, 0x02002400,
228 0x00200400, 0x02200400, 0x00202400, 0x02202400,
229 0x00000404, 0x02000404, 0x00002404, 0x02002404,
230 0x00200404, 0x02200404, 0x00202404, 0x02202404,
231 0x10000000, 0x12000000, 0x10002000, 0x12002000,
232 0x10200000, 0x12200000, 0x10202000, 0x12202000,
233 0x10000004, 0x12000004, 0x10002004, 0x12002004,
234 0x10200004, 0x12200004, 0x10202004, 0x12202004,
235 0x10000400, 0x12000400, 0x10002400, 0x12002400,
236 0x10200400, 0x12200400, 0x10202400, 0x12202400,
237 0x10000404, 0x12000404, 0x10002404, 0x12002404,
238 0x10200404, 0x12200404, 0x10202404, 0x12202404,
239 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
240 0x00000000, 0x00000001, 0x00040000, 0x00040001,
241 0x01000000, 0x01000001, 0x01040000, 0x01040001,
242 0x00000002, 0x00000003, 0x00040002, 0x00040003,
243 0x01000002, 0x01000003, 0x01040002, 0x01040003,
244 0x00000200, 0x00000201, 0x00040200, 0x00040201,
245 0x01000200, 0x01000201, 0x01040200, 0x01040201,
246 0x00000202, 0x00000203, 0x00040202, 0x00040203,
247 0x01000202, 0x01000203, 0x01040202, 0x01040203,
248 0x08000000, 0x08000001, 0x08040000, 0x08040001,
249 0x09000000, 0x09000001, 0x09040000, 0x09040001,
250 0x08000002, 0x08000003, 0x08040002, 0x08040003,
251 0x09000002, 0x09000003, 0x09040002, 0x09040003,
252 0x08000200, 0x08000201, 0x08040200, 0x08040201,
253 0x09000200, 0x09000201, 0x09040200, 0x09040201,
254 0x08000202, 0x08000203, 0x08040202, 0x08040203,
255 0x09000202, 0x09000203, 0x09040202, 0x09040203,
256 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
257 0x00000000, 0x00100000, 0x00000100, 0x00100100,
258 0x00000008, 0x00100008, 0x00000108, 0x00100108,
259 0x00001000, 0x00101000, 0x00001100, 0x00101100,
260 0x00001008, 0x00101008, 0x00001108, 0x00101108,
261 0x04000000, 0x04100000, 0x04000100, 0x04100100,
262 0x04000008, 0x04100008, 0x04000108, 0x04100108,
263 0x04001000, 0x04101000, 0x04001100, 0x04101100,
264 0x04001008, 0x04101008, 0x04001108, 0x04101108,
265 0x00020000, 0x00120000, 0x00020100, 0x00120100,
266 0x00020008, 0x00120008, 0x00020108, 0x00120108,
267 0x00021000, 0x00121000, 0x00021100, 0x00121100,
268 0x00021008, 0x00121008, 0x00021108, 0x00121108,
269 0x04020000, 0x04120000, 0x04020100, 0x04120100,
270 0x04020008, 0x04120008, 0x04020108, 0x04120108,
271 0x04021000, 0x04121000, 0x04021100, 0x04121100,
272 0x04021008, 0x04121008, 0x04021108, 0x04121108,
273 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
274 0x00000000, 0x10000000, 0x00010000, 0x10010000,
275 0x00000004, 0x10000004, 0x00010004, 0x10010004,
276 0x20000000, 0x30000000, 0x20010000, 0x30010000,
277 0x20000004, 0x30000004, 0x20010004, 0x30010004,
278 0x00100000, 0x10100000, 0x00110000, 0x10110000,
279 0x00100004, 0x10100004, 0x00110004, 0x10110004,
280 0x20100000, 0x30100000, 0x20110000, 0x30110000,
281 0x20100004, 0x30100004, 0x20110004, 0x30110004,
282 0x00001000, 0x10001000, 0x00011000, 0x10011000,
283 0x00001004, 0x10001004, 0x00011004, 0x10011004,
284 0x20001000, 0x30001000, 0x20011000, 0x30011000,
285 0x20001004, 0x30001004, 0x20011004, 0x30011004,
286 0x00101000, 0x10101000, 0x00111000, 0x10111000,
287 0x00101004, 0x10101004, 0x00111004, 0x10111004,
288 0x20101000, 0x30101000, 0x20111000, 0x30111000,
289 0x20101004, 0x30101004, 0x20111004, 0x30111004,
290 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
291 0x00000000, 0x08000000, 0x00000008, 0x08000008,
292 0x00000400, 0x08000400, 0x00000408, 0x08000408,
293 0x00020000, 0x08020000, 0x00020008, 0x08020008,
294 0x00020400, 0x08020400, 0x00020408, 0x08020408,
295 0x00000001, 0x08000001, 0x00000009, 0x08000009,
296 0x00000401, 0x08000401, 0x00000409, 0x08000409,
297 0x00020001, 0x08020001, 0x00020009, 0x08020009,
298 0x00020401, 0x08020401, 0x00020409, 0x08020409,
299 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
300 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
301 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
302 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
303 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
304 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
305 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
306 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
307 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
308 0x00000000, 0x00000100, 0x00080000, 0x00080100,
309 0x01000000, 0x01000100, 0x01080000, 0x01080100,
310 0x00000010, 0x00000110, 0x00080010, 0x00080110,
311 0x01000010, 0x01000110, 0x01080010, 0x01080110,
312 0x00200000, 0x00200100, 0x00280000, 0x00280100,
313 0x01200000, 0x01200100, 0x01280000, 0x01280100,
314 0x00200010, 0x00200110, 0x00280010, 0x00280110,
315 0x01200010, 0x01200110, 0x01280010, 0x01280110,
316 0x00000200, 0x00000300, 0x00080200, 0x00080300,
317 0x01000200, 0x01000300, 0x01080200, 0x01080300,
318 0x00000210, 0x00000310, 0x00080210, 0x00080310,
319 0x01000210, 0x01000310, 0x01080210, 0x01080310,
320 0x00200200, 0x00200300, 0x00280200, 0x00280300,
321 0x01200200, 0x01200300, 0x01280200, 0x01280300,
322 0x00200210, 0x00200310, 0x00280210, 0x00280310,
323 0x01200210, 0x01200310, 0x01280210, 0x01280310,
324 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
325 0x00000000, 0x04000000, 0x00040000, 0x04040000,
326 0x00000002, 0x04000002, 0x00040002, 0x04040002,
327 0x00002000, 0x04002000, 0x00042000, 0x04042000,
328 0x00002002, 0x04002002, 0x00042002, 0x04042002,
329 0x00000020, 0x04000020, 0x00040020, 0x04040020,
330 0x00000022, 0x04000022, 0x00040022, 0x04040022,
331 0x00002020, 0x04002020, 0x00042020, 0x04042020,
332 0x00002022, 0x04002022, 0x00042022, 0x04042022,
333 0x00000800, 0x04000800, 0x00040800, 0x04040800,
334 0x00000802, 0x04000802, 0x00040802, 0x04040802,
335 0x00002800, 0x04002800, 0x00042800, 0x04042800,
336 0x00002802, 0x04002802, 0x00042802, 0x04042802,
337 0x00000820, 0x04000820, 0x00040820, 0x04040820,
338 0x00000822, 0x04000822, 0x00040822, 0x04040822,
339 0x00002820, 0x04002820, 0x00042820, 0x04042820,
340 0x00002822, 0x04002822, 0x00042822, 0x04042822
343 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
344 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
347 #define BOX(i,n,S) u32 ((S)[(n)][(i)])
351 #define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
355 #define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
358 static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64])
371 for (u32 i = 0; i < 16; i += 2)
377 t = Kd[i + 0] ^ rotl32 (r, 28u);
380 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
381 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
382 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
383 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
384 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
385 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
386 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
387 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
390 t = Kd[i + 1] ^ rotl32 (l, 28u);
393 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
394 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
395 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
396 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
397 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
398 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
399 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
400 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
412 static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64])
416 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
417 HPERM_OP (c, tt, 2, 0xcccc0000);
418 HPERM_OP (d, tt, 2, 0xcccc0000);
419 PERM_OP (d, c, tt, 1, 0x55555555);
420 PERM_OP (c, d, tt, 8, 0x00ff00ff);
421 PERM_OP (d, c, tt, 1, 0x55555555);
423 d = ((d & 0x000000ff) << 16)
424 | ((d & 0x0000ff00) << 0)
425 | ((d & 0x00ff0000) >> 16)
426 | ((c & 0xf0000000) >> 4);
431 for (u32 i = 0; i < 16; i++)
433 c = c >> shifts3s0[i] | c << shifts3s1[i];
434 d = d >> shifts3s0[i] | d << shifts3s1[i];
439 u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
440 | BOX ((((c >> 6) & 0x03)
441 | ((c >> 7) & 0x3c)), 1, s_skb)
442 | BOX ((((c >> 13) & 0x0f)
443 | ((c >> 14) & 0x30)), 2, s_skb)
444 | BOX ((((c >> 20) & 0x01)
446 | ((c >> 22) & 0x38)), 3, s_skb);
448 u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
449 | BOX ((((d >> 7) & 0x03)
450 | ((d >> 8) & 0x3c)), 5, s_skb)
451 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
452 | BOX ((((d >> 21) & 0x0f)
453 | ((d >> 22) & 0x30)), 7, s_skb);
455 #if defined cl_amd_media_ops
456 Kc[i] = amd_bytealign (t, s << 16, 2);
457 Kd[i] = amd_bytealign (t >> 16, s, 2);
459 Kc[i] = ((t << 16) | (s & 0x0000ffff));
460 Kd[i] = ((s >> 16) | (t & 0xffff0000));
463 Kc[i] = rotl32 (Kc[i], 2u);
464 Kd[i] = rotl32 (Kd[i], 2u);
468 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
474 const u32 lid = get_local_id (0);
480 const u32 gid = get_global_id (0);
484 pw_buf0[0] = pws[gid].i[ 0];
485 pw_buf0[1] = pws[gid].i[ 1];
486 pw_buf0[2] = pws[gid].i[ 2];
487 pw_buf0[3] = pws[gid].i[ 3];
491 pw_buf1[0] = pws[gid].i[ 4];
492 pw_buf1[1] = pws[gid].i[ 5];
493 pw_buf1[2] = pws[gid].i[ 6];
494 pw_buf1[3] = pws[gid].i[ 7];
496 const u32 pw_len = pws[gid].pw_len;
502 __local u32 s_SPtrans[8][64];
503 __local u32 s_skb[8][64];
505 s_SPtrans[0][lid] = c_SPtrans[0][lid];
506 s_SPtrans[1][lid] = c_SPtrans[1][lid];
507 s_SPtrans[2][lid] = c_SPtrans[2][lid];
508 s_SPtrans[3][lid] = c_SPtrans[3][lid];
509 s_SPtrans[4][lid] = c_SPtrans[4][lid];
510 s_SPtrans[5][lid] = c_SPtrans[5][lid];
511 s_SPtrans[6][lid] = c_SPtrans[6][lid];
512 s_SPtrans[7][lid] = c_SPtrans[7][lid];
514 s_skb[0][lid] = c_skb[0][lid];
515 s_skb[1][lid] = c_skb[1][lid];
516 s_skb[2][lid] = c_skb[2][lid];
517 s_skb[3][lid] = c_skb[3][lid];
518 s_skb[4][lid] = c_skb[4][lid];
519 s_skb[5][lid] = c_skb[5][lid];
520 s_skb[6][lid] = c_skb[6][lid];
521 s_skb[7][lid] = c_skb[7][lid];
523 barrier (CLK_LOCAL_MEM_FENCE);
525 if (gid >= gid_max) return;
533 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
534 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
535 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
536 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
540 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
541 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
542 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
543 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
545 const u32 salt_len = salt_bufs[salt_pos].salt_len;
551 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
581 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
583 const u32 salt_word_len = (salt_len + out_len) * 2;
617 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
619 w0_t[0] |= salt_buf0[0];
620 w0_t[1] |= salt_buf0[1];
621 w0_t[2] |= salt_buf0[2];
622 w0_t[3] |= salt_buf0[3];
623 w1_t[0] |= salt_buf1[0];
624 w1_t[1] |= salt_buf1[1];
625 w1_t[2] |= salt_buf1[2];
626 w1_t[3] |= salt_buf1[3];
648 * precompute key1 since key is static: 0x0123456789abcdef
649 * plus LEFT_ROTATE by 2
691 * key1 (generate key)
699 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
703 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
704 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
709 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
713 * key2 (generate hash)
716 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
721 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
725 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
726 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
731 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
738 const u32 r0 = iv[0];
739 const u32 r1 = iv[1];
747 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
751 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
755 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
761 const u32 lid = get_local_id (0);
767 const u32 gid = get_global_id (0);
771 pw_buf0[0] = pws[gid].i[ 0];
772 pw_buf0[1] = pws[gid].i[ 1];
773 pw_buf0[2] = pws[gid].i[ 2];
774 pw_buf0[3] = pws[gid].i[ 3];
778 pw_buf1[0] = pws[gid].i[ 4];
779 pw_buf1[1] = pws[gid].i[ 5];
780 pw_buf1[2] = pws[gid].i[ 6];
781 pw_buf1[3] = pws[gid].i[ 7];
783 const u32 pw_len = pws[gid].pw_len;
789 __local u32 s_SPtrans[8][64];
790 __local u32 s_skb[8][64];
792 s_SPtrans[0][lid] = c_SPtrans[0][lid];
793 s_SPtrans[1][lid] = c_SPtrans[1][lid];
794 s_SPtrans[2][lid] = c_SPtrans[2][lid];
795 s_SPtrans[3][lid] = c_SPtrans[3][lid];
796 s_SPtrans[4][lid] = c_SPtrans[4][lid];
797 s_SPtrans[5][lid] = c_SPtrans[5][lid];
798 s_SPtrans[6][lid] = c_SPtrans[6][lid];
799 s_SPtrans[7][lid] = c_SPtrans[7][lid];
801 s_skb[0][lid] = c_skb[0][lid];
802 s_skb[1][lid] = c_skb[1][lid];
803 s_skb[2][lid] = c_skb[2][lid];
804 s_skb[3][lid] = c_skb[3][lid];
805 s_skb[4][lid] = c_skb[4][lid];
806 s_skb[5][lid] = c_skb[5][lid];
807 s_skb[6][lid] = c_skb[6][lid];
808 s_skb[7][lid] = c_skb[7][lid];
810 barrier (CLK_LOCAL_MEM_FENCE);
812 if (gid >= gid_max) return;
820 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
821 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
822 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
823 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
827 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
828 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
829 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
830 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
832 const u32 salt_len = salt_bufs[salt_pos].salt_len;
838 const u32 search[4] =
840 digests_buf[digests_offset].digest_buf[DGST_R0],
841 digests_buf[digests_offset].digest_buf[DGST_R1],
842 digests_buf[digests_offset].digest_buf[DGST_R2],
843 digests_buf[digests_offset].digest_buf[DGST_R3]
850 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
880 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
882 const u32 salt_word_len = (salt_len + out_len) * 2;
916 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
918 w0_t[0] |= salt_buf0[0];
919 w0_t[1] |= salt_buf0[1];
920 w0_t[2] |= salt_buf0[2];
921 w0_t[3] |= salt_buf0[3];
922 w1_t[0] |= salt_buf1[0];
923 w1_t[1] |= salt_buf1[1];
924 w1_t[2] |= salt_buf1[2];
925 w1_t[3] |= salt_buf1[3];
947 * precompute key1 since key is static: 0x0123456789abcdef
948 * plus LEFT_ROTATE by 2
990 * key1 (generate key)
998 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1002 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1003 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1008 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1012 * key2 (generate hash)
1015 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1020 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1024 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1025 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1030 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1037 const u32 r0 = iv[0];
1038 const u32 r1 = iv[1];
1046 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1050 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)