2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
19 #include "include/rp_gpu.h"
22 #define COMPARE_S "check_single_comp4.c"
23 #define COMPARE_M "check_multi_comp4.c"
25 #define PERM_OP(a,b,tt,n,m) \
35 #define HPERM_OP(a,tt,n,m) \
41 tt = tt >> (16 + n); \
47 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
48 PERM_OP (l, r, tt, 16, 0x0000ffff); \
49 PERM_OP (r, l, tt, 2, 0x33333333); \
50 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
51 PERM_OP (r, l, tt, 1, 0x55555555); \
56 PERM_OP (l, r, tt, 1, 0x55555555); \
57 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
58 PERM_OP (l, r, tt, 2, 0x33333333); \
59 PERM_OP (r, l, tt, 16, 0x0000ffff); \
60 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
63 __constant u32 c_SPtrans[8][64] =
66 0x02080800, 0x00080000, 0x02000002, 0x02080802,
67 0x02000000, 0x00080802, 0x00080002, 0x02000002,
68 0x00080802, 0x02080800, 0x02080000, 0x00000802,
69 0x02000802, 0x02000000, 0x00000000, 0x00080002,
70 0x00080000, 0x00000002, 0x02000800, 0x00080800,
71 0x02080802, 0x02080000, 0x00000802, 0x02000800,
72 0x00000002, 0x00000800, 0x00080800, 0x02080002,
73 0x00000800, 0x02000802, 0x02080002, 0x00000000,
74 0x00000000, 0x02080802, 0x02000800, 0x00080002,
75 0x02080800, 0x00080000, 0x00000802, 0x02000800,
76 0x02080002, 0x00000800, 0x00080800, 0x02000002,
77 0x00080802, 0x00000002, 0x02000002, 0x02080000,
78 0x02080802, 0x00080800, 0x02080000, 0x02000802,
79 0x02000000, 0x00000802, 0x00080002, 0x00000000,
80 0x00080000, 0x02000000, 0x02000802, 0x02080800,
81 0x00000002, 0x02080002, 0x00000800, 0x00080802,
83 0x40108010, 0x00000000, 0x00108000, 0x40100000,
84 0x40000010, 0x00008010, 0x40008000, 0x00108000,
85 0x00008000, 0x40100010, 0x00000010, 0x40008000,
86 0x00100010, 0x40108000, 0x40100000, 0x00000010,
87 0x00100000, 0x40008010, 0x40100010, 0x00008000,
88 0x00108010, 0x40000000, 0x00000000, 0x00100010,
89 0x40008010, 0x00108010, 0x40108000, 0x40000010,
90 0x40000000, 0x00100000, 0x00008010, 0x40108010,
91 0x00100010, 0x40108000, 0x40008000, 0x00108010,
92 0x40108010, 0x00100010, 0x40000010, 0x00000000,
93 0x40000000, 0x00008010, 0x00100000, 0x40100010,
94 0x00008000, 0x40000000, 0x00108010, 0x40008010,
95 0x40108000, 0x00008000, 0x00000000, 0x40000010,
96 0x00000010, 0x40108010, 0x00108000, 0x40100000,
97 0x40100010, 0x00100000, 0x00008010, 0x40008000,
98 0x40008010, 0x00000010, 0x40100000, 0x00108000,
100 0x04000001, 0x04040100, 0x00000100, 0x04000101,
101 0x00040001, 0x04000000, 0x04000101, 0x00040100,
102 0x04000100, 0x00040000, 0x04040000, 0x00000001,
103 0x04040101, 0x00000101, 0x00000001, 0x04040001,
104 0x00000000, 0x00040001, 0x04040100, 0x00000100,
105 0x00000101, 0x04040101, 0x00040000, 0x04000001,
106 0x04040001, 0x04000100, 0x00040101, 0x04040000,
107 0x00040100, 0x00000000, 0x04000000, 0x00040101,
108 0x04040100, 0x00000100, 0x00000001, 0x00040000,
109 0x00000101, 0x00040001, 0x04040000, 0x04000101,
110 0x00000000, 0x04040100, 0x00040100, 0x04040001,
111 0x00040001, 0x04000000, 0x04040101, 0x00000001,
112 0x00040101, 0x04000001, 0x04000000, 0x04040101,
113 0x00040000, 0x04000100, 0x04000101, 0x00040100,
114 0x04000100, 0x00000000, 0x04040001, 0x00000101,
115 0x04000001, 0x00040101, 0x00000100, 0x04040000,
117 0x00401008, 0x10001000, 0x00000008, 0x10401008,
118 0x00000000, 0x10400000, 0x10001008, 0x00400008,
119 0x10401000, 0x10000008, 0x10000000, 0x00001008,
120 0x10000008, 0x00401008, 0x00400000, 0x10000000,
121 0x10400008, 0x00401000, 0x00001000, 0x00000008,
122 0x00401000, 0x10001008, 0x10400000, 0x00001000,
123 0x00001008, 0x00000000, 0x00400008, 0x10401000,
124 0x10001000, 0x10400008, 0x10401008, 0x00400000,
125 0x10400008, 0x00001008, 0x00400000, 0x10000008,
126 0x00401000, 0x10001000, 0x00000008, 0x10400000,
127 0x10001008, 0x00000000, 0x00001000, 0x00400008,
128 0x00000000, 0x10400008, 0x10401000, 0x00001000,
129 0x10000000, 0x10401008, 0x00401008, 0x00400000,
130 0x10401008, 0x00000008, 0x10001000, 0x00401008,
131 0x00400008, 0x00401000, 0x10400000, 0x10001008,
132 0x00001008, 0x10000000, 0x10000008, 0x10401000,
134 0x08000000, 0x00010000, 0x00000400, 0x08010420,
135 0x08010020, 0x08000400, 0x00010420, 0x08010000,
136 0x00010000, 0x00000020, 0x08000020, 0x00010400,
137 0x08000420, 0x08010020, 0x08010400, 0x00000000,
138 0x00010400, 0x08000000, 0x00010020, 0x00000420,
139 0x08000400, 0x00010420, 0x00000000, 0x08000020,
140 0x00000020, 0x08000420, 0x08010420, 0x00010020,
141 0x08010000, 0x00000400, 0x00000420, 0x08010400,
142 0x08010400, 0x08000420, 0x00010020, 0x08010000,
143 0x00010000, 0x00000020, 0x08000020, 0x08000400,
144 0x08000000, 0x00010400, 0x08010420, 0x00000000,
145 0x00010420, 0x08000000, 0x00000400, 0x00010020,
146 0x08000420, 0x00000400, 0x00000000, 0x08010420,
147 0x08010020, 0x08010400, 0x00000420, 0x00010000,
148 0x00010400, 0x08010020, 0x08000400, 0x00000420,
149 0x00000020, 0x00010420, 0x08010000, 0x08000020,
151 0x80000040, 0x00200040, 0x00000000, 0x80202000,
152 0x00200040, 0x00002000, 0x80002040, 0x00200000,
153 0x00002040, 0x80202040, 0x00202000, 0x80000000,
154 0x80002000, 0x80000040, 0x80200000, 0x00202040,
155 0x00200000, 0x80002040, 0x80200040, 0x00000000,
156 0x00002000, 0x00000040, 0x80202000, 0x80200040,
157 0x80202040, 0x80200000, 0x80000000, 0x00002040,
158 0x00000040, 0x00202000, 0x00202040, 0x80002000,
159 0x00002040, 0x80000000, 0x80002000, 0x00202040,
160 0x80202000, 0x00200040, 0x00000000, 0x80002000,
161 0x80000000, 0x00002000, 0x80200040, 0x00200000,
162 0x00200040, 0x80202040, 0x00202000, 0x00000040,
163 0x80202040, 0x00202000, 0x00200000, 0x80002040,
164 0x80000040, 0x80200000, 0x00202040, 0x00000000,
165 0x00002000, 0x80000040, 0x80002040, 0x80202000,
166 0x80200000, 0x00002040, 0x00000040, 0x80200040,
168 0x00004000, 0x00000200, 0x01000200, 0x01000004,
169 0x01004204, 0x00004004, 0x00004200, 0x00000000,
170 0x01000000, 0x01000204, 0x00000204, 0x01004000,
171 0x00000004, 0x01004200, 0x01004000, 0x00000204,
172 0x01000204, 0x00004000, 0x00004004, 0x01004204,
173 0x00000000, 0x01000200, 0x01000004, 0x00004200,
174 0x01004004, 0x00004204, 0x01004200, 0x00000004,
175 0x00004204, 0x01004004, 0x00000200, 0x01000000,
176 0x00004204, 0x01004000, 0x01004004, 0x00000204,
177 0x00004000, 0x00000200, 0x01000000, 0x01004004,
178 0x01000204, 0x00004204, 0x00004200, 0x00000000,
179 0x00000200, 0x01000004, 0x00000004, 0x01000200,
180 0x00000000, 0x01000204, 0x01000200, 0x00004200,
181 0x00000204, 0x00004000, 0x01004204, 0x01000000,
182 0x01004200, 0x00000004, 0x00004004, 0x01004204,
183 0x01000004, 0x01004200, 0x01004000, 0x00004004,
185 0x20800080, 0x20820000, 0x00020080, 0x00000000,
186 0x20020000, 0x00800080, 0x20800000, 0x20820080,
187 0x00000080, 0x20000000, 0x00820000, 0x00020080,
188 0x00820080, 0x20020080, 0x20000080, 0x20800000,
189 0x00020000, 0x00820080, 0x00800080, 0x20020000,
190 0x20820080, 0x20000080, 0x00000000, 0x00820000,
191 0x20000000, 0x00800000, 0x20020080, 0x20800080,
192 0x00800000, 0x00020000, 0x20820000, 0x00000080,
193 0x00800000, 0x00020000, 0x20000080, 0x20820080,
194 0x00020080, 0x20000000, 0x00000000, 0x00820000,
195 0x20800080, 0x20020080, 0x20020000, 0x00800080,
196 0x20820000, 0x00000080, 0x00800080, 0x20020000,
197 0x20820080, 0x00800000, 0x20800000, 0x20000080,
198 0x00820000, 0x00020080, 0x20020080, 0x20800000,
199 0x00000080, 0x20820000, 0x00820080, 0x00000000,
200 0x20000000, 0x20800080, 0x00020000, 0x00820080,
203 __constant u32 c_skb[8][64] =
205 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
206 0x00000000, 0x00000010, 0x20000000, 0x20000010,
207 0x00010000, 0x00010010, 0x20010000, 0x20010010,
208 0x00000800, 0x00000810, 0x20000800, 0x20000810,
209 0x00010800, 0x00010810, 0x20010800, 0x20010810,
210 0x00000020, 0x00000030, 0x20000020, 0x20000030,
211 0x00010020, 0x00010030, 0x20010020, 0x20010030,
212 0x00000820, 0x00000830, 0x20000820, 0x20000830,
213 0x00010820, 0x00010830, 0x20010820, 0x20010830,
214 0x00080000, 0x00080010, 0x20080000, 0x20080010,
215 0x00090000, 0x00090010, 0x20090000, 0x20090010,
216 0x00080800, 0x00080810, 0x20080800, 0x20080810,
217 0x00090800, 0x00090810, 0x20090800, 0x20090810,
218 0x00080020, 0x00080030, 0x20080020, 0x20080030,
219 0x00090020, 0x00090030, 0x20090020, 0x20090030,
220 0x00080820, 0x00080830, 0x20080820, 0x20080830,
221 0x00090820, 0x00090830, 0x20090820, 0x20090830,
222 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
223 0x00000000, 0x02000000, 0x00002000, 0x02002000,
224 0x00200000, 0x02200000, 0x00202000, 0x02202000,
225 0x00000004, 0x02000004, 0x00002004, 0x02002004,
226 0x00200004, 0x02200004, 0x00202004, 0x02202004,
227 0x00000400, 0x02000400, 0x00002400, 0x02002400,
228 0x00200400, 0x02200400, 0x00202400, 0x02202400,
229 0x00000404, 0x02000404, 0x00002404, 0x02002404,
230 0x00200404, 0x02200404, 0x00202404, 0x02202404,
231 0x10000000, 0x12000000, 0x10002000, 0x12002000,
232 0x10200000, 0x12200000, 0x10202000, 0x12202000,
233 0x10000004, 0x12000004, 0x10002004, 0x12002004,
234 0x10200004, 0x12200004, 0x10202004, 0x12202004,
235 0x10000400, 0x12000400, 0x10002400, 0x12002400,
236 0x10200400, 0x12200400, 0x10202400, 0x12202400,
237 0x10000404, 0x12000404, 0x10002404, 0x12002404,
238 0x10200404, 0x12200404, 0x10202404, 0x12202404,
239 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
240 0x00000000, 0x00000001, 0x00040000, 0x00040001,
241 0x01000000, 0x01000001, 0x01040000, 0x01040001,
242 0x00000002, 0x00000003, 0x00040002, 0x00040003,
243 0x01000002, 0x01000003, 0x01040002, 0x01040003,
244 0x00000200, 0x00000201, 0x00040200, 0x00040201,
245 0x01000200, 0x01000201, 0x01040200, 0x01040201,
246 0x00000202, 0x00000203, 0x00040202, 0x00040203,
247 0x01000202, 0x01000203, 0x01040202, 0x01040203,
248 0x08000000, 0x08000001, 0x08040000, 0x08040001,
249 0x09000000, 0x09000001, 0x09040000, 0x09040001,
250 0x08000002, 0x08000003, 0x08040002, 0x08040003,
251 0x09000002, 0x09000003, 0x09040002, 0x09040003,
252 0x08000200, 0x08000201, 0x08040200, 0x08040201,
253 0x09000200, 0x09000201, 0x09040200, 0x09040201,
254 0x08000202, 0x08000203, 0x08040202, 0x08040203,
255 0x09000202, 0x09000203, 0x09040202, 0x09040203,
256 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
257 0x00000000, 0x00100000, 0x00000100, 0x00100100,
258 0x00000008, 0x00100008, 0x00000108, 0x00100108,
259 0x00001000, 0x00101000, 0x00001100, 0x00101100,
260 0x00001008, 0x00101008, 0x00001108, 0x00101108,
261 0x04000000, 0x04100000, 0x04000100, 0x04100100,
262 0x04000008, 0x04100008, 0x04000108, 0x04100108,
263 0x04001000, 0x04101000, 0x04001100, 0x04101100,
264 0x04001008, 0x04101008, 0x04001108, 0x04101108,
265 0x00020000, 0x00120000, 0x00020100, 0x00120100,
266 0x00020008, 0x00120008, 0x00020108, 0x00120108,
267 0x00021000, 0x00121000, 0x00021100, 0x00121100,
268 0x00021008, 0x00121008, 0x00021108, 0x00121108,
269 0x04020000, 0x04120000, 0x04020100, 0x04120100,
270 0x04020008, 0x04120008, 0x04020108, 0x04120108,
271 0x04021000, 0x04121000, 0x04021100, 0x04121100,
272 0x04021008, 0x04121008, 0x04021108, 0x04121108,
273 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
274 0x00000000, 0x10000000, 0x00010000, 0x10010000,
275 0x00000004, 0x10000004, 0x00010004, 0x10010004,
276 0x20000000, 0x30000000, 0x20010000, 0x30010000,
277 0x20000004, 0x30000004, 0x20010004, 0x30010004,
278 0x00100000, 0x10100000, 0x00110000, 0x10110000,
279 0x00100004, 0x10100004, 0x00110004, 0x10110004,
280 0x20100000, 0x30100000, 0x20110000, 0x30110000,
281 0x20100004, 0x30100004, 0x20110004, 0x30110004,
282 0x00001000, 0x10001000, 0x00011000, 0x10011000,
283 0x00001004, 0x10001004, 0x00011004, 0x10011004,
284 0x20001000, 0x30001000, 0x20011000, 0x30011000,
285 0x20001004, 0x30001004, 0x20011004, 0x30011004,
286 0x00101000, 0x10101000, 0x00111000, 0x10111000,
287 0x00101004, 0x10101004, 0x00111004, 0x10111004,
288 0x20101000, 0x30101000, 0x20111000, 0x30111000,
289 0x20101004, 0x30101004, 0x20111004, 0x30111004,
290 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
291 0x00000000, 0x08000000, 0x00000008, 0x08000008,
292 0x00000400, 0x08000400, 0x00000408, 0x08000408,
293 0x00020000, 0x08020000, 0x00020008, 0x08020008,
294 0x00020400, 0x08020400, 0x00020408, 0x08020408,
295 0x00000001, 0x08000001, 0x00000009, 0x08000009,
296 0x00000401, 0x08000401, 0x00000409, 0x08000409,
297 0x00020001, 0x08020001, 0x00020009, 0x08020009,
298 0x00020401, 0x08020401, 0x00020409, 0x08020409,
299 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
300 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
301 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
302 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
303 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
304 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
305 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
306 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
307 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
308 0x00000000, 0x00000100, 0x00080000, 0x00080100,
309 0x01000000, 0x01000100, 0x01080000, 0x01080100,
310 0x00000010, 0x00000110, 0x00080010, 0x00080110,
311 0x01000010, 0x01000110, 0x01080010, 0x01080110,
312 0x00200000, 0x00200100, 0x00280000, 0x00280100,
313 0x01200000, 0x01200100, 0x01280000, 0x01280100,
314 0x00200010, 0x00200110, 0x00280010, 0x00280110,
315 0x01200010, 0x01200110, 0x01280010, 0x01280110,
316 0x00000200, 0x00000300, 0x00080200, 0x00080300,
317 0x01000200, 0x01000300, 0x01080200, 0x01080300,
318 0x00000210, 0x00000310, 0x00080210, 0x00080310,
319 0x01000210, 0x01000310, 0x01080210, 0x01080310,
320 0x00200200, 0x00200300, 0x00280200, 0x00280300,
321 0x01200200, 0x01200300, 0x01280200, 0x01280300,
322 0x00200210, 0x00200310, 0x00280210, 0x00280310,
323 0x01200210, 0x01200310, 0x01280210, 0x01280310,
324 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
325 0x00000000, 0x04000000, 0x00040000, 0x04040000,
326 0x00000002, 0x04000002, 0x00040002, 0x04040002,
327 0x00002000, 0x04002000, 0x00042000, 0x04042000,
328 0x00002002, 0x04002002, 0x00042002, 0x04042002,
329 0x00000020, 0x04000020, 0x00040020, 0x04040020,
330 0x00000022, 0x04000022, 0x00040022, 0x04040022,
331 0x00002020, 0x04002020, 0x00042020, 0x04042020,
332 0x00002022, 0x04002022, 0x00042022, 0x04042022,
333 0x00000800, 0x04000800, 0x00040800, 0x04040800,
334 0x00000802, 0x04000802, 0x00040802, 0x04040802,
335 0x00002800, 0x04002800, 0x00042800, 0x04042800,
336 0x00002802, 0x04002802, 0x00042802, 0x04042802,
337 0x00000820, 0x04000820, 0x00040820, 0x04040820,
338 0x00000822, 0x04000822, 0x00040822, 0x04040822,
339 0x00002820, 0x04002820, 0x00042820, 0x04042820,
340 0x00002822, 0x04002822, 0x00042822, 0x04042822
343 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
344 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
346 #define BOX(i,n,S) (S)[(n)][(i)]
348 static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64])
361 for (u32 i = 0; i < 16; i += 2)
367 t = Kd[i + 0] ^ rotl32 (r, 28u);
370 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
371 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
372 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
373 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
374 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
375 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
376 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
377 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
380 t = Kd[i + 1] ^ rotl32 (l, 28u);
383 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
384 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
385 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
386 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
387 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
388 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
389 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
390 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
402 static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64])
406 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
407 HPERM_OP (c, tt, 2, 0xcccc0000);
408 HPERM_OP (d, tt, 2, 0xcccc0000);
409 PERM_OP (d, c, tt, 1, 0x55555555);
410 PERM_OP (c, d, tt, 8, 0x00ff00ff);
411 PERM_OP (d, c, tt, 1, 0x55555555);
413 d = ((d & 0x000000ff) << 16)
414 | ((d & 0x0000ff00) << 0)
415 | ((d & 0x00ff0000) >> 16)
416 | ((c & 0xf0000000) >> 4);
421 for (u32 i = 0; i < 16; i++)
423 c = c >> shifts3s0[i] | c << shifts3s1[i];
424 d = d >> shifts3s0[i] | d << shifts3s1[i];
429 u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
430 | BOX ((((c >> 6) & 0x03)
431 | ((c >> 7) & 0x3c)), 1, s_skb)
432 | BOX ((((c >> 13) & 0x0f)
433 | ((c >> 14) & 0x30)), 2, s_skb)
434 | BOX ((((c >> 20) & 0x01)
436 | ((c >> 22) & 0x38)), 3, s_skb);
438 u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
439 | BOX ((((d >> 7) & 0x03)
440 | ((d >> 8) & 0x3c)), 5, s_skb)
441 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
442 | BOX ((((d >> 21) & 0x0f)
443 | ((d >> 22) & 0x30)), 7, s_skb);
445 #if defined cl_amd_media_ops
446 Kc[i] = amd_bytealign (t, s << 16, 2);
447 Kd[i] = amd_bytealign (t >> 16, s, 2);
449 Kc[i] = ((t << 16) | (s & 0x0000ffff));
450 Kd[i] = ((s >> 16) | (t & 0xffff0000));
453 Kc[i] = rotl32 (Kc[i], 2u);
454 Kd[i] = rotl32 (Kd[i], 2u);
458 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
464 const u32 lid = get_local_id (0);
470 const u32 gid = get_global_id (0);
474 pw_buf0[0] = pws[gid].i[ 0];
475 pw_buf0[1] = pws[gid].i[ 1];
476 pw_buf0[2] = pws[gid].i[ 2];
477 pw_buf0[3] = pws[gid].i[ 3];
481 pw_buf1[0] = pws[gid].i[ 4];
482 pw_buf1[1] = pws[gid].i[ 5];
483 pw_buf1[2] = pws[gid].i[ 6];
484 pw_buf1[3] = pws[gid].i[ 7];
486 const u32 pw_len = pws[gid].pw_len;
492 __local u32 s_SPtrans[8][64];
493 __local u32 s_skb[8][64];
495 s_SPtrans[0][lid] = c_SPtrans[0][lid];
496 s_SPtrans[1][lid] = c_SPtrans[1][lid];
497 s_SPtrans[2][lid] = c_SPtrans[2][lid];
498 s_SPtrans[3][lid] = c_SPtrans[3][lid];
499 s_SPtrans[4][lid] = c_SPtrans[4][lid];
500 s_SPtrans[5][lid] = c_SPtrans[5][lid];
501 s_SPtrans[6][lid] = c_SPtrans[6][lid];
502 s_SPtrans[7][lid] = c_SPtrans[7][lid];
504 s_skb[0][lid] = c_skb[0][lid];
505 s_skb[1][lid] = c_skb[1][lid];
506 s_skb[2][lid] = c_skb[2][lid];
507 s_skb[3][lid] = c_skb[3][lid];
508 s_skb[4][lid] = c_skb[4][lid];
509 s_skb[5][lid] = c_skb[5][lid];
510 s_skb[6][lid] = c_skb[6][lid];
511 s_skb[7][lid] = c_skb[7][lid];
513 barrier (CLK_LOCAL_MEM_FENCE);
515 if (gid >= gid_max) return;
523 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
524 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
525 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
526 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
530 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
531 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
532 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
533 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
535 const u32 salt_len = salt_bufs[salt_pos].salt_len;
541 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
571 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
573 const u32 salt_word_len = (salt_len + out_len) * 2;
607 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
609 w0_t[0] |= salt_buf0[0];
610 w0_t[1] |= salt_buf0[1];
611 w0_t[2] |= salt_buf0[2];
612 w0_t[3] |= salt_buf0[3];
613 w1_t[0] |= salt_buf1[0];
614 w1_t[1] |= salt_buf1[1];
615 w1_t[2] |= salt_buf1[2];
616 w1_t[3] |= salt_buf1[3];
638 * precompute key1 since key is static: 0x0123456789abcdef
639 * plus LEFT_ROTATE by 2
681 * key1 (generate key)
689 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
693 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
694 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
699 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
703 * key2 (generate hash)
706 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
711 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
715 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
716 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
721 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
728 const u32 r0 = iv[0];
729 const u32 r1 = iv[1];
737 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
741 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
745 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
751 const u32 lid = get_local_id (0);
757 const u32 gid = get_global_id (0);
761 pw_buf0[0] = pws[gid].i[ 0];
762 pw_buf0[1] = pws[gid].i[ 1];
763 pw_buf0[2] = pws[gid].i[ 2];
764 pw_buf0[3] = pws[gid].i[ 3];
768 pw_buf1[0] = pws[gid].i[ 4];
769 pw_buf1[1] = pws[gid].i[ 5];
770 pw_buf1[2] = pws[gid].i[ 6];
771 pw_buf1[3] = pws[gid].i[ 7];
773 const u32 pw_len = pws[gid].pw_len;
779 __local u32 s_SPtrans[8][64];
780 __local u32 s_skb[8][64];
782 s_SPtrans[0][lid] = c_SPtrans[0][lid];
783 s_SPtrans[1][lid] = c_SPtrans[1][lid];
784 s_SPtrans[2][lid] = c_SPtrans[2][lid];
785 s_SPtrans[3][lid] = c_SPtrans[3][lid];
786 s_SPtrans[4][lid] = c_SPtrans[4][lid];
787 s_SPtrans[5][lid] = c_SPtrans[5][lid];
788 s_SPtrans[6][lid] = c_SPtrans[6][lid];
789 s_SPtrans[7][lid] = c_SPtrans[7][lid];
791 s_skb[0][lid] = c_skb[0][lid];
792 s_skb[1][lid] = c_skb[1][lid];
793 s_skb[2][lid] = c_skb[2][lid];
794 s_skb[3][lid] = c_skb[3][lid];
795 s_skb[4][lid] = c_skb[4][lid];
796 s_skb[5][lid] = c_skb[5][lid];
797 s_skb[6][lid] = c_skb[6][lid];
798 s_skb[7][lid] = c_skb[7][lid];
800 barrier (CLK_LOCAL_MEM_FENCE);
802 if (gid >= gid_max) return;
810 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
811 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
812 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
813 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
817 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
818 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
819 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
820 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
822 const u32 salt_len = salt_bufs[salt_pos].salt_len;
828 const u32 search[4] =
830 digests_buf[digests_offset].digest_buf[DGST_R0],
831 digests_buf[digests_offset].digest_buf[DGST_R1],
832 digests_buf[digests_offset].digest_buf[DGST_R2],
833 digests_buf[digests_offset].digest_buf[DGST_R3]
840 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
870 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
872 const u32 salt_word_len = (salt_len + out_len) * 2;
906 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
908 w0_t[0] |= salt_buf0[0];
909 w0_t[1] |= salt_buf0[1];
910 w0_t[2] |= salt_buf0[2];
911 w0_t[3] |= salt_buf0[3];
912 w1_t[0] |= salt_buf1[0];
913 w1_t[1] |= salt_buf1[1];
914 w1_t[2] |= salt_buf1[2];
915 w1_t[3] |= salt_buf1[3];
937 * precompute key1 since key is static: 0x0123456789abcdef
938 * plus LEFT_ROTATE by 2
980 * key1 (generate key)
988 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
992 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
993 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
998 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1002 * key2 (generate hash)
1005 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1010 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1014 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1015 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1020 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1027 const u32 r0 = iv[0];
1028 const u32 r1 = iv[1];
1036 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1040 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)