2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 #define PERM_OP(a,b,tt,n,m) \
33 #define HPERM_OP(a,tt,n,m) \
39 tt = tt >> (16 + n); \
45 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
46 PERM_OP (l, r, tt, 16, 0x0000ffff); \
47 PERM_OP (r, l, tt, 2, 0x33333333); \
48 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
49 PERM_OP (r, l, tt, 1, 0x55555555); \
54 PERM_OP (l, r, tt, 1, 0x55555555); \
55 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
56 PERM_OP (l, r, tt, 2, 0x33333333); \
57 PERM_OP (r, l, tt, 16, 0x0000ffff); \
58 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
61 __constant u32 c_SPtrans[8][64] =
64 0x02080800, 0x00080000, 0x02000002, 0x02080802,
65 0x02000000, 0x00080802, 0x00080002, 0x02000002,
66 0x00080802, 0x02080800, 0x02080000, 0x00000802,
67 0x02000802, 0x02000000, 0x00000000, 0x00080002,
68 0x00080000, 0x00000002, 0x02000800, 0x00080800,
69 0x02080802, 0x02080000, 0x00000802, 0x02000800,
70 0x00000002, 0x00000800, 0x00080800, 0x02080002,
71 0x00000800, 0x02000802, 0x02080002, 0x00000000,
72 0x00000000, 0x02080802, 0x02000800, 0x00080002,
73 0x02080800, 0x00080000, 0x00000802, 0x02000800,
74 0x02080002, 0x00000800, 0x00080800, 0x02000002,
75 0x00080802, 0x00000002, 0x02000002, 0x02080000,
76 0x02080802, 0x00080800, 0x02080000, 0x02000802,
77 0x02000000, 0x00000802, 0x00080002, 0x00000000,
78 0x00080000, 0x02000000, 0x02000802, 0x02080800,
79 0x00000002, 0x02080002, 0x00000800, 0x00080802,
81 0x40108010, 0x00000000, 0x00108000, 0x40100000,
82 0x40000010, 0x00008010, 0x40008000, 0x00108000,
83 0x00008000, 0x40100010, 0x00000010, 0x40008000,
84 0x00100010, 0x40108000, 0x40100000, 0x00000010,
85 0x00100000, 0x40008010, 0x40100010, 0x00008000,
86 0x00108010, 0x40000000, 0x00000000, 0x00100010,
87 0x40008010, 0x00108010, 0x40108000, 0x40000010,
88 0x40000000, 0x00100000, 0x00008010, 0x40108010,
89 0x00100010, 0x40108000, 0x40008000, 0x00108010,
90 0x40108010, 0x00100010, 0x40000010, 0x00000000,
91 0x40000000, 0x00008010, 0x00100000, 0x40100010,
92 0x00008000, 0x40000000, 0x00108010, 0x40008010,
93 0x40108000, 0x00008000, 0x00000000, 0x40000010,
94 0x00000010, 0x40108010, 0x00108000, 0x40100000,
95 0x40100010, 0x00100000, 0x00008010, 0x40008000,
96 0x40008010, 0x00000010, 0x40100000, 0x00108000,
98 0x04000001, 0x04040100, 0x00000100, 0x04000101,
99 0x00040001, 0x04000000, 0x04000101, 0x00040100,
100 0x04000100, 0x00040000, 0x04040000, 0x00000001,
101 0x04040101, 0x00000101, 0x00000001, 0x04040001,
102 0x00000000, 0x00040001, 0x04040100, 0x00000100,
103 0x00000101, 0x04040101, 0x00040000, 0x04000001,
104 0x04040001, 0x04000100, 0x00040101, 0x04040000,
105 0x00040100, 0x00000000, 0x04000000, 0x00040101,
106 0x04040100, 0x00000100, 0x00000001, 0x00040000,
107 0x00000101, 0x00040001, 0x04040000, 0x04000101,
108 0x00000000, 0x04040100, 0x00040100, 0x04040001,
109 0x00040001, 0x04000000, 0x04040101, 0x00000001,
110 0x00040101, 0x04000001, 0x04000000, 0x04040101,
111 0x00040000, 0x04000100, 0x04000101, 0x00040100,
112 0x04000100, 0x00000000, 0x04040001, 0x00000101,
113 0x04000001, 0x00040101, 0x00000100, 0x04040000,
115 0x00401008, 0x10001000, 0x00000008, 0x10401008,
116 0x00000000, 0x10400000, 0x10001008, 0x00400008,
117 0x10401000, 0x10000008, 0x10000000, 0x00001008,
118 0x10000008, 0x00401008, 0x00400000, 0x10000000,
119 0x10400008, 0x00401000, 0x00001000, 0x00000008,
120 0x00401000, 0x10001008, 0x10400000, 0x00001000,
121 0x00001008, 0x00000000, 0x00400008, 0x10401000,
122 0x10001000, 0x10400008, 0x10401008, 0x00400000,
123 0x10400008, 0x00001008, 0x00400000, 0x10000008,
124 0x00401000, 0x10001000, 0x00000008, 0x10400000,
125 0x10001008, 0x00000000, 0x00001000, 0x00400008,
126 0x00000000, 0x10400008, 0x10401000, 0x00001000,
127 0x10000000, 0x10401008, 0x00401008, 0x00400000,
128 0x10401008, 0x00000008, 0x10001000, 0x00401008,
129 0x00400008, 0x00401000, 0x10400000, 0x10001008,
130 0x00001008, 0x10000000, 0x10000008, 0x10401000,
132 0x08000000, 0x00010000, 0x00000400, 0x08010420,
133 0x08010020, 0x08000400, 0x00010420, 0x08010000,
134 0x00010000, 0x00000020, 0x08000020, 0x00010400,
135 0x08000420, 0x08010020, 0x08010400, 0x00000000,
136 0x00010400, 0x08000000, 0x00010020, 0x00000420,
137 0x08000400, 0x00010420, 0x00000000, 0x08000020,
138 0x00000020, 0x08000420, 0x08010420, 0x00010020,
139 0x08010000, 0x00000400, 0x00000420, 0x08010400,
140 0x08010400, 0x08000420, 0x00010020, 0x08010000,
141 0x00010000, 0x00000020, 0x08000020, 0x08000400,
142 0x08000000, 0x00010400, 0x08010420, 0x00000000,
143 0x00010420, 0x08000000, 0x00000400, 0x00010020,
144 0x08000420, 0x00000400, 0x00000000, 0x08010420,
145 0x08010020, 0x08010400, 0x00000420, 0x00010000,
146 0x00010400, 0x08010020, 0x08000400, 0x00000420,
147 0x00000020, 0x00010420, 0x08010000, 0x08000020,
149 0x80000040, 0x00200040, 0x00000000, 0x80202000,
150 0x00200040, 0x00002000, 0x80002040, 0x00200000,
151 0x00002040, 0x80202040, 0x00202000, 0x80000000,
152 0x80002000, 0x80000040, 0x80200000, 0x00202040,
153 0x00200000, 0x80002040, 0x80200040, 0x00000000,
154 0x00002000, 0x00000040, 0x80202000, 0x80200040,
155 0x80202040, 0x80200000, 0x80000000, 0x00002040,
156 0x00000040, 0x00202000, 0x00202040, 0x80002000,
157 0x00002040, 0x80000000, 0x80002000, 0x00202040,
158 0x80202000, 0x00200040, 0x00000000, 0x80002000,
159 0x80000000, 0x00002000, 0x80200040, 0x00200000,
160 0x00200040, 0x80202040, 0x00202000, 0x00000040,
161 0x80202040, 0x00202000, 0x00200000, 0x80002040,
162 0x80000040, 0x80200000, 0x00202040, 0x00000000,
163 0x00002000, 0x80000040, 0x80002040, 0x80202000,
164 0x80200000, 0x00002040, 0x00000040, 0x80200040,
166 0x00004000, 0x00000200, 0x01000200, 0x01000004,
167 0x01004204, 0x00004004, 0x00004200, 0x00000000,
168 0x01000000, 0x01000204, 0x00000204, 0x01004000,
169 0x00000004, 0x01004200, 0x01004000, 0x00000204,
170 0x01000204, 0x00004000, 0x00004004, 0x01004204,
171 0x00000000, 0x01000200, 0x01000004, 0x00004200,
172 0x01004004, 0x00004204, 0x01004200, 0x00000004,
173 0x00004204, 0x01004004, 0x00000200, 0x01000000,
174 0x00004204, 0x01004000, 0x01004004, 0x00000204,
175 0x00004000, 0x00000200, 0x01000000, 0x01004004,
176 0x01000204, 0x00004204, 0x00004200, 0x00000000,
177 0x00000200, 0x01000004, 0x00000004, 0x01000200,
178 0x00000000, 0x01000204, 0x01000200, 0x00004200,
179 0x00000204, 0x00004000, 0x01004204, 0x01000000,
180 0x01004200, 0x00000004, 0x00004004, 0x01004204,
181 0x01000004, 0x01004200, 0x01004000, 0x00004004,
183 0x20800080, 0x20820000, 0x00020080, 0x00000000,
184 0x20020000, 0x00800080, 0x20800000, 0x20820080,
185 0x00000080, 0x20000000, 0x00820000, 0x00020080,
186 0x00820080, 0x20020080, 0x20000080, 0x20800000,
187 0x00020000, 0x00820080, 0x00800080, 0x20020000,
188 0x20820080, 0x20000080, 0x00000000, 0x00820000,
189 0x20000000, 0x00800000, 0x20020080, 0x20800080,
190 0x00800000, 0x00020000, 0x20820000, 0x00000080,
191 0x00800000, 0x00020000, 0x20000080, 0x20820080,
192 0x00020080, 0x20000000, 0x00000000, 0x00820000,
193 0x20800080, 0x20020080, 0x20020000, 0x00800080,
194 0x20820000, 0x00000080, 0x00800080, 0x20020000,
195 0x20820080, 0x00800000, 0x20800000, 0x20000080,
196 0x00820000, 0x00020080, 0x20020080, 0x20800000,
197 0x00000080, 0x20820000, 0x00820080, 0x00000000,
198 0x20000000, 0x20800080, 0x00020000, 0x00820080,
201 __constant u32 c_skb[8][64] =
203 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
204 0x00000000, 0x00000010, 0x20000000, 0x20000010,
205 0x00010000, 0x00010010, 0x20010000, 0x20010010,
206 0x00000800, 0x00000810, 0x20000800, 0x20000810,
207 0x00010800, 0x00010810, 0x20010800, 0x20010810,
208 0x00000020, 0x00000030, 0x20000020, 0x20000030,
209 0x00010020, 0x00010030, 0x20010020, 0x20010030,
210 0x00000820, 0x00000830, 0x20000820, 0x20000830,
211 0x00010820, 0x00010830, 0x20010820, 0x20010830,
212 0x00080000, 0x00080010, 0x20080000, 0x20080010,
213 0x00090000, 0x00090010, 0x20090000, 0x20090010,
214 0x00080800, 0x00080810, 0x20080800, 0x20080810,
215 0x00090800, 0x00090810, 0x20090800, 0x20090810,
216 0x00080020, 0x00080030, 0x20080020, 0x20080030,
217 0x00090020, 0x00090030, 0x20090020, 0x20090030,
218 0x00080820, 0x00080830, 0x20080820, 0x20080830,
219 0x00090820, 0x00090830, 0x20090820, 0x20090830,
220 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
221 0x00000000, 0x02000000, 0x00002000, 0x02002000,
222 0x00200000, 0x02200000, 0x00202000, 0x02202000,
223 0x00000004, 0x02000004, 0x00002004, 0x02002004,
224 0x00200004, 0x02200004, 0x00202004, 0x02202004,
225 0x00000400, 0x02000400, 0x00002400, 0x02002400,
226 0x00200400, 0x02200400, 0x00202400, 0x02202400,
227 0x00000404, 0x02000404, 0x00002404, 0x02002404,
228 0x00200404, 0x02200404, 0x00202404, 0x02202404,
229 0x10000000, 0x12000000, 0x10002000, 0x12002000,
230 0x10200000, 0x12200000, 0x10202000, 0x12202000,
231 0x10000004, 0x12000004, 0x10002004, 0x12002004,
232 0x10200004, 0x12200004, 0x10202004, 0x12202004,
233 0x10000400, 0x12000400, 0x10002400, 0x12002400,
234 0x10200400, 0x12200400, 0x10202400, 0x12202400,
235 0x10000404, 0x12000404, 0x10002404, 0x12002404,
236 0x10200404, 0x12200404, 0x10202404, 0x12202404,
237 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
238 0x00000000, 0x00000001, 0x00040000, 0x00040001,
239 0x01000000, 0x01000001, 0x01040000, 0x01040001,
240 0x00000002, 0x00000003, 0x00040002, 0x00040003,
241 0x01000002, 0x01000003, 0x01040002, 0x01040003,
242 0x00000200, 0x00000201, 0x00040200, 0x00040201,
243 0x01000200, 0x01000201, 0x01040200, 0x01040201,
244 0x00000202, 0x00000203, 0x00040202, 0x00040203,
245 0x01000202, 0x01000203, 0x01040202, 0x01040203,
246 0x08000000, 0x08000001, 0x08040000, 0x08040001,
247 0x09000000, 0x09000001, 0x09040000, 0x09040001,
248 0x08000002, 0x08000003, 0x08040002, 0x08040003,
249 0x09000002, 0x09000003, 0x09040002, 0x09040003,
250 0x08000200, 0x08000201, 0x08040200, 0x08040201,
251 0x09000200, 0x09000201, 0x09040200, 0x09040201,
252 0x08000202, 0x08000203, 0x08040202, 0x08040203,
253 0x09000202, 0x09000203, 0x09040202, 0x09040203,
254 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
255 0x00000000, 0x00100000, 0x00000100, 0x00100100,
256 0x00000008, 0x00100008, 0x00000108, 0x00100108,
257 0x00001000, 0x00101000, 0x00001100, 0x00101100,
258 0x00001008, 0x00101008, 0x00001108, 0x00101108,
259 0x04000000, 0x04100000, 0x04000100, 0x04100100,
260 0x04000008, 0x04100008, 0x04000108, 0x04100108,
261 0x04001000, 0x04101000, 0x04001100, 0x04101100,
262 0x04001008, 0x04101008, 0x04001108, 0x04101108,
263 0x00020000, 0x00120000, 0x00020100, 0x00120100,
264 0x00020008, 0x00120008, 0x00020108, 0x00120108,
265 0x00021000, 0x00121000, 0x00021100, 0x00121100,
266 0x00021008, 0x00121008, 0x00021108, 0x00121108,
267 0x04020000, 0x04120000, 0x04020100, 0x04120100,
268 0x04020008, 0x04120008, 0x04020108, 0x04120108,
269 0x04021000, 0x04121000, 0x04021100, 0x04121100,
270 0x04021008, 0x04121008, 0x04021108, 0x04121108,
271 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
272 0x00000000, 0x10000000, 0x00010000, 0x10010000,
273 0x00000004, 0x10000004, 0x00010004, 0x10010004,
274 0x20000000, 0x30000000, 0x20010000, 0x30010000,
275 0x20000004, 0x30000004, 0x20010004, 0x30010004,
276 0x00100000, 0x10100000, 0x00110000, 0x10110000,
277 0x00100004, 0x10100004, 0x00110004, 0x10110004,
278 0x20100000, 0x30100000, 0x20110000, 0x30110000,
279 0x20100004, 0x30100004, 0x20110004, 0x30110004,
280 0x00001000, 0x10001000, 0x00011000, 0x10011000,
281 0x00001004, 0x10001004, 0x00011004, 0x10011004,
282 0x20001000, 0x30001000, 0x20011000, 0x30011000,
283 0x20001004, 0x30001004, 0x20011004, 0x30011004,
284 0x00101000, 0x10101000, 0x00111000, 0x10111000,
285 0x00101004, 0x10101004, 0x00111004, 0x10111004,
286 0x20101000, 0x30101000, 0x20111000, 0x30111000,
287 0x20101004, 0x30101004, 0x20111004, 0x30111004,
288 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
289 0x00000000, 0x08000000, 0x00000008, 0x08000008,
290 0x00000400, 0x08000400, 0x00000408, 0x08000408,
291 0x00020000, 0x08020000, 0x00020008, 0x08020008,
292 0x00020400, 0x08020400, 0x00020408, 0x08020408,
293 0x00000001, 0x08000001, 0x00000009, 0x08000009,
294 0x00000401, 0x08000401, 0x00000409, 0x08000409,
295 0x00020001, 0x08020001, 0x00020009, 0x08020009,
296 0x00020401, 0x08020401, 0x00020409, 0x08020409,
297 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
298 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
299 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
300 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
301 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
302 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
303 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
304 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
305 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
306 0x00000000, 0x00000100, 0x00080000, 0x00080100,
307 0x01000000, 0x01000100, 0x01080000, 0x01080100,
308 0x00000010, 0x00000110, 0x00080010, 0x00080110,
309 0x01000010, 0x01000110, 0x01080010, 0x01080110,
310 0x00200000, 0x00200100, 0x00280000, 0x00280100,
311 0x01200000, 0x01200100, 0x01280000, 0x01280100,
312 0x00200010, 0x00200110, 0x00280010, 0x00280110,
313 0x01200010, 0x01200110, 0x01280010, 0x01280110,
314 0x00000200, 0x00000300, 0x00080200, 0x00080300,
315 0x01000200, 0x01000300, 0x01080200, 0x01080300,
316 0x00000210, 0x00000310, 0x00080210, 0x00080310,
317 0x01000210, 0x01000310, 0x01080210, 0x01080310,
318 0x00200200, 0x00200300, 0x00280200, 0x00280300,
319 0x01200200, 0x01200300, 0x01280200, 0x01280300,
320 0x00200210, 0x00200310, 0x00280210, 0x00280310,
321 0x01200210, 0x01200310, 0x01280210, 0x01280310,
322 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
323 0x00000000, 0x04000000, 0x00040000, 0x04040000,
324 0x00000002, 0x04000002, 0x00040002, 0x04040002,
325 0x00002000, 0x04002000, 0x00042000, 0x04042000,
326 0x00002002, 0x04002002, 0x00042002, 0x04042002,
327 0x00000020, 0x04000020, 0x00040020, 0x04040020,
328 0x00000022, 0x04000022, 0x00040022, 0x04040022,
329 0x00002020, 0x04002020, 0x00042020, 0x04042020,
330 0x00002022, 0x04002022, 0x00042022, 0x04042022,
331 0x00000800, 0x04000800, 0x00040800, 0x04040800,
332 0x00000802, 0x04000802, 0x00040802, 0x04040802,
333 0x00002800, 0x04002800, 0x00042800, 0x04042800,
334 0x00002802, 0x04002802, 0x00042802, 0x04042802,
335 0x00000820, 0x04000820, 0x00040820, 0x04040820,
336 0x00000822, 0x04000822, 0x00040822, 0x04040822,
337 0x00002820, 0x04002820, 0x00042820, 0x04042820,
338 0x00002822, 0x04002822, 0x00042822, 0x04042822
341 __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
342 __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
344 #define BOX(i,n,S) (S)[(n)][(i)]
346 static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64])
359 for (u32 i = 0; i < 16; i += 2)
365 t = Kd[i + 0] ^ rotl32 (r, 28u);
368 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
369 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
370 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
371 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
372 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
373 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
374 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
375 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
378 t = Kd[i + 1] ^ rotl32 (l, 28u);
381 ^ BOX (amd_bfe (u, 2, 6), 0, s_SPtrans)
382 ^ BOX (amd_bfe (u, 10, 6), 2, s_SPtrans)
383 ^ BOX (amd_bfe (u, 18, 6), 4, s_SPtrans)
384 ^ BOX (amd_bfe (u, 26, 6), 6, s_SPtrans)
385 ^ BOX (amd_bfe (t, 2, 6), 1, s_SPtrans)
386 ^ BOX (amd_bfe (t, 10, 6), 3, s_SPtrans)
387 ^ BOX (amd_bfe (t, 18, 6), 5, s_SPtrans)
388 ^ BOX (amd_bfe (t, 26, 6), 7, s_SPtrans);
400 static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64])
404 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
405 HPERM_OP (c, tt, 2, 0xcccc0000);
406 HPERM_OP (d, tt, 2, 0xcccc0000);
407 PERM_OP (d, c, tt, 1, 0x55555555);
408 PERM_OP (c, d, tt, 8, 0x00ff00ff);
409 PERM_OP (d, c, tt, 1, 0x55555555);
411 d = ((d & 0x000000ff) << 16)
412 | ((d & 0x0000ff00) << 0)
413 | ((d & 0x00ff0000) >> 16)
414 | ((c & 0xf0000000) >> 4);
419 for (u32 i = 0; i < 16; i++)
421 c = c >> shifts3s0[i] | c << shifts3s1[i];
422 d = d >> shifts3s0[i] | d << shifts3s1[i];
427 u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
428 | BOX ((((c >> 6) & 0x03)
429 | ((c >> 7) & 0x3c)), 1, s_skb)
430 | BOX ((((c >> 13) & 0x0f)
431 | ((c >> 14) & 0x30)), 2, s_skb)
432 | BOX ((((c >> 20) & 0x01)
434 | ((c >> 22) & 0x38)), 3, s_skb);
436 u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
437 | BOX ((((d >> 7) & 0x03)
438 | ((d >> 8) & 0x3c)), 5, s_skb)
439 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
440 | BOX ((((d >> 21) & 0x0f)
441 | ((d >> 22) & 0x30)), 7, s_skb);
443 #if defined cl_amd_media_ops
444 Kc[i] = amd_bytealign (t, s << 16, 2);
445 Kd[i] = amd_bytealign (t >> 16, s, 2);
447 Kc[i] = ((t << 16) | (s & 0x0000ffff));
448 Kd[i] = ((s >> 16) | (t & 0xffff0000));
451 Kc[i] = rotl32 (Kc[i], 2u);
452 Kd[i] = rotl32 (Kd[i], 2u);
456 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
462 const u32 lid = get_local_id (0);
468 const u32 gid = get_global_id (0);
472 wordl0[0] = pws[gid].i[ 0];
473 wordl0[1] = pws[gid].i[ 1];
474 wordl0[2] = pws[gid].i[ 2];
475 wordl0[3] = pws[gid].i[ 3];
479 wordl1[0] = pws[gid].i[ 4];
480 wordl1[1] = pws[gid].i[ 5];
481 wordl1[2] = pws[gid].i[ 6];
482 wordl1[3] = pws[gid].i[ 7];
498 const u32 pw_l_len = pws[gid].pw_len;
500 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
502 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
509 __local u32 s_SPtrans[8][64];
511 __local u32 s_skb[8][64];
513 s_SPtrans[0][lid] = c_SPtrans[0][lid];
514 s_SPtrans[1][lid] = c_SPtrans[1][lid];
515 s_SPtrans[2][lid] = c_SPtrans[2][lid];
516 s_SPtrans[3][lid] = c_SPtrans[3][lid];
517 s_SPtrans[4][lid] = c_SPtrans[4][lid];
518 s_SPtrans[5][lid] = c_SPtrans[5][lid];
519 s_SPtrans[6][lid] = c_SPtrans[6][lid];
520 s_SPtrans[7][lid] = c_SPtrans[7][lid];
522 s_skb[0][lid] = c_skb[0][lid];
523 s_skb[1][lid] = c_skb[1][lid];
524 s_skb[2][lid] = c_skb[2][lid];
525 s_skb[3][lid] = c_skb[3][lid];
526 s_skb[4][lid] = c_skb[4][lid];
527 s_skb[5][lid] = c_skb[5][lid];
528 s_skb[6][lid] = c_skb[6][lid];
529 s_skb[7][lid] = c_skb[7][lid];
531 barrier (CLK_LOCAL_MEM_FENCE);
533 if (gid >= gid_max) return;
541 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
542 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
543 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
544 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
548 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
549 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
550 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
551 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
553 const u32 salt_len = salt_bufs[salt_pos].salt_len;
559 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
561 const u32 pw_r_len = combs_buf[il_pos].pw_len;
563 const u32 pw_len = pw_l_len + pw_r_len;
565 const u32 salt_word_len = (salt_len + pw_len) * 2;
569 wordr0[0] = combs_buf[il_pos].i[0];
570 wordr0[1] = combs_buf[il_pos].i[1];
571 wordr0[2] = combs_buf[il_pos].i[2];
572 wordr0[3] = combs_buf[il_pos].i[3];
576 wordr1[0] = combs_buf[il_pos].i[4];
577 wordr1[1] = combs_buf[il_pos].i[5];
578 wordr1[2] = combs_buf[il_pos].i[6];
579 wordr1[3] = combs_buf[il_pos].i[7];
595 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
597 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
602 w0[0] = wordl0[0] | wordr0[0];
603 w0[1] = wordl0[1] | wordr0[1];
604 w0[2] = wordl0[2] | wordr0[2];
605 w0[3] = wordl0[3] | wordr0[3];
609 w1[0] = wordl1[0] | wordr1[0];
610 w1[1] = wordl1[1] | wordr1[1];
611 w1[2] = wordl1[2] | wordr1[2];
612 w1[3] = wordl1[3] | wordr1[3];
616 w2[0] = wordl2[0] | wordr2[0];
617 w2[1] = wordl2[1] | wordr2[1];
618 w2[2] = wordl2[2] | wordr2[2];
619 w2[3] = wordl2[3] | wordr2[3];
623 w3[0] = wordl3[0] | wordr3[0];
624 w3[1] = wordl3[1] | wordr3[1];
625 w3[2] = wordl3[2] | wordr3[2];
626 w3[3] = wordl3[3] | wordr3[3];
654 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
656 w0_t[0] |= salt_buf0[0];
657 w0_t[1] |= salt_buf0[1];
658 w0_t[2] |= salt_buf0[2];
659 w0_t[3] |= salt_buf0[3];
660 w1_t[0] |= salt_buf1[0];
661 w1_t[1] |= salt_buf1[1];
662 w1_t[2] |= salt_buf1[2];
663 w1_t[3] |= salt_buf1[3];
685 * precompute key1 since key is static: 0x0123456789abcdef
686 * plus LEFT_ROTATE by 2
728 * key1 (generate key)
736 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
740 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
741 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
746 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
750 * key2 (generate hash)
753 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
758 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
762 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
763 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
768 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
775 const u32 r0 = iv[0];
776 const u32 r1 = iv[1];
784 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
788 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
792 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
798 const u32 lid = get_local_id (0);
804 const u32 gid = get_global_id (0);
808 wordl0[0] = pws[gid].i[ 0];
809 wordl0[1] = pws[gid].i[ 1];
810 wordl0[2] = pws[gid].i[ 2];
811 wordl0[3] = pws[gid].i[ 3];
815 wordl1[0] = pws[gid].i[ 4];
816 wordl1[1] = pws[gid].i[ 5];
817 wordl1[2] = pws[gid].i[ 6];
818 wordl1[3] = pws[gid].i[ 7];
834 const u32 pw_l_len = pws[gid].pw_len;
836 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
838 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
845 __local u32 s_SPtrans[8][64];
847 __local u32 s_skb[8][64];
849 s_SPtrans[0][lid] = c_SPtrans[0][lid];
850 s_SPtrans[1][lid] = c_SPtrans[1][lid];
851 s_SPtrans[2][lid] = c_SPtrans[2][lid];
852 s_SPtrans[3][lid] = c_SPtrans[3][lid];
853 s_SPtrans[4][lid] = c_SPtrans[4][lid];
854 s_SPtrans[5][lid] = c_SPtrans[5][lid];
855 s_SPtrans[6][lid] = c_SPtrans[6][lid];
856 s_SPtrans[7][lid] = c_SPtrans[7][lid];
858 s_skb[0][lid] = c_skb[0][lid];
859 s_skb[1][lid] = c_skb[1][lid];
860 s_skb[2][lid] = c_skb[2][lid];
861 s_skb[3][lid] = c_skb[3][lid];
862 s_skb[4][lid] = c_skb[4][lid];
863 s_skb[5][lid] = c_skb[5][lid];
864 s_skb[6][lid] = c_skb[6][lid];
865 s_skb[7][lid] = c_skb[7][lid];
867 barrier (CLK_LOCAL_MEM_FENCE);
869 if (gid >= gid_max) return;
877 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
878 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
879 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
880 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
884 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
885 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
886 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
887 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
889 const u32 salt_len = salt_bufs[salt_pos].salt_len;
895 const u32 search[4] =
897 digests_buf[digests_offset].digest_buf[DGST_R0],
898 digests_buf[digests_offset].digest_buf[DGST_R1],
899 digests_buf[digests_offset].digest_buf[DGST_R2],
900 digests_buf[digests_offset].digest_buf[DGST_R3]
907 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
909 const u32 pw_r_len = combs_buf[il_pos].pw_len;
911 const u32 pw_len = pw_l_len + pw_r_len;
913 const u32 salt_word_len = (salt_len + pw_len) * 2;
917 wordr0[0] = combs_buf[il_pos].i[0];
918 wordr0[1] = combs_buf[il_pos].i[1];
919 wordr0[2] = combs_buf[il_pos].i[2];
920 wordr0[3] = combs_buf[il_pos].i[3];
924 wordr1[0] = combs_buf[il_pos].i[4];
925 wordr1[1] = combs_buf[il_pos].i[5];
926 wordr1[2] = combs_buf[il_pos].i[6];
927 wordr1[3] = combs_buf[il_pos].i[7];
943 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
945 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
950 w0[0] = wordl0[0] | wordr0[0];
951 w0[1] = wordl0[1] | wordr0[1];
952 w0[2] = wordl0[2] | wordr0[2];
953 w0[3] = wordl0[3] | wordr0[3];
957 w1[0] = wordl1[0] | wordr1[0];
958 w1[1] = wordl1[1] | wordr1[1];
959 w1[2] = wordl1[2] | wordr1[2];
960 w1[3] = wordl1[3] | wordr1[3];
964 w2[0] = wordl2[0] | wordr2[0];
965 w2[1] = wordl2[1] | wordr2[1];
966 w2[2] = wordl2[2] | wordr2[2];
967 w2[3] = wordl2[3] | wordr2[3];
971 w3[0] = wordl3[0] | wordr3[0];
972 w3[1] = wordl3[1] | wordr3[1];
973 w3[2] = wordl3[2] | wordr3[2];
974 w3[3] = wordl3[3] | wordr3[3];
1002 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
1004 w0_t[0] |= salt_buf0[0];
1005 w0_t[1] |= salt_buf0[1];
1006 w0_t[2] |= salt_buf0[2];
1007 w0_t[3] |= salt_buf0[3];
1008 w1_t[0] |= salt_buf1[0];
1009 w1_t[1] |= salt_buf1[1];
1010 w1_t[2] |= salt_buf1[2];
1011 w1_t[3] |= salt_buf1[3];
1033 * precompute key1 since key is static: 0x0123456789abcdef
1034 * plus LEFT_ROTATE by 2
1039 Kc[ 0] = 0x64649040;
1040 Kc[ 1] = 0x14909858;
1041 Kc[ 2] = 0xc4b44888;
1042 Kc[ 3] = 0x9094e438;
1043 Kc[ 4] = 0xd8a004f0;
1044 Kc[ 5] = 0xa8f02810;
1045 Kc[ 6] = 0xc84048d8;
1046 Kc[ 7] = 0x68d804a8;
1047 Kc[ 8] = 0x0490e40c;
1048 Kc[ 9] = 0xac183024;
1049 Kc[10] = 0x24c07c10;
1050 Kc[11] = 0x8c88c038;
1051 Kc[12] = 0xc048c824;
1052 Kc[13] = 0x4c0470a8;
1053 Kc[14] = 0x584020b4;
1054 Kc[15] = 0x00742c4c;
1058 Kd[ 0] = 0xa42ce40c;
1059 Kd[ 1] = 0x64689858;
1060 Kd[ 2] = 0x484050b8;
1061 Kd[ 3] = 0xe8184814;
1062 Kd[ 4] = 0x405cc070;
1063 Kd[ 5] = 0xa010784c;
1064 Kd[ 6] = 0x6074a800;
1065 Kd[ 7] = 0x80701c1c;
1066 Kd[ 8] = 0x9cd49430;
1067 Kd[ 9] = 0x4c8ce078;
1068 Kd[10] = 0x5c18c088;
1069 Kd[11] = 0x28a8a4c8;
1070 Kd[12] = 0x3c180838;
1071 Kd[13] = 0xb0b86c20;
1072 Kd[14] = 0xac84a094;
1073 Kd[15] = 0x4ce0c0c4;
1076 * key1 (generate key)
1084 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1088 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1089 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1094 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1098 * key2 (generate hash)
1101 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1106 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1110 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1111 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1116 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1123 const u32 r0 = iv[0];
1124 const u32 r1 = iv[1];
1132 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1136 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)