2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * Gabriele Gristina <matrix@hashcat.net>
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
22 #define COMPARE_S "OpenCL/check_single_comp4.c"
23 #define COMPARE_M "OpenCL/check_multi_comp4.c"
45 __constant u32 k_sha256[64] =
47 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
48 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
49 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
50 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
51 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
52 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
53 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
54 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
55 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
56 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
57 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
58 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
59 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
60 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
61 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
62 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
65 void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8])
76 u32 w0_t = swap32 (w0[0]);
77 u32 w1_t = swap32 (w0[1]);
78 u32 w2_t = swap32 (w0[2]);
79 u32 w3_t = swap32 (w0[3]);
80 u32 w4_t = swap32 (w1[0]);
81 u32 w5_t = swap32 (w1[1]);
82 u32 w6_t = swap32 (w1[2]);
83 u32 w7_t = swap32 (w1[3]);
84 u32 w8_t = swap32 (w2[0]);
85 u32 w9_t = swap32 (w2[1]);
86 u32 wa_t = swap32 (w2[2]);
87 u32 wb_t = swap32 (w2[3]);
88 u32 wc_t = swap32 (w3[0]);
89 u32 wd_t = swap32 (w3[1]);
90 u32 we_t = swap32 (w3[2]);
91 u32 wf_t = swap32 (w3[3]);
93 #define ROUND256_EXPAND() \
95 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
96 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
97 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
98 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
99 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
100 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
101 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
102 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
103 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
104 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
105 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
106 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
107 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
108 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
109 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
110 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
113 #define ROUND256_STEP(i) \
115 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
116 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
117 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
118 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
119 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
120 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
121 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
122 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
123 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
124 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
125 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
126 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
127 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
128 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
129 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
130 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
138 for (int i = 16; i < 64; i += 16)
140 ROUND256_EXPAND (); ROUND256_STEP (i);
153 __constant u64 k_sha384[80] =
155 SHA384C00, SHA384C01, SHA384C02, SHA384C03,
156 SHA384C04, SHA384C05, SHA384C06, SHA384C07,
157 SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
158 SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
159 SHA384C10, SHA384C11, SHA384C12, SHA384C13,
160 SHA384C14, SHA384C15, SHA384C16, SHA384C17,
161 SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
162 SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
163 SHA384C20, SHA384C21, SHA384C22, SHA384C23,
164 SHA384C24, SHA384C25, SHA384C26, SHA384C27,
165 SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
166 SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
167 SHA384C30, SHA384C31, SHA384C32, SHA384C33,
168 SHA384C34, SHA384C35, SHA384C36, SHA384C37,
169 SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
170 SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
171 SHA384C40, SHA384C41, SHA384C42, SHA384C43,
172 SHA384C44, SHA384C45, SHA384C46, SHA384C47,
173 SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
174 SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
177 void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
188 u64 w0_t = swap64 (w0[0]);
189 u64 w1_t = swap64 (w0[1]);
190 u64 w2_t = swap64 (w0[2]);
191 u64 w3_t = swap64 (w0[3]);
192 u64 w4_t = swap64 (w1[0]);
193 u64 w5_t = swap64 (w1[1]);
194 u64 w6_t = swap64 (w1[2]);
195 u64 w7_t = swap64 (w1[3]);
196 u64 w8_t = swap64 (w2[0]);
197 u64 w9_t = swap64 (w2[1]);
198 u64 wa_t = swap64 (w2[2]);
199 u64 wb_t = swap64 (w2[3]);
200 u64 wc_t = swap64 (w3[0]);
201 u64 wd_t = swap64 (w3[1]);
202 u64 we_t = swap64 (w3[2]);
203 u64 wf_t = swap64 (w3[3]);
205 #define ROUND384_EXPAND() \
207 w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \
208 w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \
209 w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \
210 w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \
211 w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \
212 w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \
213 w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \
214 w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \
215 w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \
216 w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \
217 wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \
218 wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \
219 wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \
220 wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \
221 we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \
222 wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \
225 #define ROUND384_STEP(i) \
227 SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \
228 SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \
229 SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \
230 SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \
231 SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \
232 SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \
233 SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \
234 SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \
235 SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \
236 SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \
237 SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \
238 SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \
239 SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \
240 SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \
241 SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \
242 SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \
250 for (int i = 16; i < 80; i += 16)
252 ROUND384_EXPAND (); ROUND384_STEP (i);
265 __constant u64 k_sha512[80] =
267 SHA384C00, SHA384C01, SHA384C02, SHA384C03,
268 SHA384C04, SHA384C05, SHA384C06, SHA384C07,
269 SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
270 SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
271 SHA384C10, SHA384C11, SHA384C12, SHA384C13,
272 SHA384C14, SHA384C15, SHA384C16, SHA384C17,
273 SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
274 SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
275 SHA384C20, SHA384C21, SHA384C22, SHA384C23,
276 SHA384C24, SHA384C25, SHA384C26, SHA384C27,
277 SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
278 SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
279 SHA384C30, SHA384C31, SHA384C32, SHA384C33,
280 SHA384C34, SHA384C35, SHA384C36, SHA384C37,
281 SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
282 SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
283 SHA384C40, SHA384C41, SHA384C42, SHA384C43,
284 SHA384C44, SHA384C45, SHA384C46, SHA384C47,
285 SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
286 SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
289 void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
300 u64 w0_t = swap64 (w0[0]);
301 u64 w1_t = swap64 (w0[1]);
302 u64 w2_t = swap64 (w0[2]);
303 u64 w3_t = swap64 (w0[3]);
304 u64 w4_t = swap64 (w1[0]);
305 u64 w5_t = swap64 (w1[1]);
306 u64 w6_t = swap64 (w1[2]);
307 u64 w7_t = swap64 (w1[3]);
308 u64 w8_t = swap64 (w2[0]);
309 u64 w9_t = swap64 (w2[1]);
310 u64 wa_t = swap64 (w2[2]);
311 u64 wb_t = swap64 (w2[3]);
312 u64 wc_t = swap64 (w3[0]);
313 u64 wd_t = swap64 (w3[1]);
314 u64 we_t = swap64 (w3[2]);
315 u64 wf_t = swap64 (w3[3]);
317 #define ROUND512_EXPAND() \
319 w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
320 w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
321 w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
322 w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
323 w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
324 w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
325 w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
326 w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
327 w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
328 w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
329 wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
330 wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
331 wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
332 wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
333 we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
334 wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
337 #define ROUND512_STEP(i) \
339 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
340 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
341 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
342 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
343 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
344 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
345 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
346 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
347 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
348 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
349 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
350 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
351 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
352 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
353 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
354 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
362 for (int i = 16; i < 80; i += 16)
364 ROUND512_EXPAND (); ROUND512_STEP (i);
377 __constant u32 te0[256] =
379 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
380 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
381 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
382 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
383 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
384 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
385 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
386 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
387 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
388 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
389 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
390 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
391 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
392 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
393 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
394 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
395 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
396 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
397 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
398 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
399 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
400 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
401 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
402 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
403 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
404 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
405 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
406 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
407 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
408 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
409 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
410 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
411 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
412 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
413 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
414 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
415 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
416 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
417 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
418 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
419 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
420 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
421 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
422 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
423 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
424 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
425 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
426 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
427 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
428 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
429 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
430 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
431 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
432 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
433 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
434 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
435 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
436 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
437 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
438 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
439 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
440 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
441 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
442 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
445 __constant u32 te1[256] =
447 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
448 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
449 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
450 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
451 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
452 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
453 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
454 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
455 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
456 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
457 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
458 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
459 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
460 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
461 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
462 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
463 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
464 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
465 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
466 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
467 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
468 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
469 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
470 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
471 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
472 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
473 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
474 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
475 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
476 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
477 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
478 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
479 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
480 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
481 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
482 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
483 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
484 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
485 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
486 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
487 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
488 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
489 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
490 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
491 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
492 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
493 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
494 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
495 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
496 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
497 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
498 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
499 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
500 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
501 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
502 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
503 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
504 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
505 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
506 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
507 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
508 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
509 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
510 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
513 __constant u32 te2[256] =
515 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
516 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
517 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
518 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
519 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
520 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
521 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
522 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
523 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
524 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
525 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
526 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
527 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
528 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
529 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
530 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
531 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
532 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
533 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
534 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
535 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
536 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
537 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
538 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
539 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
540 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
541 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
542 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
543 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
544 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
545 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
546 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
547 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
548 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
549 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
550 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
551 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
552 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
553 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
554 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
555 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
556 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
557 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
558 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
559 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
560 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
561 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
562 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
563 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
564 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
565 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
566 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
567 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
568 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
569 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
570 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
571 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
572 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
573 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
574 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
575 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
576 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
577 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
578 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
581 __constant u32 te3[256] =
583 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
584 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
585 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
586 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
587 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
588 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
589 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
590 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
591 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
592 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
593 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
594 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
595 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
596 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
597 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
598 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
599 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
600 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
601 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
602 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
603 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
604 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
605 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
606 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
607 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
608 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
609 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
610 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
611 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
612 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
613 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
614 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
615 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
616 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
617 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
618 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
619 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
620 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
621 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
622 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
623 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
624 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
625 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
626 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
627 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
628 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
629 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
630 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
631 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
632 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
633 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
634 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
635 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
636 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
637 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
638 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
639 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
640 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
641 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
642 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
643 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
644 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
645 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
646 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
649 __constant u32 te4[256] =
651 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
652 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
653 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
654 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
655 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
656 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
657 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
658 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
659 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
660 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
661 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
662 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
663 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
664 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
665 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
666 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
667 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
668 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
669 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
670 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
671 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
672 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
673 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
674 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
675 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
676 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
677 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
678 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
679 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
680 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
681 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
682 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
683 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
684 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
685 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
686 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
687 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
688 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
689 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
690 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
691 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
692 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
693 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
694 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
695 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
696 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
697 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
698 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
699 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
700 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
701 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
702 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
703 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
704 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
705 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
706 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
707 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
708 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
709 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
710 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
711 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
712 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
713 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
714 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
717 __constant u32 rcon[] =
719 0x01000000, 0x02000000, 0x04000000, 0x08000000,
720 0x10000000, 0x20000000, 0x40000000, 0x80000000,
721 0x1b000000, 0x36000000,
724 void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
726 rek[0] = swap32 (userkey[0]);
727 rek[1] = swap32 (userkey[1]);
728 rek[2] = swap32 (userkey[2]);
729 rek[3] = swap32 (userkey[3]);
731 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
733 u32 temp = rek[j + 3];
735 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
736 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
737 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
738 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
740 rek[j + 4] = rek[j + 0]
744 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
745 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
746 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
750 void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
754 in_swap[0] = swap32 (in[0]);
755 in_swap[1] = swap32 (in[1]);
756 in_swap[2] = swap32 (in[2]);
757 in_swap[3] = swap32 (in[3]);
759 u32 s0 = in_swap[0] ^ rek[0];
760 u32 s1 = in_swap[1] ^ rek[1];
761 u32 s2 = in_swap[2] ^ rek[2];
762 u32 s3 = in_swap[3] ^ rek[3];
769 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
770 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
771 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
772 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
773 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
774 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
775 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
776 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
777 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
778 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
779 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
780 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
781 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
782 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
783 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
784 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
785 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
786 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
787 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
788 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
789 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
790 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
791 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
792 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
793 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
794 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
795 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
796 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
797 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
798 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
799 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
800 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
801 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
802 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
803 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
804 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
806 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
807 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
808 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
809 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
812 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
813 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
814 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
815 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
818 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
819 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
820 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
821 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
824 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
825 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
826 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
827 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
830 out[0] = swap32 (out[0]);
831 out[1] = swap32 (out[1]);
832 out[2] = swap32 (out[2]);
833 out[3] = swap32 (out[3]);
836 void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
841 block0[0] = append[0];
842 block0[1] = append[1];
846 block0[0] = block0[0] | append[0] << 8;
847 block0[1] = append[0] >> 24 | append[1] << 8;
848 block0[2] = append[1] >> 24;
852 block0[0] = block0[0] | append[0] << 16;
853 block0[1] = append[0] >> 16 | append[1] << 16;
854 block0[2] = append[1] >> 16;
858 block0[0] = block0[0] | append[0] << 24;
859 block0[1] = append[0] >> 8 | append[1] << 24;
860 block0[2] = append[1] >> 8;
864 block0[1] = append[0];
865 block0[2] = append[1];
869 block0[1] = block0[1] | append[0] << 8;
870 block0[2] = append[0] >> 24 | append[1] << 8;
871 block0[3] = append[1] >> 24;
875 block0[1] = block0[1] | append[0] << 16;
876 block0[2] = append[0] >> 16 | append[1] << 16;
877 block0[3] = append[1] >> 16;
881 block0[1] = block0[1] | append[0] << 24;
882 block0[2] = append[0] >> 8 | append[1] << 24;
883 block0[3] = append[1] >> 8;
887 block0[2] = append[0];
888 block0[3] = append[1];
892 block0[2] = block0[2] | append[0] << 8;
893 block0[3] = append[0] >> 24 | append[1] << 8;
894 block1[0] = append[1] >> 24;
898 block0[2] = block0[2] | append[0] << 16;
899 block0[3] = append[0] >> 16 | append[1] << 16;
900 block1[0] = append[1] >> 16;
904 block0[2] = block0[2] | append[0] << 24;
905 block0[3] = append[0] >> 8 | append[1] << 24;
906 block1[0] = append[1] >> 8;
910 block0[3] = append[0];
911 block1[0] = append[1];
915 block0[3] = block0[3] | append[0] << 8;
916 block1[0] = append[0] >> 24 | append[1] << 8;
917 block1[1] = append[1] >> 24;
921 block0[3] = block0[3] | append[0] << 16;
922 block1[0] = append[0] >> 16 | append[1] << 16;
923 block1[1] = append[1] >> 16;
927 block0[3] = block0[3] | append[0] << 24;
928 block1[0] = append[0] >> 8 | append[1] << 24;
929 block1[1] = append[1] >> 8;
933 block1[0] = append[0];
934 block1[1] = append[1];
938 block1[0] = block1[0] | append[0] << 8;
939 block1[1] = append[0] >> 24 | append[1] << 8;
940 block1[2] = append[1] >> 24;
944 block1[0] = block1[0] | append[0] << 16;
945 block1[1] = append[0] >> 16 | append[1] << 16;
946 block1[2] = append[1] >> 16;
950 block1[0] = block1[0] | append[0] << 24;
951 block1[1] = append[0] >> 8 | append[1] << 24;
952 block1[2] = append[1] >> 8;
956 block1[1] = append[0];
957 block1[2] = append[1];
961 block1[1] = block1[1] | append[0] << 8;
962 block1[2] = append[0] >> 24 | append[1] << 8;
963 block1[3] = append[1] >> 24;
967 block1[1] = block1[1] | append[0] << 16;
968 block1[2] = append[0] >> 16 | append[1] << 16;
969 block1[3] = append[1] >> 16;
973 block1[1] = block1[1] | append[0] << 24;
974 block1[2] = append[0] >> 8 | append[1] << 24;
975 block1[3] = append[1] >> 8;
979 block1[2] = append[0];
980 block1[3] = append[1];
984 block1[2] = block1[2] | append[0] << 8;
985 block1[3] = append[0] >> 24 | append[1] << 8;
986 block2[0] = append[1] >> 24;
990 block1[2] = block1[2] | append[0] << 16;
991 block1[3] = append[0] >> 16 | append[1] << 16;
992 block2[0] = append[1] >> 16;
996 block1[2] = block1[2] | append[0] << 24;
997 block1[3] = append[0] >> 8 | append[1] << 24;
998 block2[0] = append[1] >> 8;
1002 block1[3] = append[0];
1003 block2[0] = append[1];
1007 block1[3] = block1[3] | append[0] << 8;
1008 block2[0] = append[0] >> 24 | append[1] << 8;
1009 block2[1] = append[1] >> 24;
1013 block1[3] = block1[3] | append[0] << 16;
1014 block2[0] = append[0] >> 16 | append[1] << 16;
1015 block2[1] = append[1] >> 16;
1019 block1[3] = block1[3] | append[0] << 24;
1020 block2[0] = append[0] >> 8 | append[1] << 24;
1021 block2[1] = append[1] >> 8;
1025 block2[0] = append[0];
1026 block2[1] = append[1];
1030 block2[0] = block2[0] | append[0] << 8;
1031 block2[1] = append[0] >> 24 | append[1] << 8;
1032 block2[2] = append[1] >> 24;
1036 block2[0] = block2[0] | append[0] << 16;
1037 block2[1] = append[0] >> 16 | append[1] << 16;
1038 block2[2] = append[1] >> 16;
1042 block2[0] = block2[0] | append[0] << 24;
1043 block2[1] = append[0] >> 8 | append[1] << 24;
1044 block2[2] = append[1] >> 8;
1048 block2[1] = append[0];
1049 block2[2] = append[1];
1053 block2[1] = block2[1] | append[0] << 8;
1054 block2[2] = append[0] >> 24 | append[1] << 8;
1055 block2[3] = append[1] >> 24;
1059 block2[1] = block2[1] | append[0] << 16;
1060 block2[2] = append[0] >> 16 | append[1] << 16;
1061 block2[3] = append[1] >> 16;
1065 block2[1] = block2[1] | append[0] << 24;
1066 block2[2] = append[0] >> 8 | append[1] << 24;
1067 block2[3] = append[1] >> 8;
1071 block2[2] = append[0];
1072 block2[3] = append[1];
1076 block2[2] = block2[2] | append[0] << 8;
1077 block2[3] = append[0] >> 24 | append[1] << 8;
1078 block3[0] = append[1] >> 24;
1082 block2[2] = block2[2] | append[0] << 16;
1083 block2[3] = append[0] >> 16 | append[1] << 16;
1084 block3[0] = append[1] >> 16;
1088 block2[2] = block2[2] | append[0] << 24;
1089 block2[3] = append[0] >> 8 | append[1] << 24;
1090 block3[0] = append[1] >> 8;
1094 block2[3] = append[0];
1095 block3[0] = append[1];
1099 block2[3] = block2[3] | append[0] << 8;
1100 block3[0] = append[0] >> 24 | append[1] << 8;
1101 block3[1] = append[1] >> 24;
1105 block2[3] = block2[3] | append[0] << 16;
1106 block3[0] = append[0] >> 16 | append[1] << 16;
1107 block3[1] = append[1] >> 16;
1111 block2[3] = block2[3] | append[0] << 24;
1112 block3[0] = append[0] >> 8 | append[1] << 24;
1113 block3[1] = append[1] >> 8;
1117 block3[0] = append[0];
1118 block3[1] = append[1];
1122 block3[0] = block3[0] | append[0] << 8;
1123 block3[1] = append[0] >> 24 | append[1] << 8;
1124 block3[2] = append[1] >> 24;
1128 block3[0] = block3[0] | append[0] << 16;
1129 block3[1] = append[0] >> 16 | append[1] << 16;
1130 block3[2] = append[1] >> 16;
1134 block3[0] = block3[0] | append[0] << 24;
1135 block3[1] = append[0] >> 8 | append[1] << 24;
1136 block3[2] = append[1] >> 8;
1140 block3[1] = append[0];
1141 block3[2] = append[1];
1145 block3[1] = block3[1] | append[0] << 8;
1146 block3[2] = append[0] >> 24 | append[1] << 8;
1147 block3[3] = append[1] >> 24;
1151 block3[1] = block3[1] | append[0] << 16;
1152 block3[2] = append[0] >> 16 | append[1] << 16;
1153 block3[3] = append[1] >> 16;
1157 block3[1] = block3[1] | append[0] << 24;
1158 block3[2] = append[0] >> 8 | append[1] << 24;
1159 block3[3] = append[1] >> 8;
1163 block3[2] = append[0];
1164 block3[3] = append[1];
1169 #define AESSZ 16 // AES_BLOCK_SIZE
1175 #define WORDSZ256 64
1176 #define WORDSZ384 128
1177 #define WORDSZ512 128
1179 #define PWMAXSZ 32 // hashcat password length limit
1180 #define BLMAXSZ BLSZ512
1181 #define WORDMAXSZ WORDSZ512
1183 #define PWMAXSZ4 (PWMAXSZ / 4)
1184 #define BLMAXSZ4 (BLMAXSZ / 4)
1185 #define WORDMAXSZ4 (WORDMAXSZ / 4)
1186 #define AESSZ4 (AESSZ / 4)
1188 void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len)
1190 const u32 bd = bl_len / 4;
1192 const u32 pm = pw_len % 4;
1193 const u32 pd = pw_len / 4;
1199 for (u32 i = 0; i < pd; i++) sc[idx++] = pw[i];
1200 for (u32 i = 0; i < bd; i++) sc[idx++] = bl[i];
1201 for (u32 i = 0; i < 4; i++) sc[idx++] = sc[i];
1209 #if defined IS_AMD || defined IS_GENERIC
1210 for (i = 0; i < pd; i++) sc[idx++] = pw[i];
1212 | amd_bytealign (bl[0], 0, pm4);
1213 for (i = 1; i < bd; i++) sc[idx++] = amd_bytealign (bl[i], bl[i - 1], pm4);
1214 sc[idx++] = amd_bytealign (sc[0], bl[i - 1], pm4);
1215 for (i = 1; i < 4; i++) sc[idx++] = amd_bytealign (sc[i], sc[i - 1], pm4);
1216 sc[idx++] = amd_bytealign ( 0, sc[i - 1], pm4);
1220 int selector = (0x76543210 >> (pm4 * 4)) & 0xffff;
1222 for (i = 0; i < pd; i++) sc[idx++] = pw[i];
1224 | __byte_perm ( 0, bl[0], selector);
1225 for (i = 1; i < bd; i++) sc[idx++] = __byte_perm (bl[i - 1], bl[i], selector);
1226 sc[idx++] = __byte_perm (bl[i - 1], sc[0], selector);
1227 for (i = 1; i < 4; i++) sc[idx++] = __byte_perm (sc[i - 1], sc[i], selector);
1228 sc[idx++] = __byte_perm (sc[i - 1], 0, selector);
1233 void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, const u32 pwbl_len)
1235 const u32 m = offset % pwbl_len;
1237 const u32 om = m % 4;
1238 const u32 od = m / 4;
1240 #if defined IS_AMD || defined IS_GENERIC
1241 pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
1242 pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
1243 pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
1244 pt[3] = amd_bytealign (sc[od + 4], sc[od + 3], om);
1248 int selector = (0x76543210 >> (om * 4)) & 0xffff;
1250 pt[0] = __byte_perm (sc[od + 0], sc[od + 1], selector);
1251 pt[1] = __byte_perm (sc[od + 1], sc[od + 2], selector);
1252 pt[2] = __byte_perm (sc[od + 2], sc[od + 3], selector);
1253 pt[3] = __byte_perm (sc[od + 3], sc[od + 4], selector);
1257 void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *rek, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
1259 for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4)
1263 make_pt_with_offset (pt, offset + k, sc, pwbl_len);
1270 AES128_encrypt (pt, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1272 ctx->W32[wk + 0] = iv[0];
1273 ctx->W32[wk + 1] = iv[1];
1274 ctx->W32[wk + 2] = iv[2];
1275 ctx->W32[wk + 3] = iv[3];
1279 u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
1281 // make scratch buffer
1283 u32 sc[PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
1285 make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
1287 // make sure pwbl_len is calculcated before it gets changed
1289 const u32 pwbl_len = pw_len + ctx->dgst_len;
1295 iv[0] = ctx->dgst32[4];
1296 iv[1] = ctx->dgst32[5];
1297 iv[2] = ctx->dgst32[6];
1298 iv[3] = ctx->dgst32[7];
1304 AES128_ExpandKey (ctx->dgst32, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1306 // first call is special as the hash depends on the result of it
1307 // but since we do not know about the outcome at this time
1308 // we must use the max
1310 make_w_with_offset (ctx, WORDMAXSZ, 0, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1312 // now we can find out hash to use
1316 for (u32 i = 0; i < 4; i++)
1318 sum += (ctx->W32[i] >> 24) & 0xff;
1319 sum += (ctx->W32[i] >> 16) & 0xff;
1320 sum += (ctx->W32[i] >> 8) & 0xff;
1321 sum += (ctx->W32[i] >> 0) & 0xff;
1328 case 0: ctx->dgst32[0] = SHA256M_A;
1329 ctx->dgst32[1] = SHA256M_B;
1330 ctx->dgst32[2] = SHA256M_C;
1331 ctx->dgst32[3] = SHA256M_D;
1332 ctx->dgst32[4] = SHA256M_E;
1333 ctx->dgst32[5] = SHA256M_F;
1334 ctx->dgst32[6] = SHA256M_G;
1335 ctx->dgst32[7] = SHA256M_H;
1336 ctx->dgst_len = BLSZ256;
1337 ctx->W_len = WORDSZ256;
1338 sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1339 sha256_transform (&ctx->W32[16], &ctx->W32[20], &ctx->W32[24], &ctx->W32[28], ctx->dgst32);
1341 case 1: ctx->dgst64[0] = SHA384M_A;
1342 ctx->dgst64[1] = SHA384M_B;
1343 ctx->dgst64[2] = SHA384M_C;
1344 ctx->dgst64[3] = SHA384M_D;
1345 ctx->dgst64[4] = SHA384M_E;
1346 ctx->dgst64[5] = SHA384M_F;
1347 ctx->dgst64[6] = SHA384M_G;
1348 ctx->dgst64[7] = SHA384M_H;
1349 ctx->dgst_len = BLSZ384;
1350 ctx->W_len = WORDSZ384;
1351 sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1353 case 2: ctx->dgst64[0] = SHA512M_A;
1354 ctx->dgst64[1] = SHA512M_B;
1355 ctx->dgst64[2] = SHA512M_C;
1356 ctx->dgst64[3] = SHA512M_D;
1357 ctx->dgst64[4] = SHA512M_E;
1358 ctx->dgst64[5] = SHA512M_F;
1359 ctx->dgst64[6] = SHA512M_G;
1360 ctx->dgst64[7] = SHA512M_H;
1361 ctx->dgst_len = BLSZ512;
1362 ctx->W_len = WORDSZ512;
1363 sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1369 const u32 final_len = pwbl_len * 64;
1371 const u32 iter_max = ctx->W_len - (ctx->W_len / 8);
1376 for (offset = WORDMAXSZ, left = final_len - offset; left >= iter_max; offset += ctx->W_len, left -= ctx->W_len)
1378 make_w_with_offset (ctx, ctx->W_len, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1380 switch (ctx->dgst_len)
1382 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1384 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1386 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1395 switch (ctx->dgst_len)
1397 case BLSZ384: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1398 ctx->W64[ 8] = 0x80;
1405 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1406 ex = ctx->W64[7] >> 56;
1408 case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1409 ctx->W64[ 8] = 0x80;
1416 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1417 ex = ctx->W64[7] >> 56;
1423 switch (ctx->dgst_len)
1425 case BLSZ256: ex = ctx->W32[15] >> 24;
1426 ctx->W32[ 0] = 0x80;
1441 ctx->W32[15] = swap32 (final_len * 8);
1443 case BLSZ384: ex = ctx->W64[15] >> 56;
1444 ctx->W64[ 0] = 0x80;
1459 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1461 case BLSZ512: ex = ctx->W64[15] >> 56;
1462 ctx->W64[ 0] = 0x80;
1477 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1482 switch (ctx->dgst_len)
1484 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1485 ctx->dgst32[ 0] = swap32 (ctx->dgst32[0]);
1486 ctx->dgst32[ 1] = swap32 (ctx->dgst32[1]);
1487 ctx->dgst32[ 2] = swap32 (ctx->dgst32[2]);
1488 ctx->dgst32[ 3] = swap32 (ctx->dgst32[3]);
1489 ctx->dgst32[ 4] = swap32 (ctx->dgst32[4]);
1490 ctx->dgst32[ 5] = swap32 (ctx->dgst32[5]);
1491 ctx->dgst32[ 6] = swap32 (ctx->dgst32[6]);
1492 ctx->dgst32[ 7] = swap32 (ctx->dgst32[7]);
1493 ctx->dgst32[ 8] = 0;
1494 ctx->dgst32[ 9] = 0;
1495 ctx->dgst32[10] = 0;
1496 ctx->dgst32[11] = 0;
1497 ctx->dgst32[12] = 0;
1498 ctx->dgst32[13] = 0;
1499 ctx->dgst32[14] = 0;
1500 ctx->dgst32[15] = 0;
1502 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1503 ctx->dgst64[0] = swap64 (ctx->dgst64[0]);
1504 ctx->dgst64[1] = swap64 (ctx->dgst64[1]);
1505 ctx->dgst64[2] = swap64 (ctx->dgst64[2]);
1506 ctx->dgst64[3] = swap64 (ctx->dgst64[3]);
1507 ctx->dgst64[4] = swap64 (ctx->dgst64[4]);
1508 ctx->dgst64[5] = swap64 (ctx->dgst64[5]);
1512 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1513 ctx->dgst64[0] = swap64 (ctx->dgst64[0]);
1514 ctx->dgst64[1] = swap64 (ctx->dgst64[1]);
1515 ctx->dgst64[2] = swap64 (ctx->dgst64[2]);
1516 ctx->dgst64[3] = swap64 (ctx->dgst64[3]);
1517 ctx->dgst64[4] = swap64 (ctx->dgst64[4]);
1518 ctx->dgst64[5] = swap64 (ctx->dgst64[5]);
1519 ctx->dgst64[6] = swap64 (ctx->dgst64[6]);
1520 ctx->dgst64[7] = swap64 (ctx->dgst64[7]);
1527 __kernel void m10700_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1533 const u32 gid = get_global_id (0);
1535 if (gid >= gid_max) return;
1539 w0[0] = pws[gid].i[0];
1540 w0[1] = pws[gid].i[1];
1541 w0[2] = pws[gid].i[2];
1542 w0[3] = pws[gid].i[3];
1544 const u32 pw_len = pws[gid].pw_len;
1552 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1553 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1555 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1561 u32 block_len = pw_len;
1591 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1593 block_len += salt_len;
1595 append_0x80_2x4 (block0, block1, block_len);
1597 block3[3] = swap32 (block_len * 8);
1601 digest[0] = SHA256M_A;
1602 digest[1] = SHA256M_B;
1603 digest[2] = SHA256M_C;
1604 digest[3] = SHA256M_D;
1605 digest[4] = SHA256M_E;
1606 digest[5] = SHA256M_F;
1607 digest[6] = SHA256M_G;
1608 digest[7] = SHA256M_H;
1610 sha256_transform (block0, block1, block2, block3, digest);
1612 digest[0] = swap32 (digest[0]);
1613 digest[1] = swap32 (digest[1]);
1614 digest[2] = swap32 (digest[2]);
1615 digest[3] = swap32 (digest[3]);
1616 digest[4] = swap32 (digest[4]);
1617 digest[5] = swap32 (digest[5]);
1618 digest[6] = swap32 (digest[6]);
1619 digest[7] = swap32 (digest[7]);
1621 tmps[gid].dgst32[0] = digest[0];
1622 tmps[gid].dgst32[1] = digest[1];
1623 tmps[gid].dgst32[2] = digest[2];
1624 tmps[gid].dgst32[3] = digest[3];
1625 tmps[gid].dgst32[4] = digest[4];
1626 tmps[gid].dgst32[5] = digest[5];
1627 tmps[gid].dgst32[6] = digest[6];
1628 tmps[gid].dgst32[7] = digest[7];
1629 tmps[gid].dgst_len = BLSZ256;
1630 tmps[gid].W_len = WORDSZ256;
1633 __kernel void m10700_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1639 const u32 gid = get_global_id (0);
1640 const u32 lid = get_local_id (0);
1641 const u32 lsz = get_local_size (0);
1647 __local u32 s_te0[256];
1648 __local u32 s_te1[256];
1649 __local u32 s_te2[256];
1650 __local u32 s_te3[256];
1651 __local u32 s_te4[256];
1653 for (u32 i = lid; i < 256; i += lsz)
1662 barrier (CLK_LOCAL_MEM_FENCE);
1664 if (gid >= gid_max) return;
1672 w0[0] = pws[gid].i[0];
1673 w0[1] = pws[gid].i[1];
1674 w0[2] = pws[gid].i[2];
1675 w0[3] = pws[gid].i[3];
1677 const u32 pw_len = pws[gid].pw_len;
1679 if (pw_len == 0) return;
1687 ctx.dgst64[0] = tmps[gid].dgst64[0];
1688 ctx.dgst64[1] = tmps[gid].dgst64[1];
1689 ctx.dgst64[2] = tmps[gid].dgst64[2];
1690 ctx.dgst64[3] = tmps[gid].dgst64[3];
1691 ctx.dgst64[4] = tmps[gid].dgst64[4];
1692 ctx.dgst64[5] = tmps[gid].dgst64[5];
1693 ctx.dgst64[6] = tmps[gid].dgst64[6];
1694 ctx.dgst64[7] = tmps[gid].dgst64[7];
1695 ctx.dgst_len = tmps[gid].dgst_len;
1696 ctx.W_len = tmps[gid].W_len;
1700 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1702 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
1705 if ((loop_pos + loop_cnt) == 64)
1707 for (u32 i = 64; i < ex + 32; i++)
1709 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
1713 tmps[gid].dgst64[0] = ctx.dgst64[0];
1714 tmps[gid].dgst64[1] = ctx.dgst64[1];
1715 tmps[gid].dgst64[2] = ctx.dgst64[2];
1716 tmps[gid].dgst64[3] = ctx.dgst64[3];
1717 tmps[gid].dgst64[4] = ctx.dgst64[4];
1718 tmps[gid].dgst64[5] = ctx.dgst64[5];
1719 tmps[gid].dgst64[6] = ctx.dgst64[6];
1720 tmps[gid].dgst64[7] = ctx.dgst64[7];
1721 tmps[gid].dgst_len = ctx.dgst_len;
1722 tmps[gid].W_len = ctx.W_len;
1725 __kernel void m10700_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1731 const u32 gid = get_global_id (0);
1733 if (gid >= gid_max) return;
1735 const u32 lid = get_local_id (0);
1741 const u32 r0 = swap32 (tmps[gid].dgst32[DGST_R0]);
1742 const u32 r1 = swap32 (tmps[gid].dgst32[DGST_R1]);
1743 const u32 r2 = swap32 (tmps[gid].dgst32[DGST_R2]);
1744 const u32 r3 = swap32 (tmps[gid].dgst32[DGST_R3]);