2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * Gabriele Gristina <matrix@hashcat.net>
10 #include "inc_vendor.cl"
11 #include "inc_hash_constants.h"
12 #include "inc_hash_functions.cl"
13 #include "inc_types.cl"
14 #include "inc_common.cl"
16 #define COMPARE_S "inc_comp_single.cl"
17 #define COMPARE_M "inc_comp_multi.cl"
39 __constant u32 k_sha256[64] =
41 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
42 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
43 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
44 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
45 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
46 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
47 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
48 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
49 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
50 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
51 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
52 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
53 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
54 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
55 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
56 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
59 void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8])
70 u32 w0_t = swap32 (w0[0]);
71 u32 w1_t = swap32 (w0[1]);
72 u32 w2_t = swap32 (w0[2]);
73 u32 w3_t = swap32 (w0[3]);
74 u32 w4_t = swap32 (w1[0]);
75 u32 w5_t = swap32 (w1[1]);
76 u32 w6_t = swap32 (w1[2]);
77 u32 w7_t = swap32 (w1[3]);
78 u32 w8_t = swap32 (w2[0]);
79 u32 w9_t = swap32 (w2[1]);
80 u32 wa_t = swap32 (w2[2]);
81 u32 wb_t = swap32 (w2[3]);
82 u32 wc_t = swap32 (w3[0]);
83 u32 wd_t = swap32 (w3[1]);
84 u32 we_t = swap32 (w3[2]);
85 u32 wf_t = swap32 (w3[3]);
87 #define ROUND256_EXPAND() \
89 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
90 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
91 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
92 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
93 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
94 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
95 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
96 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
97 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
98 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
99 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
100 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
101 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
102 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
103 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
104 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
107 #define ROUND256_STEP(i) \
109 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
110 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
111 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
112 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
113 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
114 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
115 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
116 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
117 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
118 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
119 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
120 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
121 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
122 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
123 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
124 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
132 for (int i = 16; i < 64; i += 16)
134 ROUND256_EXPAND (); ROUND256_STEP (i);
147 __constant u64 k_sha384[80] =
149 SHA384C00, SHA384C01, SHA384C02, SHA384C03,
150 SHA384C04, SHA384C05, SHA384C06, SHA384C07,
151 SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
152 SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
153 SHA384C10, SHA384C11, SHA384C12, SHA384C13,
154 SHA384C14, SHA384C15, SHA384C16, SHA384C17,
155 SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
156 SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
157 SHA384C20, SHA384C21, SHA384C22, SHA384C23,
158 SHA384C24, SHA384C25, SHA384C26, SHA384C27,
159 SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
160 SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
161 SHA384C30, SHA384C31, SHA384C32, SHA384C33,
162 SHA384C34, SHA384C35, SHA384C36, SHA384C37,
163 SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
164 SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
165 SHA384C40, SHA384C41, SHA384C42, SHA384C43,
166 SHA384C44, SHA384C45, SHA384C46, SHA384C47,
167 SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
168 SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
171 void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
182 u64 w0_t = swap64 (w0[0]);
183 u64 w1_t = swap64 (w0[1]);
184 u64 w2_t = swap64 (w0[2]);
185 u64 w3_t = swap64 (w0[3]);
186 u64 w4_t = swap64 (w1[0]);
187 u64 w5_t = swap64 (w1[1]);
188 u64 w6_t = swap64 (w1[2]);
189 u64 w7_t = swap64 (w1[3]);
190 u64 w8_t = swap64 (w2[0]);
191 u64 w9_t = swap64 (w2[1]);
192 u64 wa_t = swap64 (w2[2]);
193 u64 wb_t = swap64 (w2[3]);
194 u64 wc_t = swap64 (w3[0]);
195 u64 wd_t = swap64 (w3[1]);
196 u64 we_t = swap64 (w3[2]);
197 u64 wf_t = swap64 (w3[3]);
199 #define ROUND384_EXPAND() \
201 w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \
202 w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \
203 w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \
204 w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \
205 w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \
206 w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \
207 w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \
208 w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \
209 w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \
210 w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \
211 wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \
212 wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \
213 wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \
214 wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \
215 we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \
216 wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \
219 #define ROUND384_STEP(i) \
221 SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \
222 SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \
223 SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \
224 SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \
225 SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \
226 SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \
227 SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \
228 SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \
229 SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \
230 SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \
231 SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \
232 SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \
233 SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \
234 SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \
235 SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \
236 SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \
244 for (int i = 16; i < 80; i += 16)
246 ROUND384_EXPAND (); ROUND384_STEP (i);
259 __constant u64 k_sha512[80] =
261 SHA384C00, SHA384C01, SHA384C02, SHA384C03,
262 SHA384C04, SHA384C05, SHA384C06, SHA384C07,
263 SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
264 SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
265 SHA384C10, SHA384C11, SHA384C12, SHA384C13,
266 SHA384C14, SHA384C15, SHA384C16, SHA384C17,
267 SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
268 SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
269 SHA384C20, SHA384C21, SHA384C22, SHA384C23,
270 SHA384C24, SHA384C25, SHA384C26, SHA384C27,
271 SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
272 SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
273 SHA384C30, SHA384C31, SHA384C32, SHA384C33,
274 SHA384C34, SHA384C35, SHA384C36, SHA384C37,
275 SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
276 SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
277 SHA384C40, SHA384C41, SHA384C42, SHA384C43,
278 SHA384C44, SHA384C45, SHA384C46, SHA384C47,
279 SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
280 SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
283 void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
294 u64 w0_t = swap64 (w0[0]);
295 u64 w1_t = swap64 (w0[1]);
296 u64 w2_t = swap64 (w0[2]);
297 u64 w3_t = swap64 (w0[3]);
298 u64 w4_t = swap64 (w1[0]);
299 u64 w5_t = swap64 (w1[1]);
300 u64 w6_t = swap64 (w1[2]);
301 u64 w7_t = swap64 (w1[3]);
302 u64 w8_t = swap64 (w2[0]);
303 u64 w9_t = swap64 (w2[1]);
304 u64 wa_t = swap64 (w2[2]);
305 u64 wb_t = swap64 (w2[3]);
306 u64 wc_t = swap64 (w3[0]);
307 u64 wd_t = swap64 (w3[1]);
308 u64 we_t = swap64 (w3[2]);
309 u64 wf_t = swap64 (w3[3]);
311 #define ROUND512_EXPAND() \
313 w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
314 w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
315 w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
316 w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
317 w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
318 w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
319 w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
320 w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
321 w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
322 w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
323 wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
324 wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
325 wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
326 wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
327 we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
328 wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
331 #define ROUND512_STEP(i) \
333 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
334 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
335 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
336 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
337 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
338 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
339 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
340 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
341 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
342 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
343 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
344 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
345 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
346 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
347 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
348 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
356 for (int i = 16; i < 80; i += 16)
358 ROUND512_EXPAND (); ROUND512_STEP (i);
371 __constant u32 te0[256] =
373 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
374 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
375 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
376 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
377 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
378 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
379 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
380 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
381 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
382 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
383 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
384 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
385 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
386 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
387 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
388 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
389 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
390 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
391 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
392 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
393 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
394 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
395 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
396 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
397 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
398 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
399 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
400 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
401 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
402 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
403 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
404 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
405 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
406 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
407 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
408 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
409 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
410 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
411 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
412 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
413 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
414 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
415 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
416 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
417 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
418 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
419 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
420 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
421 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
422 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
423 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
424 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
425 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
426 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
427 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
428 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
429 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
430 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
431 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
432 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
433 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
434 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
435 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
436 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
439 __constant u32 te1[256] =
441 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
442 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
443 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
444 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
445 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
446 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
447 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
448 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
449 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
450 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
451 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
452 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
453 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
454 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
455 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
456 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
457 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
458 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
459 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
460 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
461 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
462 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
463 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
464 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
465 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
466 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
467 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
468 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
469 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
470 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
471 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
472 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
473 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
474 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
475 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
476 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
477 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
478 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
479 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
480 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
481 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
482 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
483 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
484 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
485 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
486 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
487 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
488 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
489 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
490 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
491 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
492 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
493 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
494 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
495 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
496 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
497 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
498 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
499 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
500 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
501 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
502 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
503 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
504 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
507 __constant u32 te2[256] =
509 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
510 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
511 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
512 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
513 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
514 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
515 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
516 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
517 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
518 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
519 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
520 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
521 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
522 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
523 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
524 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
525 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
526 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
527 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
528 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
529 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
530 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
531 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
532 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
533 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
534 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
535 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
536 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
537 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
538 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
539 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
540 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
541 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
542 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
543 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
544 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
545 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
546 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
547 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
548 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
549 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
550 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
551 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
552 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
553 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
554 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
555 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
556 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
557 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
558 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
559 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
560 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
561 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
562 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
563 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
564 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
565 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
566 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
567 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
568 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
569 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
570 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
571 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
572 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
575 __constant u32 te3[256] =
577 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
578 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
579 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
580 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
581 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
582 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
583 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
584 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
585 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
586 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
587 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
588 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
589 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
590 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
591 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
592 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
593 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
594 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
595 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
596 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
597 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
598 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
599 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
600 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
601 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
602 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
603 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
604 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
605 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
606 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
607 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
608 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
609 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
610 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
611 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
612 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
613 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
614 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
615 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
616 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
617 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
618 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
619 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
620 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
621 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
622 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
623 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
624 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
625 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
626 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
627 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
628 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
629 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
630 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
631 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
632 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
633 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
634 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
635 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
636 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
637 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
638 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
639 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
640 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
643 __constant u32 te4[256] =
645 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
646 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
647 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
648 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
649 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
650 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
651 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
652 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
653 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
654 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
655 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
656 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
657 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
658 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
659 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
660 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
661 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
662 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
663 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
664 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
665 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
666 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
667 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
668 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
669 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
670 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
671 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
672 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
673 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
674 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
675 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
676 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
677 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
678 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
679 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
680 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
681 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
682 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
683 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
684 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
685 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
686 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
687 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
688 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
689 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
690 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
691 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
692 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
693 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
694 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
695 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
696 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
697 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
698 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
699 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
700 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
701 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
702 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
703 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
704 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
705 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
706 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
707 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
708 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
711 __constant u32 rcon[] =
713 0x01000000, 0x02000000, 0x04000000, 0x08000000,
714 0x10000000, 0x20000000, 0x40000000, 0x80000000,
715 0x1b000000, 0x36000000,
718 void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
720 rek[0] = swap32 (userkey[0]);
721 rek[1] = swap32 (userkey[1]);
722 rek[2] = swap32 (userkey[2]);
723 rek[3] = swap32 (userkey[3]);
725 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
727 u32 temp = rek[j + 3];
729 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
730 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
731 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
732 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
734 rek[j + 4] = rek[j + 0]
738 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
739 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
740 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
744 void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
748 in_swap[0] = swap32 (in[0]);
749 in_swap[1] = swap32 (in[1]);
750 in_swap[2] = swap32 (in[2]);
751 in_swap[3] = swap32 (in[3]);
753 u32 s0 = in_swap[0] ^ rek[0];
754 u32 s1 = in_swap[1] ^ rek[1];
755 u32 s2 = in_swap[2] ^ rek[2];
756 u32 s3 = in_swap[3] ^ rek[3];
763 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
764 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
765 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
766 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
767 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
768 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
769 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
770 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
771 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
772 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
773 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
774 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
775 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
776 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
777 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
778 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
779 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
780 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
781 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
782 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
783 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
784 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
785 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
786 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
787 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
788 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
789 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
790 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
791 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
792 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
793 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
794 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
795 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
796 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
797 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
798 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
800 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
801 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
802 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
803 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
806 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
807 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
808 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
809 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
812 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
813 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
814 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
815 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
818 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
819 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
820 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
821 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
824 out[0] = swap32 (out[0]);
825 out[1] = swap32 (out[1]);
826 out[2] = swap32 (out[2]);
827 out[3] = swap32 (out[3]);
830 void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
835 block0[0] = append[0];
836 block0[1] = append[1];
840 block0[0] = block0[0] | append[0] << 8;
841 block0[1] = append[0] >> 24 | append[1] << 8;
842 block0[2] = append[1] >> 24;
846 block0[0] = block0[0] | append[0] << 16;
847 block0[1] = append[0] >> 16 | append[1] << 16;
848 block0[2] = append[1] >> 16;
852 block0[0] = block0[0] | append[0] << 24;
853 block0[1] = append[0] >> 8 | append[1] << 24;
854 block0[2] = append[1] >> 8;
858 block0[1] = append[0];
859 block0[2] = append[1];
863 block0[1] = block0[1] | append[0] << 8;
864 block0[2] = append[0] >> 24 | append[1] << 8;
865 block0[3] = append[1] >> 24;
869 block0[1] = block0[1] | append[0] << 16;
870 block0[2] = append[0] >> 16 | append[1] << 16;
871 block0[3] = append[1] >> 16;
875 block0[1] = block0[1] | append[0] << 24;
876 block0[2] = append[0] >> 8 | append[1] << 24;
877 block0[3] = append[1] >> 8;
881 block0[2] = append[0];
882 block0[3] = append[1];
886 block0[2] = block0[2] | append[0] << 8;
887 block0[3] = append[0] >> 24 | append[1] << 8;
888 block1[0] = append[1] >> 24;
892 block0[2] = block0[2] | append[0] << 16;
893 block0[3] = append[0] >> 16 | append[1] << 16;
894 block1[0] = append[1] >> 16;
898 block0[2] = block0[2] | append[0] << 24;
899 block0[3] = append[0] >> 8 | append[1] << 24;
900 block1[0] = append[1] >> 8;
904 block0[3] = append[0];
905 block1[0] = append[1];
909 block0[3] = block0[3] | append[0] << 8;
910 block1[0] = append[0] >> 24 | append[1] << 8;
911 block1[1] = append[1] >> 24;
915 block0[3] = block0[3] | append[0] << 16;
916 block1[0] = append[0] >> 16 | append[1] << 16;
917 block1[1] = append[1] >> 16;
921 block0[3] = block0[3] | append[0] << 24;
922 block1[0] = append[0] >> 8 | append[1] << 24;
923 block1[1] = append[1] >> 8;
927 block1[0] = append[0];
928 block1[1] = append[1];
932 block1[0] = block1[0] | append[0] << 8;
933 block1[1] = append[0] >> 24 | append[1] << 8;
934 block1[2] = append[1] >> 24;
938 block1[0] = block1[0] | append[0] << 16;
939 block1[1] = append[0] >> 16 | append[1] << 16;
940 block1[2] = append[1] >> 16;
944 block1[0] = block1[0] | append[0] << 24;
945 block1[1] = append[0] >> 8 | append[1] << 24;
946 block1[2] = append[1] >> 8;
950 block1[1] = append[0];
951 block1[2] = append[1];
955 block1[1] = block1[1] | append[0] << 8;
956 block1[2] = append[0] >> 24 | append[1] << 8;
957 block1[3] = append[1] >> 24;
961 block1[1] = block1[1] | append[0] << 16;
962 block1[2] = append[0] >> 16 | append[1] << 16;
963 block1[3] = append[1] >> 16;
967 block1[1] = block1[1] | append[0] << 24;
968 block1[2] = append[0] >> 8 | append[1] << 24;
969 block1[3] = append[1] >> 8;
973 block1[2] = append[0];
974 block1[3] = append[1];
978 block1[2] = block1[2] | append[0] << 8;
979 block1[3] = append[0] >> 24 | append[1] << 8;
980 block2[0] = append[1] >> 24;
984 block1[2] = block1[2] | append[0] << 16;
985 block1[3] = append[0] >> 16 | append[1] << 16;
986 block2[0] = append[1] >> 16;
990 block1[2] = block1[2] | append[0] << 24;
991 block1[3] = append[0] >> 8 | append[1] << 24;
992 block2[0] = append[1] >> 8;
996 block1[3] = append[0];
997 block2[0] = append[1];
1001 block1[3] = block1[3] | append[0] << 8;
1002 block2[0] = append[0] >> 24 | append[1] << 8;
1003 block2[1] = append[1] >> 24;
1007 block1[3] = block1[3] | append[0] << 16;
1008 block2[0] = append[0] >> 16 | append[1] << 16;
1009 block2[1] = append[1] >> 16;
1013 block1[3] = block1[3] | append[0] << 24;
1014 block2[0] = append[0] >> 8 | append[1] << 24;
1015 block2[1] = append[1] >> 8;
1019 block2[0] = append[0];
1020 block2[1] = append[1];
1024 block2[0] = block2[0] | append[0] << 8;
1025 block2[1] = append[0] >> 24 | append[1] << 8;
1026 block2[2] = append[1] >> 24;
1030 block2[0] = block2[0] | append[0] << 16;
1031 block2[1] = append[0] >> 16 | append[1] << 16;
1032 block2[2] = append[1] >> 16;
1036 block2[0] = block2[0] | append[0] << 24;
1037 block2[1] = append[0] >> 8 | append[1] << 24;
1038 block2[2] = append[1] >> 8;
1042 block2[1] = append[0];
1043 block2[2] = append[1];
1047 block2[1] = block2[1] | append[0] << 8;
1048 block2[2] = append[0] >> 24 | append[1] << 8;
1049 block2[3] = append[1] >> 24;
1053 block2[1] = block2[1] | append[0] << 16;
1054 block2[2] = append[0] >> 16 | append[1] << 16;
1055 block2[3] = append[1] >> 16;
1059 block2[1] = block2[1] | append[0] << 24;
1060 block2[2] = append[0] >> 8 | append[1] << 24;
1061 block2[3] = append[1] >> 8;
1065 block2[2] = append[0];
1066 block2[3] = append[1];
1070 block2[2] = block2[2] | append[0] << 8;
1071 block2[3] = append[0] >> 24 | append[1] << 8;
1072 block3[0] = append[1] >> 24;
1076 block2[2] = block2[2] | append[0] << 16;
1077 block2[3] = append[0] >> 16 | append[1] << 16;
1078 block3[0] = append[1] >> 16;
1082 block2[2] = block2[2] | append[0] << 24;
1083 block2[3] = append[0] >> 8 | append[1] << 24;
1084 block3[0] = append[1] >> 8;
1088 block2[3] = append[0];
1089 block3[0] = append[1];
1093 block2[3] = block2[3] | append[0] << 8;
1094 block3[0] = append[0] >> 24 | append[1] << 8;
1095 block3[1] = append[1] >> 24;
1099 block2[3] = block2[3] | append[0] << 16;
1100 block3[0] = append[0] >> 16 | append[1] << 16;
1101 block3[1] = append[1] >> 16;
1105 block2[3] = block2[3] | append[0] << 24;
1106 block3[0] = append[0] >> 8 | append[1] << 24;
1107 block3[1] = append[1] >> 8;
1111 block3[0] = append[0];
1112 block3[1] = append[1];
1116 block3[0] = block3[0] | append[0] << 8;
1117 block3[1] = append[0] >> 24 | append[1] << 8;
1118 block3[2] = append[1] >> 24;
1122 block3[0] = block3[0] | append[0] << 16;
1123 block3[1] = append[0] >> 16 | append[1] << 16;
1124 block3[2] = append[1] >> 16;
1128 block3[0] = block3[0] | append[0] << 24;
1129 block3[1] = append[0] >> 8 | append[1] << 24;
1130 block3[2] = append[1] >> 8;
1134 block3[1] = append[0];
1135 block3[2] = append[1];
1139 block3[1] = block3[1] | append[0] << 8;
1140 block3[2] = append[0] >> 24 | append[1] << 8;
1141 block3[3] = append[1] >> 24;
1145 block3[1] = block3[1] | append[0] << 16;
1146 block3[2] = append[0] >> 16 | append[1] << 16;
1147 block3[3] = append[1] >> 16;
1151 block3[1] = block3[1] | append[0] << 24;
1152 block3[2] = append[0] >> 8 | append[1] << 24;
1153 block3[3] = append[1] >> 8;
1157 block3[2] = append[0];
1158 block3[3] = append[1];
1163 #define AESSZ 16 // AES_BLOCK_SIZE
1169 #define WORDSZ256 64
1170 #define WORDSZ384 128
1171 #define WORDSZ512 128
1173 #define PWMAXSZ 32 // hashcat password length limit
1174 #define BLMAXSZ BLSZ512
1175 #define WORDMAXSZ WORDSZ512
1177 #define PWMAXSZ4 (PWMAXSZ / 4)
1178 #define BLMAXSZ4 (BLMAXSZ / 4)
1179 #define WORDMAXSZ4 (WORDMAXSZ / 4)
1180 #define AESSZ4 (AESSZ / 4)
1182 void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len)
1184 const u32 bd = bl_len / 4;
1186 const u32 pm = pw_len % 4;
1187 const u32 pd = pw_len / 4;
1193 for (u32 i = 0; i < pd; i++) sc[idx++] = pw[i];
1194 for (u32 i = 0; i < bd; i++) sc[idx++] = bl[i];
1195 for (u32 i = 0; i < 4; i++) sc[idx++] = sc[i];
1203 #if defined IS_AMD || defined IS_GENERIC
1204 for (i = 0; i < pd; i++) sc[idx++] = pw[i];
1206 | amd_bytealign (bl[0], 0, pm4);
1207 for (i = 1; i < bd; i++) sc[idx++] = amd_bytealign (bl[i], bl[i - 1], pm4);
1208 sc[idx++] = amd_bytealign (sc[0], bl[i - 1], pm4);
1209 for (i = 1; i < 4; i++) sc[idx++] = amd_bytealign (sc[i], sc[i - 1], pm4);
1210 sc[idx++] = amd_bytealign ( 0, sc[i - 1], pm4);
1214 int selector = (0x76543210 >> (pm4 * 4)) & 0xffff;
1216 for (i = 0; i < pd; i++) sc[idx++] = pw[i];
1218 | __byte_perm ( 0, bl[0], selector);
1219 for (i = 1; i < bd; i++) sc[idx++] = __byte_perm (bl[i - 1], bl[i], selector);
1220 sc[idx++] = __byte_perm (bl[i - 1], sc[0], selector);
1221 for (i = 1; i < 4; i++) sc[idx++] = __byte_perm (sc[i - 1], sc[i], selector);
1222 sc[idx++] = __byte_perm (sc[i - 1], 0, selector);
1227 void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, const u32 pwbl_len)
1229 const u32 m = offset % pwbl_len;
1231 const u32 om = m % 4;
1232 const u32 od = m / 4;
1234 #if defined IS_AMD || defined IS_GENERIC
1235 pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
1236 pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
1237 pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
1238 pt[3] = amd_bytealign (sc[od + 4], sc[od + 3], om);
1242 int selector = (0x76543210 >> (om * 4)) & 0xffff;
1244 pt[0] = __byte_perm (sc[od + 0], sc[od + 1], selector);
1245 pt[1] = __byte_perm (sc[od + 1], sc[od + 2], selector);
1246 pt[2] = __byte_perm (sc[od + 2], sc[od + 3], selector);
1247 pt[3] = __byte_perm (sc[od + 3], sc[od + 4], selector);
1251 void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *rek, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
1253 for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4)
1257 make_pt_with_offset (pt, offset + k, sc, pwbl_len);
1264 AES128_encrypt (pt, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1266 ctx->W32[wk + 0] = iv[0];
1267 ctx->W32[wk + 1] = iv[1];
1268 ctx->W32[wk + 2] = iv[2];
1269 ctx->W32[wk + 3] = iv[3];
1273 u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
1275 // make scratch buffer
1277 u32 sc[PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
1279 make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
1281 // make sure pwbl_len is calculcated before it gets changed
1283 const u32 pwbl_len = pw_len + ctx->dgst_len;
1289 iv[0] = ctx->dgst32[4];
1290 iv[1] = ctx->dgst32[5];
1291 iv[2] = ctx->dgst32[6];
1292 iv[3] = ctx->dgst32[7];
1298 AES128_ExpandKey (ctx->dgst32, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1300 // first call is special as the hash depends on the result of it
1301 // but since we do not know about the outcome at this time
1302 // we must use the max
1304 make_w_with_offset (ctx, WORDMAXSZ, 0, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1306 // now we can find out hash to use
1310 for (u32 i = 0; i < 4; i++)
1312 sum += (ctx->W32[i] >> 24) & 0xff;
1313 sum += (ctx->W32[i] >> 16) & 0xff;
1314 sum += (ctx->W32[i] >> 8) & 0xff;
1315 sum += (ctx->W32[i] >> 0) & 0xff;
1322 case 0: ctx->dgst32[0] = SHA256M_A;
1323 ctx->dgst32[1] = SHA256M_B;
1324 ctx->dgst32[2] = SHA256M_C;
1325 ctx->dgst32[3] = SHA256M_D;
1326 ctx->dgst32[4] = SHA256M_E;
1327 ctx->dgst32[5] = SHA256M_F;
1328 ctx->dgst32[6] = SHA256M_G;
1329 ctx->dgst32[7] = SHA256M_H;
1330 ctx->dgst_len = BLSZ256;
1331 ctx->W_len = WORDSZ256;
1332 sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1333 sha256_transform (&ctx->W32[16], &ctx->W32[20], &ctx->W32[24], &ctx->W32[28], ctx->dgst32);
1335 case 1: ctx->dgst64[0] = SHA384M_A;
1336 ctx->dgst64[1] = SHA384M_B;
1337 ctx->dgst64[2] = SHA384M_C;
1338 ctx->dgst64[3] = SHA384M_D;
1339 ctx->dgst64[4] = SHA384M_E;
1340 ctx->dgst64[5] = SHA384M_F;
1341 ctx->dgst64[6] = SHA384M_G;
1342 ctx->dgst64[7] = SHA384M_H;
1343 ctx->dgst_len = BLSZ384;
1344 ctx->W_len = WORDSZ384;
1345 sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1347 case 2: ctx->dgst64[0] = SHA512M_A;
1348 ctx->dgst64[1] = SHA512M_B;
1349 ctx->dgst64[2] = SHA512M_C;
1350 ctx->dgst64[3] = SHA512M_D;
1351 ctx->dgst64[4] = SHA512M_E;
1352 ctx->dgst64[5] = SHA512M_F;
1353 ctx->dgst64[6] = SHA512M_G;
1354 ctx->dgst64[7] = SHA512M_H;
1355 ctx->dgst_len = BLSZ512;
1356 ctx->W_len = WORDSZ512;
1357 sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1363 const u32 final_len = pwbl_len * 64;
1365 const u32 iter_max = ctx->W_len - (ctx->W_len / 8);
1370 for (offset = WORDMAXSZ, left = final_len - offset; left >= iter_max; offset += ctx->W_len, left -= ctx->W_len)
1372 make_w_with_offset (ctx, ctx->W_len, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1374 switch (ctx->dgst_len)
1376 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1378 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1380 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1389 switch (ctx->dgst_len)
1391 case BLSZ384: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1392 ctx->W64[ 8] = 0x80;
1399 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1400 ex = ctx->W64[7] >> 56;
1402 case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1403 ctx->W64[ 8] = 0x80;
1410 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1411 ex = ctx->W64[7] >> 56;
1417 switch (ctx->dgst_len)
1419 case BLSZ256: ex = ctx->W32[15] >> 24;
1420 ctx->W32[ 0] = 0x80;
1435 ctx->W32[15] = swap32 (final_len * 8);
1437 case BLSZ384: ex = ctx->W64[15] >> 56;
1438 ctx->W64[ 0] = 0x80;
1453 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1455 case BLSZ512: ex = ctx->W64[15] >> 56;
1456 ctx->W64[ 0] = 0x80;
1471 ctx->W64[15] = swap64 ((u64) (final_len * 8));
1476 switch (ctx->dgst_len)
1478 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1479 ctx->dgst32[ 0] = swap32 (ctx->dgst32[0]);
1480 ctx->dgst32[ 1] = swap32 (ctx->dgst32[1]);
1481 ctx->dgst32[ 2] = swap32 (ctx->dgst32[2]);
1482 ctx->dgst32[ 3] = swap32 (ctx->dgst32[3]);
1483 ctx->dgst32[ 4] = swap32 (ctx->dgst32[4]);
1484 ctx->dgst32[ 5] = swap32 (ctx->dgst32[5]);
1485 ctx->dgst32[ 6] = swap32 (ctx->dgst32[6]);
1486 ctx->dgst32[ 7] = swap32 (ctx->dgst32[7]);
1487 ctx->dgst32[ 8] = 0;
1488 ctx->dgst32[ 9] = 0;
1489 ctx->dgst32[10] = 0;
1490 ctx->dgst32[11] = 0;
1491 ctx->dgst32[12] = 0;
1492 ctx->dgst32[13] = 0;
1493 ctx->dgst32[14] = 0;
1494 ctx->dgst32[15] = 0;
1496 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1497 ctx->dgst64[0] = swap64 (ctx->dgst64[0]);
1498 ctx->dgst64[1] = swap64 (ctx->dgst64[1]);
1499 ctx->dgst64[2] = swap64 (ctx->dgst64[2]);
1500 ctx->dgst64[3] = swap64 (ctx->dgst64[3]);
1501 ctx->dgst64[4] = swap64 (ctx->dgst64[4]);
1502 ctx->dgst64[5] = swap64 (ctx->dgst64[5]);
1506 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1507 ctx->dgst64[0] = swap64 (ctx->dgst64[0]);
1508 ctx->dgst64[1] = swap64 (ctx->dgst64[1]);
1509 ctx->dgst64[2] = swap64 (ctx->dgst64[2]);
1510 ctx->dgst64[3] = swap64 (ctx->dgst64[3]);
1511 ctx->dgst64[4] = swap64 (ctx->dgst64[4]);
1512 ctx->dgst64[5] = swap64 (ctx->dgst64[5]);
1513 ctx->dgst64[6] = swap64 (ctx->dgst64[6]);
1514 ctx->dgst64[7] = swap64 (ctx->dgst64[7]);
1521 __kernel void m10700_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1527 const u32 gid = get_global_id (0);
1529 if (gid >= gid_max) return;
1533 w0[0] = pws[gid].i[0];
1534 w0[1] = pws[gid].i[1];
1535 w0[2] = pws[gid].i[2];
1536 w0[3] = pws[gid].i[3];
1538 const u32 pw_len = pws[gid].pw_len;
1546 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1547 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1549 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1555 u32 block_len = pw_len;
1585 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1587 block_len += salt_len;
1589 append_0x80_2x4 (block0, block1, block_len);
1591 block3[3] = swap32 (block_len * 8);
1595 digest[0] = SHA256M_A;
1596 digest[1] = SHA256M_B;
1597 digest[2] = SHA256M_C;
1598 digest[3] = SHA256M_D;
1599 digest[4] = SHA256M_E;
1600 digest[5] = SHA256M_F;
1601 digest[6] = SHA256M_G;
1602 digest[7] = SHA256M_H;
1604 sha256_transform (block0, block1, block2, block3, digest);
1606 digest[0] = swap32 (digest[0]);
1607 digest[1] = swap32 (digest[1]);
1608 digest[2] = swap32 (digest[2]);
1609 digest[3] = swap32 (digest[3]);
1610 digest[4] = swap32 (digest[4]);
1611 digest[5] = swap32 (digest[5]);
1612 digest[6] = swap32 (digest[6]);
1613 digest[7] = swap32 (digest[7]);
1615 tmps[gid].dgst32[0] = digest[0];
1616 tmps[gid].dgst32[1] = digest[1];
1617 tmps[gid].dgst32[2] = digest[2];
1618 tmps[gid].dgst32[3] = digest[3];
1619 tmps[gid].dgst32[4] = digest[4];
1620 tmps[gid].dgst32[5] = digest[5];
1621 tmps[gid].dgst32[6] = digest[6];
1622 tmps[gid].dgst32[7] = digest[7];
1623 tmps[gid].dgst_len = BLSZ256;
1624 tmps[gid].W_len = WORDSZ256;
1627 __kernel void m10700_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1633 const u32 gid = get_global_id (0);
1634 const u32 lid = get_local_id (0);
1635 const u32 lsz = get_local_size (0);
1641 __local u32 s_te0[256];
1642 __local u32 s_te1[256];
1643 __local u32 s_te2[256];
1644 __local u32 s_te3[256];
1645 __local u32 s_te4[256];
1647 for (u32 i = lid; i < 256; i += lsz)
1656 barrier (CLK_LOCAL_MEM_FENCE);
1658 if (gid >= gid_max) return;
1666 w0[0] = pws[gid].i[0];
1667 w0[1] = pws[gid].i[1];
1668 w0[2] = pws[gid].i[2];
1669 w0[3] = pws[gid].i[3];
1671 const u32 pw_len = pws[gid].pw_len;
1673 if (pw_len == 0) return;
1681 ctx.dgst64[0] = tmps[gid].dgst64[0];
1682 ctx.dgst64[1] = tmps[gid].dgst64[1];
1683 ctx.dgst64[2] = tmps[gid].dgst64[2];
1684 ctx.dgst64[3] = tmps[gid].dgst64[3];
1685 ctx.dgst64[4] = tmps[gid].dgst64[4];
1686 ctx.dgst64[5] = tmps[gid].dgst64[5];
1687 ctx.dgst64[6] = tmps[gid].dgst64[6];
1688 ctx.dgst64[7] = tmps[gid].dgst64[7];
1689 ctx.dgst_len = tmps[gid].dgst_len;
1690 ctx.W_len = tmps[gid].W_len;
1694 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1696 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
1699 if ((loop_pos + loop_cnt) == 64)
1701 for (u32 i = 64; i < ex + 32; i++)
1703 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
1707 tmps[gid].dgst64[0] = ctx.dgst64[0];
1708 tmps[gid].dgst64[1] = ctx.dgst64[1];
1709 tmps[gid].dgst64[2] = ctx.dgst64[2];
1710 tmps[gid].dgst64[3] = ctx.dgst64[3];
1711 tmps[gid].dgst64[4] = ctx.dgst64[4];
1712 tmps[gid].dgst64[5] = ctx.dgst64[5];
1713 tmps[gid].dgst64[6] = ctx.dgst64[6];
1714 tmps[gid].dgst64[7] = ctx.dgst64[7];
1715 tmps[gid].dgst_len = ctx.dgst_len;
1716 tmps[gid].W_len = ctx.W_len;
1719 __kernel void m10700_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1725 const u32 gid = get_global_id (0);
1727 if (gid >= gid_max) return;
1729 const u32 lid = get_local_id (0);
1735 const u32 r0 = swap32 (tmps[gid].dgst32[DGST_R0]);
1736 const u32 r1 = swap32 (tmps[gid].dgst32[DGST_R1]);
1737 const u32 r2 = swap32 (tmps[gid].dgst32[DGST_R2]);
1738 const u32 r3 = swap32 (tmps[gid].dgst32[DGST_R3]);