2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
21 #define COMPARE_M "check_multi_vect1_comp4.c"
25 #define COMPARE_M "check_multi_vect4_comp4.c"
48 __constant u32 k_sha256[64] =
50 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
51 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
52 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
53 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
54 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
55 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
56 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
57 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
58 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
59 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
60 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
61 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
62 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
63 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
64 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
65 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
68 static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8])
79 u32 w0_t = swap_workaround (w0[0]);
80 u32 w1_t = swap_workaround (w0[1]);
81 u32 w2_t = swap_workaround (w0[2]);
82 u32 w3_t = swap_workaround (w0[3]);
83 u32 w4_t = swap_workaround (w1[0]);
84 u32 w5_t = swap_workaround (w1[1]);
85 u32 w6_t = swap_workaround (w1[2]);
86 u32 w7_t = swap_workaround (w1[3]);
87 u32 w8_t = swap_workaround (w2[0]);
88 u32 w9_t = swap_workaround (w2[1]);
89 u32 wa_t = swap_workaround (w2[2]);
90 u32 wb_t = swap_workaround (w2[3]);
91 u32 wc_t = swap_workaround (w3[0]);
92 u32 wd_t = swap_workaround (w3[1]);
93 u32 we_t = swap_workaround (w3[2]);
94 u32 wf_t = swap_workaround (w3[3]);
96 #define ROUND256_EXPAND() \
98 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
99 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
100 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
101 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
102 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
103 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
104 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
105 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
106 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
107 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
108 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
109 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
110 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
111 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
112 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
113 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
116 #define ROUND256_STEP(i) \
118 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
119 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
120 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
121 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
122 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
123 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
124 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
125 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
126 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
127 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
128 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
129 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
130 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
131 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
132 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
133 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
139 for (int i = 16; i < 64; i += 16)
141 ROUND256_EXPAND (); ROUND256_STEP (i);
154 __constant u64 k_sha384[80] =
156 SHA384C00, SHA384C01, SHA384C02, SHA384C03,
157 SHA384C04, SHA384C05, SHA384C06, SHA384C07,
158 SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
159 SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
160 SHA384C10, SHA384C11, SHA384C12, SHA384C13,
161 SHA384C14, SHA384C15, SHA384C16, SHA384C17,
162 SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
163 SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
164 SHA384C20, SHA384C21, SHA384C22, SHA384C23,
165 SHA384C24, SHA384C25, SHA384C26, SHA384C27,
166 SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
167 SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
168 SHA384C30, SHA384C31, SHA384C32, SHA384C33,
169 SHA384C34, SHA384C35, SHA384C36, SHA384C37,
170 SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
171 SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
172 SHA384C40, SHA384C41, SHA384C42, SHA384C43,
173 SHA384C44, SHA384C45, SHA384C46, SHA384C47,
174 SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
175 SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
178 static void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
189 u64 w0_t = swap_workaround (w0[0]);
190 u64 w1_t = swap_workaround (w0[1]);
191 u64 w2_t = swap_workaround (w0[2]);
192 u64 w3_t = swap_workaround (w0[3]);
193 u64 w4_t = swap_workaround (w1[0]);
194 u64 w5_t = swap_workaround (w1[1]);
195 u64 w6_t = swap_workaround (w1[2]);
196 u64 w7_t = swap_workaround (w1[3]);
197 u64 w8_t = swap_workaround (w2[0]);
198 u64 w9_t = swap_workaround (w2[1]);
199 u64 wa_t = swap_workaround (w2[2]);
200 u64 wb_t = swap_workaround (w2[3]);
201 u64 wc_t = swap_workaround (w3[0]);
202 u64 wd_t = swap_workaround (w3[1]);
203 u64 we_t = swap_workaround (w3[2]);
204 u64 wf_t = swap_workaround (w3[3]);
206 #define ROUND384_EXPAND() \
208 w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \
209 w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \
210 w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \
211 w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \
212 w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \
213 w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \
214 w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \
215 w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \
216 w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \
217 w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \
218 wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \
219 wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \
220 wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \
221 wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \
222 we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \
223 wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \
226 #define ROUND384_STEP(i) \
228 SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \
229 SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \
230 SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \
231 SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \
232 SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \
233 SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \
234 SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \
235 SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \
236 SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \
237 SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \
238 SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \
239 SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \
240 SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \
241 SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \
242 SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \
243 SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \
249 for (int i = 16; i < 80; i += 16)
251 ROUND384_EXPAND (); ROUND384_STEP (i);
264 __constant u64 k_sha512[80] =
266 SHA384C00, SHA384C01, SHA384C02, SHA384C03,
267 SHA384C04, SHA384C05, SHA384C06, SHA384C07,
268 SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
269 SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
270 SHA384C10, SHA384C11, SHA384C12, SHA384C13,
271 SHA384C14, SHA384C15, SHA384C16, SHA384C17,
272 SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
273 SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
274 SHA384C20, SHA384C21, SHA384C22, SHA384C23,
275 SHA384C24, SHA384C25, SHA384C26, SHA384C27,
276 SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
277 SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
278 SHA384C30, SHA384C31, SHA384C32, SHA384C33,
279 SHA384C34, SHA384C35, SHA384C36, SHA384C37,
280 SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
281 SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
282 SHA384C40, SHA384C41, SHA384C42, SHA384C43,
283 SHA384C44, SHA384C45, SHA384C46, SHA384C47,
284 SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
285 SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
288 static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
299 u64 w0_t = swap_workaround (w0[0]);
300 u64 w1_t = swap_workaround (w0[1]);
301 u64 w2_t = swap_workaround (w0[2]);
302 u64 w3_t = swap_workaround (w0[3]);
303 u64 w4_t = swap_workaround (w1[0]);
304 u64 w5_t = swap_workaround (w1[1]);
305 u64 w6_t = swap_workaround (w1[2]);
306 u64 w7_t = swap_workaround (w1[3]);
307 u64 w8_t = swap_workaround (w2[0]);
308 u64 w9_t = swap_workaround (w2[1]);
309 u64 wa_t = swap_workaround (w2[2]);
310 u64 wb_t = swap_workaround (w2[3]);
311 u64 wc_t = swap_workaround (w3[0]);
312 u64 wd_t = swap_workaround (w3[1]);
313 u64 we_t = swap_workaround (w3[2]);
314 u64 wf_t = swap_workaround (w3[3]);
316 #define ROUND512_EXPAND() \
318 w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
319 w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
320 w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
321 w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
322 w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
323 w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
324 w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
325 w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
326 w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
327 w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
328 wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
329 wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
330 wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
331 wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
332 we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
333 wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
336 #define ROUND512_STEP(i) \
338 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
339 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
340 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
341 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
342 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
343 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
344 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
345 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
346 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
347 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
348 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
349 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
350 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
351 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
352 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
353 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
359 for (int i = 16; i < 80; i += 16)
361 ROUND512_EXPAND (); ROUND512_STEP (i);
374 __constant u32 te0[256] =
376 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
377 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
378 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
379 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
380 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
381 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
382 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
383 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
384 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
385 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
386 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
387 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
388 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
389 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
390 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
391 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
392 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
393 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
394 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
395 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
396 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
397 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
398 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
399 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
400 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
401 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
402 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
403 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
404 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
405 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
406 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
407 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
408 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
409 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
410 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
411 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
412 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
413 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
414 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
415 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
416 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
417 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
418 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
419 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
420 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
421 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
422 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
423 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
424 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
425 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
426 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
427 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
428 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
429 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
430 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
431 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
432 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
433 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
434 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
435 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
436 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
437 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
438 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
439 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
442 __constant u32 te1[256] =
444 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
445 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
446 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
447 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
448 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
449 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
450 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
451 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
452 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
453 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
454 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
455 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
456 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
457 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
458 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
459 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
460 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
461 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
462 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
463 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
464 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
465 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
466 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
467 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
468 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
469 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
470 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
471 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
472 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
473 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
474 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
475 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
476 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
477 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
478 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
479 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
480 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
481 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
482 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
483 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
484 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
485 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
486 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
487 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
488 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
489 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
490 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
491 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
492 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
493 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
494 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
495 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
496 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
497 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
498 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
499 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
500 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
501 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
502 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
503 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
504 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
505 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
506 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
507 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
510 __constant u32 te2[256] =
512 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
513 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
514 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
515 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
516 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
517 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
518 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
519 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
520 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
521 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
522 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
523 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
524 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
525 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
526 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
527 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
528 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
529 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
530 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
531 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
532 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
533 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
534 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
535 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
536 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
537 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
538 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
539 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
540 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
541 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
542 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
543 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
544 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
545 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
546 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
547 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
548 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
549 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
550 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
551 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
552 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
553 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
554 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
555 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
556 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
557 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
558 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
559 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
560 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
561 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
562 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
563 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
564 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
565 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
566 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
567 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
568 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
569 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
570 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
571 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
572 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
573 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
574 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
575 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
578 __constant u32 te3[256] =
580 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
581 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
582 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
583 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
584 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
585 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
586 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
587 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
588 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
589 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
590 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
591 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
592 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
593 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
594 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
595 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
596 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
597 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
598 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
599 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
600 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
601 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
602 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
603 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
604 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
605 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
606 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
607 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
608 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
609 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
610 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
611 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
612 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
613 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
614 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
615 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
616 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
617 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
618 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
619 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
620 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
621 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
622 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
623 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
624 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
625 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
626 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
627 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
628 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
629 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
630 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
631 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
632 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
633 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
634 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
635 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
636 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
637 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
638 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
639 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
640 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
641 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
642 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
643 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
646 __constant u32 te4[256] =
648 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
649 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
650 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
651 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
652 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
653 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
654 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
655 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
656 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
657 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
658 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
659 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
660 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
661 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
662 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
663 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
664 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
665 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
666 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
667 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
668 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
669 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
670 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
671 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
672 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
673 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
674 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
675 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
676 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
677 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
678 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
679 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
680 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
681 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
682 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
683 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
684 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
685 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
686 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
687 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
688 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
689 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
690 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
691 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
692 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
693 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
694 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
695 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
696 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
697 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
698 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
699 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
700 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
701 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
702 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
703 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
704 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
705 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
706 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
707 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
708 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
709 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
710 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
711 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
714 __constant u32 rcon[] =
716 0x01000000, 0x02000000, 0x04000000, 0x08000000,
717 0x10000000, 0x20000000, 0x40000000, 0x80000000,
718 0x1b000000, 0x36000000,
721 static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
723 rek[0] = swap_workaround (userkey[0]);
724 rek[1] = swap_workaround (userkey[1]);
725 rek[2] = swap_workaround (userkey[2]);
726 rek[3] = swap_workaround (userkey[3]);
728 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
730 u32 temp = rek[j + 3];
732 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
733 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
734 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
735 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
737 rek[j + 4] = rek[j + 0]
741 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
742 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
743 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
747 static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
751 in_swap[0] = swap_workaround (in[0]);
752 in_swap[1] = swap_workaround (in[1]);
753 in_swap[2] = swap_workaround (in[2]);
754 in_swap[3] = swap_workaround (in[3]);
756 u32 s0 = in_swap[0] ^ rek[0];
757 u32 s1 = in_swap[1] ^ rek[1];
758 u32 s2 = in_swap[2] ^ rek[2];
759 u32 s3 = in_swap[3] ^ rek[3];
766 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
767 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
768 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
769 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
770 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
771 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
772 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
773 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
774 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
775 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
776 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
777 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
778 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
779 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
780 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
781 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
782 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
783 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
784 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
785 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
786 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
787 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
788 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
789 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
790 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
791 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
792 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
793 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
794 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
795 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
796 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
797 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
798 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
799 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
800 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
801 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
803 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
804 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
805 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
806 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
809 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
810 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
811 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
812 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
815 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
816 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
817 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
818 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
821 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
822 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
823 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
824 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
827 out[0] = swap_workaround (out[0]);
828 out[1] = swap_workaround (out[1]);
829 out[2] = swap_workaround (out[2]);
830 out[3] = swap_workaround (out[3]);
833 static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
838 block0[0] = append[0];
839 block0[1] = append[1];
843 block0[0] = block0[0] | append[0] << 8;
844 block0[1] = append[0] >> 24 | append[1] << 8;
845 block0[2] = append[1] >> 24;
849 block0[0] = block0[0] | append[0] << 16;
850 block0[1] = append[0] >> 16 | append[1] << 16;
851 block0[2] = append[1] >> 16;
855 block0[0] = block0[0] | append[0] << 24;
856 block0[1] = append[0] >> 8 | append[1] << 24;
857 block0[2] = append[1] >> 8;
861 block0[1] = append[0];
862 block0[2] = append[1];
866 block0[1] = block0[1] | append[0] << 8;
867 block0[2] = append[0] >> 24 | append[1] << 8;
868 block0[3] = append[1] >> 24;
872 block0[1] = block0[1] | append[0] << 16;
873 block0[2] = append[0] >> 16 | append[1] << 16;
874 block0[3] = append[1] >> 16;
878 block0[1] = block0[1] | append[0] << 24;
879 block0[2] = append[0] >> 8 | append[1] << 24;
880 block0[3] = append[1] >> 8;
884 block0[2] = append[0];
885 block0[3] = append[1];
889 block0[2] = block0[2] | append[0] << 8;
890 block0[3] = append[0] >> 24 | append[1] << 8;
891 block1[0] = append[1] >> 24;
895 block0[2] = block0[2] | append[0] << 16;
896 block0[3] = append[0] >> 16 | append[1] << 16;
897 block1[0] = append[1] >> 16;
901 block0[2] = block0[2] | append[0] << 24;
902 block0[3] = append[0] >> 8 | append[1] << 24;
903 block1[0] = append[1] >> 8;
907 block0[3] = append[0];
908 block1[0] = append[1];
912 block0[3] = block0[3] | append[0] << 8;
913 block1[0] = append[0] >> 24 | append[1] << 8;
914 block1[1] = append[1] >> 24;
918 block0[3] = block0[3] | append[0] << 16;
919 block1[0] = append[0] >> 16 | append[1] << 16;
920 block1[1] = append[1] >> 16;
924 block0[3] = block0[3] | append[0] << 24;
925 block1[0] = append[0] >> 8 | append[1] << 24;
926 block1[1] = append[1] >> 8;
930 block1[0] = append[0];
931 block1[1] = append[1];
935 block1[0] = block1[0] | append[0] << 8;
936 block1[1] = append[0] >> 24 | append[1] << 8;
937 block1[2] = append[1] >> 24;
941 block1[0] = block1[0] | append[0] << 16;
942 block1[1] = append[0] >> 16 | append[1] << 16;
943 block1[2] = append[1] >> 16;
947 block1[0] = block1[0] | append[0] << 24;
948 block1[1] = append[0] >> 8 | append[1] << 24;
949 block1[2] = append[1] >> 8;
953 block1[1] = append[0];
954 block1[2] = append[1];
958 block1[1] = block1[1] | append[0] << 8;
959 block1[2] = append[0] >> 24 | append[1] << 8;
960 block1[3] = append[1] >> 24;
964 block1[1] = block1[1] | append[0] << 16;
965 block1[2] = append[0] >> 16 | append[1] << 16;
966 block1[3] = append[1] >> 16;
970 block1[1] = block1[1] | append[0] << 24;
971 block1[2] = append[0] >> 8 | append[1] << 24;
972 block1[3] = append[1] >> 8;
976 block1[2] = append[0];
977 block1[3] = append[1];
981 block1[2] = block1[2] | append[0] << 8;
982 block1[3] = append[0] >> 24 | append[1] << 8;
983 block2[0] = append[1] >> 24;
987 block1[2] = block1[2] | append[0] << 16;
988 block1[3] = append[0] >> 16 | append[1] << 16;
989 block2[0] = append[1] >> 16;
993 block1[2] = block1[2] | append[0] << 24;
994 block1[3] = append[0] >> 8 | append[1] << 24;
995 block2[0] = append[1] >> 8;
999 block1[3] = append[0];
1000 block2[0] = append[1];
1004 block1[3] = block1[3] | append[0] << 8;
1005 block2[0] = append[0] >> 24 | append[1] << 8;
1006 block2[1] = append[1] >> 24;
1010 block1[3] = block1[3] | append[0] << 16;
1011 block2[0] = append[0] >> 16 | append[1] << 16;
1012 block2[1] = append[1] >> 16;
1016 block1[3] = block1[3] | append[0] << 24;
1017 block2[0] = append[0] >> 8 | append[1] << 24;
1018 block2[1] = append[1] >> 8;
1022 block2[0] = append[0];
1023 block2[1] = append[1];
1027 block2[0] = block2[0] | append[0] << 8;
1028 block2[1] = append[0] >> 24 | append[1] << 8;
1029 block2[2] = append[1] >> 24;
1033 block2[0] = block2[0] | append[0] << 16;
1034 block2[1] = append[0] >> 16 | append[1] << 16;
1035 block2[2] = append[1] >> 16;
1039 block2[0] = block2[0] | append[0] << 24;
1040 block2[1] = append[0] >> 8 | append[1] << 24;
1041 block2[2] = append[1] >> 8;
1045 block2[1] = append[0];
1046 block2[2] = append[1];
1050 block2[1] = block2[1] | append[0] << 8;
1051 block2[2] = append[0] >> 24 | append[1] << 8;
1052 block2[3] = append[1] >> 24;
1056 block2[1] = block2[1] | append[0] << 16;
1057 block2[2] = append[0] >> 16 | append[1] << 16;
1058 block2[3] = append[1] >> 16;
1062 block2[1] = block2[1] | append[0] << 24;
1063 block2[2] = append[0] >> 8 | append[1] << 24;
1064 block2[3] = append[1] >> 8;
1068 block2[2] = append[0];
1069 block2[3] = append[1];
1073 block2[2] = block2[2] | append[0] << 8;
1074 block2[3] = append[0] >> 24 | append[1] << 8;
1075 block3[0] = append[1] >> 24;
1079 block2[2] = block2[2] | append[0] << 16;
1080 block2[3] = append[0] >> 16 | append[1] << 16;
1081 block3[0] = append[1] >> 16;
1085 block2[2] = block2[2] | append[0] << 24;
1086 block2[3] = append[0] >> 8 | append[1] << 24;
1087 block3[0] = append[1] >> 8;
1091 block2[3] = append[0];
1092 block3[0] = append[1];
1096 block2[3] = block2[3] | append[0] << 8;
1097 block3[0] = append[0] >> 24 | append[1] << 8;
1098 block3[1] = append[1] >> 24;
1102 block2[3] = block2[3] | append[0] << 16;
1103 block3[0] = append[0] >> 16 | append[1] << 16;
1104 block3[1] = append[1] >> 16;
1108 block2[3] = block2[3] | append[0] << 24;
1109 block3[0] = append[0] >> 8 | append[1] << 24;
1110 block3[1] = append[1] >> 8;
1114 block3[0] = append[0];
1115 block3[1] = append[1];
1119 block3[0] = block3[0] | append[0] << 8;
1120 block3[1] = append[0] >> 24 | append[1] << 8;
1121 block3[2] = append[1] >> 24;
1125 block3[0] = block3[0] | append[0] << 16;
1126 block3[1] = append[0] >> 16 | append[1] << 16;
1127 block3[2] = append[1] >> 16;
1131 block3[0] = block3[0] | append[0] << 24;
1132 block3[1] = append[0] >> 8 | append[1] << 24;
1133 block3[2] = append[1] >> 8;
1137 block3[1] = append[0];
1138 block3[2] = append[1];
1142 block3[1] = block3[1] | append[0] << 8;
1143 block3[2] = append[0] >> 24 | append[1] << 8;
1144 block3[3] = append[1] >> 24;
1148 block3[1] = block3[1] | append[0] << 16;
1149 block3[2] = append[0] >> 16 | append[1] << 16;
1150 block3[3] = append[1] >> 16;
1154 block3[1] = block3[1] | append[0] << 24;
1155 block3[2] = append[0] >> 8 | append[1] << 24;
1156 block3[3] = append[1] >> 8;
1160 block3[2] = append[0];
1161 block3[3] = append[1];
1166 #define AESSZ 16 // AES_BLOCK_SIZE
1172 #define WORDSZ256 64
1173 #define WORDSZ384 128
1174 #define WORDSZ512 128
1176 #define PWMAXSZ 32 // oclHashcat password length limit
1177 #define BLMAXSZ BLSZ512
1178 #define WORDMAXSZ WORDSZ512
1180 #define PWMAXSZ4 (PWMAXSZ / 4)
1181 #define BLMAXSZ4 (BLMAXSZ / 4)
1182 #define WORDMAXSZ4 (WORDMAXSZ / 4)
1183 #define AESSZ4 (AESSZ / 4)
1185 static void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len)
1187 const u32 bd = bl_len / 4;
1189 const u32 pm = pw_len % 4;
1190 const u32 pd = pw_len / 4;
1196 for (u32 i = 0; i < pd; i++) sc[idx++] = pw[i];
1197 for (u32 i = 0; i < bd; i++) sc[idx++] = bl[i];
1198 for (u32 i = 0; i < 4; i++) sc[idx++] = sc[i];
1206 for (i = 0; i < pd; i++) sc[idx++] = pw[i];
1208 | amd_bytealign (bl[0], 0, pm4);
1209 for (i = 1; i < bd; i++) sc[idx++] = amd_bytealign (bl[i], bl[i - 1], pm4);
1210 sc[idx++] = amd_bytealign (sc[0], bl[i - 1], pm4);
1211 for (i = 1; i < 4; i++) sc[idx++] = amd_bytealign (sc[i], sc[i - 1], pm4);
1212 sc[idx++] = amd_bytealign ( 0, sc[i - 1], pm4);
1216 static void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, const u32 pwbl_len)
1218 const u32 m = offset % pwbl_len;
1220 const u32 om = m % 4;
1221 const u32 od = m / 4;
1223 pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
1224 pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
1225 pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
1226 pt[3] = amd_bytealign (sc[od + 4], sc[od + 3], om);
1229 static void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
1231 for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4)
1235 make_pt_with_offset (pt, offset + k, sc, pwbl_len);
1242 AES128_encrypt (pt, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1244 ctx->W32[wk + 0] = iv[0];
1245 ctx->W32[wk + 1] = iv[1];
1246 ctx->W32[wk + 2] = iv[2];
1247 ctx->W32[wk + 3] = iv[3];
1251 static u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
1253 // make scratch buffer
1255 u32 sc[PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
1257 make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
1259 // make sure pwbl_len is calculcated before it gets changed
1261 const u32 pwbl_len = pw_len + ctx->dgst_len;
1267 iv[0] = ctx->dgst32[4];
1268 iv[1] = ctx->dgst32[5];
1269 iv[2] = ctx->dgst32[6];
1270 iv[3] = ctx->dgst32[7];
1276 AES128_ExpandKey (ctx->dgst32, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1278 // first call is special as the hash depends on the result of it
1279 // but since we do not know about the outcome at this time
1280 // we must use the max
1282 make_w_with_offset (ctx, WORDMAXSZ, 0, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1284 // now we can find out hash to use
1288 for (u32 i = 0; i < 4; i++)
1290 sum += (ctx->W32[i] >> 24) & 0xff;
1291 sum += (ctx->W32[i] >> 16) & 0xff;
1292 sum += (ctx->W32[i] >> 8) & 0xff;
1293 sum += (ctx->W32[i] >> 0) & 0xff;
1300 case 0: ctx->dgst32[0] = SHA256M_A;
1301 ctx->dgst32[1] = SHA256M_B;
1302 ctx->dgst32[2] = SHA256M_C;
1303 ctx->dgst32[3] = SHA256M_D;
1304 ctx->dgst32[4] = SHA256M_E;
1305 ctx->dgst32[5] = SHA256M_F;
1306 ctx->dgst32[6] = SHA256M_G;
1307 ctx->dgst32[7] = SHA256M_H;
1308 ctx->dgst_len = BLSZ256;
1309 ctx->W_len = WORDSZ256;
1310 sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1311 sha256_transform (&ctx->W32[16], &ctx->W32[20], &ctx->W32[24], &ctx->W32[28], ctx->dgst32);
1313 case 1: ctx->dgst64[0] = SHA384M_A;
1314 ctx->dgst64[1] = SHA384M_B;
1315 ctx->dgst64[2] = SHA384M_C;
1316 ctx->dgst64[3] = SHA384M_D;
1317 ctx->dgst64[4] = SHA384M_E;
1318 ctx->dgst64[5] = SHA384M_F;
1319 ctx->dgst64[6] = SHA384M_G;
1320 ctx->dgst64[7] = SHA384M_H;
1321 ctx->dgst_len = BLSZ384;
1322 ctx->W_len = WORDSZ384;
1323 sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1325 case 2: ctx->dgst64[0] = SHA512M_A;
1326 ctx->dgst64[1] = SHA512M_B;
1327 ctx->dgst64[2] = SHA512M_C;
1328 ctx->dgst64[3] = SHA512M_D;
1329 ctx->dgst64[4] = SHA512M_E;
1330 ctx->dgst64[5] = SHA512M_F;
1331 ctx->dgst64[6] = SHA512M_G;
1332 ctx->dgst64[7] = SHA512M_H;
1333 ctx->dgst_len = BLSZ512;
1334 ctx->W_len = WORDSZ512;
1335 sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1341 const u32 final_len = pwbl_len * 64;
1343 const u32 iter_max = ctx->W_len - (ctx->W_len / 8);
1348 for (offset = WORDMAXSZ, left = final_len - offset; left >= iter_max; offset += ctx->W_len, left -= ctx->W_len)
1350 make_w_with_offset (ctx, ctx->W_len, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1352 switch (ctx->dgst_len)
1354 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1356 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1358 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1367 switch (ctx->dgst_len)
1369 case BLSZ384: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1370 ctx->W64[ 8] = 0x80;
1377 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1378 ex = ctx->W64[7] >> 56;
1380 case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1381 ctx->W64[ 8] = 0x80;
1388 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1389 ex = ctx->W64[7] >> 56;
1395 switch (ctx->dgst_len)
1397 case BLSZ256: ex = ctx->W32[15] >> 24;
1398 ctx->W32[ 0] = 0x80;
1413 ctx->W32[15] = swap_workaround (final_len * 8);
1415 case BLSZ384: ex = ctx->W64[15] >> 56;
1416 ctx->W64[ 0] = 0x80;
1431 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1433 case BLSZ512: ex = ctx->W64[15] >> 56;
1434 ctx->W64[ 0] = 0x80;
1449 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1454 switch (ctx->dgst_len)
1456 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1457 ctx->dgst32[ 0] = swap_workaround (ctx->dgst32[0]);
1458 ctx->dgst32[ 1] = swap_workaround (ctx->dgst32[1]);
1459 ctx->dgst32[ 2] = swap_workaround (ctx->dgst32[2]);
1460 ctx->dgst32[ 3] = swap_workaround (ctx->dgst32[3]);
1461 ctx->dgst32[ 4] = swap_workaround (ctx->dgst32[4]);
1462 ctx->dgst32[ 5] = swap_workaround (ctx->dgst32[5]);
1463 ctx->dgst32[ 6] = swap_workaround (ctx->dgst32[6]);
1464 ctx->dgst32[ 7] = swap_workaround (ctx->dgst32[7]);
1465 ctx->dgst32[ 8] = 0;
1466 ctx->dgst32[ 9] = 0;
1467 ctx->dgst32[10] = 0;
1468 ctx->dgst32[11] = 0;
1469 ctx->dgst32[12] = 0;
1470 ctx->dgst32[13] = 0;
1471 ctx->dgst32[14] = 0;
1472 ctx->dgst32[15] = 0;
1474 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1475 ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]);
1476 ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]);
1477 ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]);
1478 ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]);
1479 ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]);
1480 ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]);
1484 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1485 ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]);
1486 ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]);
1487 ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]);
1488 ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]);
1489 ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]);
1490 ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]);
1491 ctx->dgst64[6] = swap_workaround (ctx->dgst64[6]);
1492 ctx->dgst64[7] = swap_workaround (ctx->dgst64[7]);
1499 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1505 const u32 gid = get_global_id (0);
1507 if (gid >= gid_max) return;
1511 w0[0] = pws[gid].i[0];
1512 w0[1] = pws[gid].i[1];
1513 w0[2] = pws[gid].i[2];
1514 w0[3] = pws[gid].i[3];
1516 const u32 pw_len = pws[gid].pw_len;
1524 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1525 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1527 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1533 u32 block_len = pw_len;
1563 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1565 block_len += salt_len;
1567 append_0x80_2x4 (block0, block1, block_len);
1569 block3[3] = swap_workaround (block_len * 8);
1573 digest[0] = SHA256M_A;
1574 digest[1] = SHA256M_B;
1575 digest[2] = SHA256M_C;
1576 digest[3] = SHA256M_D;
1577 digest[4] = SHA256M_E;
1578 digest[5] = SHA256M_F;
1579 digest[6] = SHA256M_G;
1580 digest[7] = SHA256M_H;
1582 sha256_transform (block0, block1, block2, block3, digest);
1584 digest[0] = swap_workaround (digest[0]);
1585 digest[1] = swap_workaround (digest[1]);
1586 digest[2] = swap_workaround (digest[2]);
1587 digest[3] = swap_workaround (digest[3]);
1588 digest[4] = swap_workaround (digest[4]);
1589 digest[5] = swap_workaround (digest[5]);
1590 digest[6] = swap_workaround (digest[6]);
1591 digest[7] = swap_workaround (digest[7]);
1593 tmps[gid].dgst32[0] = digest[0];
1594 tmps[gid].dgst32[1] = digest[1];
1595 tmps[gid].dgst32[2] = digest[2];
1596 tmps[gid].dgst32[3] = digest[3];
1597 tmps[gid].dgst32[4] = digest[4];
1598 tmps[gid].dgst32[5] = digest[5];
1599 tmps[gid].dgst32[6] = digest[6];
1600 tmps[gid].dgst32[7] = digest[7];
1601 tmps[gid].dgst_len = BLSZ256;
1602 tmps[gid].W_len = WORDSZ256;
1605 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1607 const u32 gid = get_global_id (0);
1608 const u32 lid = get_local_id (0);
1614 const u32 lid4 = lid * 4;
1616 __local u32 s_te0[256];
1617 __local u32 s_te1[256];
1618 __local u32 s_te2[256];
1619 __local u32 s_te3[256];
1620 __local u32 s_te4[256];
1622 s_te0[lid4 + 0] = te0[lid4 + 0];
1623 s_te0[lid4 + 1] = te0[lid4 + 1];
1624 s_te0[lid4 + 2] = te0[lid4 + 2];
1625 s_te0[lid4 + 3] = te0[lid4 + 3];
1627 s_te1[lid4 + 0] = te1[lid4 + 0];
1628 s_te1[lid4 + 1] = te1[lid4 + 1];
1629 s_te1[lid4 + 2] = te1[lid4 + 2];
1630 s_te1[lid4 + 3] = te1[lid4 + 3];
1632 s_te2[lid4 + 0] = te2[lid4 + 0];
1633 s_te2[lid4 + 1] = te2[lid4 + 1];
1634 s_te2[lid4 + 2] = te2[lid4 + 2];
1635 s_te2[lid4 + 3] = te2[lid4 + 3];
1637 s_te3[lid4 + 0] = te3[lid4 + 0];
1638 s_te3[lid4 + 1] = te3[lid4 + 1];
1639 s_te3[lid4 + 2] = te3[lid4 + 2];
1640 s_te3[lid4 + 3] = te3[lid4 + 3];
1642 s_te4[lid4 + 0] = te4[lid4 + 0];
1643 s_te4[lid4 + 1] = te4[lid4 + 1];
1644 s_te4[lid4 + 2] = te4[lid4 + 2];
1645 s_te4[lid4 + 3] = te4[lid4 + 3];
1647 barrier (CLK_LOCAL_MEM_FENCE);
1649 if (gid >= gid_max) return;
1657 w0[0] = pws[gid].i[0];
1658 w0[1] = pws[gid].i[1];
1659 w0[2] = pws[gid].i[2];
1660 w0[3] = pws[gid].i[3];
1662 const u32 pw_len = pws[gid].pw_len;
1670 ctx.dgst64[0] = tmps[gid].dgst64[0];
1671 ctx.dgst64[1] = tmps[gid].dgst64[1];
1672 ctx.dgst64[2] = tmps[gid].dgst64[2];
1673 ctx.dgst64[3] = tmps[gid].dgst64[3];
1674 ctx.dgst64[4] = tmps[gid].dgst64[4];
1675 ctx.dgst64[5] = tmps[gid].dgst64[5];
1676 ctx.dgst64[6] = tmps[gid].dgst64[6];
1677 ctx.dgst64[7] = tmps[gid].dgst64[7];
1678 ctx.dgst_len = tmps[gid].dgst_len;
1679 ctx.W_len = tmps[gid].W_len;
1683 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1685 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
1688 if ((loop_pos + loop_cnt) == 64)
1690 for (u32 i = 64; i < ex + 32; i++)
1692 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
1696 tmps[gid].dgst64[0] = ctx.dgst64[0];
1697 tmps[gid].dgst64[1] = ctx.dgst64[1];
1698 tmps[gid].dgst64[2] = ctx.dgst64[2];
1699 tmps[gid].dgst64[3] = ctx.dgst64[3];
1700 tmps[gid].dgst64[4] = ctx.dgst64[4];
1701 tmps[gid].dgst64[5] = ctx.dgst64[5];
1702 tmps[gid].dgst64[6] = ctx.dgst64[6];
1703 tmps[gid].dgst64[7] = ctx.dgst64[7];
1704 tmps[gid].dgst_len = ctx.dgst_len;
1705 tmps[gid].W_len = ctx.W_len;
1708 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1714 const u32 gid = get_global_id (0);
1716 if (gid >= gid_max) return;
1718 const u32 lid = get_local_id (0);
1724 const u32 r0 = swap_workaround (tmps[gid].dgst32[DGST_R0]);
1725 const u32 r1 = swap_workaround (tmps[gid].dgst32[DGST_R1]);
1726 const u32 r2 = swap_workaround (tmps[gid].dgst32[DGST_R2]);
1727 const u32 r3 = swap_workaround (tmps[gid].dgst32[DGST_R3]);