2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
/**
 * SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2).
 * The SHA256Cxx macros are defined in the included constants.h.
 */
__constant u32 k_sha256[64] =
  SHA256C00, SHA256C01, SHA256C02, SHA256C03,
  SHA256C04, SHA256C05, SHA256C06, SHA256C07,
  SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
  SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
  SHA256C10, SHA256C11, SHA256C12, SHA256C13,
  SHA256C14, SHA256C15, SHA256C16, SHA256C17,
  SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
  SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
  SHA256C20, SHA256C21, SHA256C22, SHA256C23,
  SHA256C24, SHA256C25, SHA256C26, SHA256C27,
  SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
  SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
  SHA256C30, SHA256C31, SHA256C32, SHA256C33,
  SHA256C34, SHA256C35, SHA256C36, SHA256C37,
  SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
  SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
/**
 * SHA-256 compression function: processes one 64-byte message block.
 *
 * w0..w3 : 16 x u32x message words (4 words per argument).
 * digest : 8-word working state (a..h), updated by the SHA256_STEP rounds.
 *
 * SHA256_EXPAND and SHA256_STEP come from the included kernel_functions.c;
 * presumably they implement the FIPS 180-4 message schedule and round
 * function — TODO confirm against that file.
 */
static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
  // Byte-swap the 16 input words; swap_workaround is supplied by the
  // included common_amd.c (presumably a vendor-safe 32-bit bswap — confirm).
  u32x w0_t = swap_workaround (w0[0]);
  u32x w1_t = swap_workaround (w0[1]);
  u32x w2_t = swap_workaround (w0[2]);
  u32x w3_t = swap_workaround (w0[3]);
  u32x w4_t = swap_workaround (w1[0]);
  u32x w5_t = swap_workaround (w1[1]);
  u32x w6_t = swap_workaround (w1[2]);
  u32x w7_t = swap_workaround (w1[3]);
  u32x w8_t = swap_workaround (w2[0]);
  u32x w9_t = swap_workaround (w2[1]);
  u32x wa_t = swap_workaround (w2[2]);
  u32x wb_t = swap_workaround (w2[3]);
  u32x wc_t = swap_workaround (w3[0]);
  u32x wd_t = swap_workaround (w3[1]);
  u32x we_t = swap_workaround (w3[2]);
  u32x wf_t = swap_workaround (w3[3]);

  // Expand the next 16 schedule words in place from the previous 16.
  #define ROUND256_EXPAND() \
  w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
  w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
  w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
  w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
  w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
  w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
  w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
  w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
  w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
  w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
  wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
  wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
  wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
  wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
  we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
  wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \

  // 16 rounds starting at round i; the a..h arguments rotate each step so
  // no explicit variable shuffling is needed.
  #define ROUND256_STEP(i) \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \

  // Rounds 16..63: expand 16 schedule words, then run 16 compression steps.
  for (int i = 16; i < 64; i += 16)
    ROUND256_EXPAND (); ROUND256_STEP (i);
/**
 * SHA-384 round constants K[0..79] (FIPS 180-4, section 4.2.3).
 * SHA-384 uses the same 80 64-bit constants as SHA-512.
 */
__constant u64 k_sha384[80] =
  SHA384C00, SHA384C01, SHA384C02, SHA384C03,
  SHA384C04, SHA384C05, SHA384C06, SHA384C07,
  SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
  SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
  SHA384C10, SHA384C11, SHA384C12, SHA384C13,
  SHA384C14, SHA384C15, SHA384C16, SHA384C17,
  SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
  SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
  SHA384C20, SHA384C21, SHA384C22, SHA384C23,
  SHA384C24, SHA384C25, SHA384C26, SHA384C27,
  SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
  SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
  SHA384C30, SHA384C31, SHA384C32, SHA384C33,
  SHA384C34, SHA384C35, SHA384C36, SHA384C37,
  SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
  SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
  SHA384C40, SHA384C41, SHA384C42, SHA384C43,
  SHA384C44, SHA384C45, SHA384C46, SHA384C47,
  SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
  SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
/**
 * SHA-384 compression function: processes one 128-byte message block.
 *
 * w0..w3 : 16 x u64 message words (4 words per argument).
 * digest : 8 x u64x working state (a..h), updated by the SHA384_STEP rounds.
 *
 * NOTE(review): the message words are scalar u64 while the digest and the
 * w*_t locals are vector u64x — relies on implicit scalar-to-vector
 * widening in swap_workaround; confirm this is intended.
 */
static void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64x digest[8])
  // Byte-swap the 16 input words (swap_workaround from common_amd.c).
  u64x w0_t = swap_workaround (w0[0]);
  u64x w1_t = swap_workaround (w0[1]);
  u64x w2_t = swap_workaround (w0[2]);
  u64x w3_t = swap_workaround (w0[3]);
  u64x w4_t = swap_workaround (w1[0]);
  u64x w5_t = swap_workaround (w1[1]);
  u64x w6_t = swap_workaround (w1[2]);
  u64x w7_t = swap_workaround (w1[3]);
  u64x w8_t = swap_workaround (w2[0]);
  u64x w9_t = swap_workaround (w2[1]);
  u64x wa_t = swap_workaround (w2[2]);
  u64x wb_t = swap_workaround (w2[3]);
  u64x wc_t = swap_workaround (w3[0]);
  u64x wd_t = swap_workaround (w3[1]);
  u64x we_t = swap_workaround (w3[2]);
  u64x wf_t = swap_workaround (w3[3]);

  // Expand the next 16 schedule words in place from the previous 16.
  #define ROUND384_EXPAND() \
  w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \
  w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \
  w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \
  w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \
  w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \
  w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \
  w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \
  w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \
  w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \
  w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \
  wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \
  wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \
  wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \
  wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \
  we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \
  wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \

  // 16 rounds starting at round i; a..h arguments rotate each step.
  #define ROUND384_STEP(i) \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \
  SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \

  // Rounds 16..79: expand 16 schedule words, then run 16 compression steps.
  for (int i = 16; i < 80; i += 16)
    ROUND384_EXPAND (); ROUND384_STEP (i);
/**
 * SHA-512 round constants K[0..79] (FIPS 180-4, section 4.2.3).
 *
 * NOTE(review): this table reuses the SHA384C* macros. That is numerically
 * correct — SHA-384 and SHA-512 share the same 80 round constants — but
 * SHA512C* names would be clearer if constants.h defines them.
 */
__constant u64 k_sha512[80] =
  SHA384C00, SHA384C01, SHA384C02, SHA384C03,
  SHA384C04, SHA384C05, SHA384C06, SHA384C07,
  SHA384C08, SHA384C09, SHA384C0a, SHA384C0b,
  SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f,
  SHA384C10, SHA384C11, SHA384C12, SHA384C13,
  SHA384C14, SHA384C15, SHA384C16, SHA384C17,
  SHA384C18, SHA384C19, SHA384C1a, SHA384C1b,
  SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f,
  SHA384C20, SHA384C21, SHA384C22, SHA384C23,
  SHA384C24, SHA384C25, SHA384C26, SHA384C27,
  SHA384C28, SHA384C29, SHA384C2a, SHA384C2b,
  SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f,
  SHA384C30, SHA384C31, SHA384C32, SHA384C33,
  SHA384C34, SHA384C35, SHA384C36, SHA384C37,
  SHA384C38, SHA384C39, SHA384C3a, SHA384C3b,
  SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f,
  SHA384C40, SHA384C41, SHA384C42, SHA384C43,
  SHA384C44, SHA384C45, SHA384C46, SHA384C47,
  SHA384C48, SHA384C49, SHA384C4a, SHA384C4b,
  SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f,
/**
 * SHA-512 compression function: processes one 128-byte message block.
 *
 * w0..w3 : 16 x u64 message words (4 words per argument).
 * digest : 8 x u64 working state (a..h), updated by the SHA512_STEP rounds.
 *
 * NOTE(review): digest is scalar u64 while the w*_t locals are vector
 * u64x — the mixed scalar/vector arithmetic presumably relies on implicit
 * widening; confirm against the u64x definition in types_amd.c.
 */
static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
  // Byte-swap the 16 input words (swap_workaround from common_amd.c).
  u64x w0_t = swap_workaround (w0[0]);
  u64x w1_t = swap_workaround (w0[1]);
  u64x w2_t = swap_workaround (w0[2]);
  u64x w3_t = swap_workaround (w0[3]);
  u64x w4_t = swap_workaround (w1[0]);
  u64x w5_t = swap_workaround (w1[1]);
  u64x w6_t = swap_workaround (w1[2]);
  u64x w7_t = swap_workaround (w1[3]);
  u64x w8_t = swap_workaround (w2[0]);
  u64x w9_t = swap_workaround (w2[1]);
  u64x wa_t = swap_workaround (w2[2]);
  u64x wb_t = swap_workaround (w2[3]);
  u64x wc_t = swap_workaround (w3[0]);
  u64x wd_t = swap_workaround (w3[1]);
  u64x we_t = swap_workaround (w3[2]);
  u64x wf_t = swap_workaround (w3[3]);

  // Expand the next 16 schedule words in place from the previous 16.
  #define ROUND512_EXPAND() \
  w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
  w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
  w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
  w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
  w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
  w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
  w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
  w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
  w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
  w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
  wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
  wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
  wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
  wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
  we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
  wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \

  // 16 rounds starting at round i; a..h arguments rotate each step.
  #define ROUND512_STEP(i) \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
  SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \

  // Rounds 16..79: expand 16 schedule words, then run 16 compression steps.
  for (int i = 16; i < 80; i += 16)
    ROUND512_EXPAND (); ROUND512_STEP (i);
/**
 * AES encryption T-table Te0 (FIPS 197): combined SubBytes + MixColumns
 * lookup. Entry pattern is S[x]*{02,01,01,03}; te1..te3 are byte
 * rotations of this table.
 */
__constant u32 te0[256] =
  0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
/**
 * AES encryption T-table Te1 (FIPS 197): Te0 rotated right by one byte,
 * i.e. entry pattern S[x]*{03,02,01,01}.
 */
__constant u32 te1[256] =
  0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
  0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
  0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
  0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
  0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
  0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
  0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
  0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
  0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
  0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
  0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
  0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
  0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
  0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
  0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
  0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
  0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
  0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
  0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
  0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
  0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
  0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
  0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
  0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
  0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
  0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
  0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
  0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
  0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
  0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
  0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
  0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
  0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
  0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
  0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
  0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
  0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
  0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
  0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
  0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
  0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
  0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
  0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
  0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
  0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
  0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
  0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
  0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
  0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
  0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
  0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
  0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
  0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
  0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
  0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
  0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
  0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
  0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
  0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
  0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
  0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
  0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
  0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
  0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
/**
 * AES encryption T-table Te2 (FIPS 197): Te0 rotated right by two bytes,
 * i.e. entry pattern S[x]*{01,03,02,01}.
 */
__constant u32 te2[256] =
  0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
  0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
  0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
  0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
  0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
  0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
  0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
  0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
  0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
  0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
  0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
  0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
  0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
  0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
  0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
  0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
  0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
  0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
  0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
  0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
  0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
  0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
  0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
  0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
  0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
  0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
  0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
  0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
  0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
  0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
  0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
  0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
  0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
  0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
  0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
  0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
  0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
  0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
  0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
  0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
  0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
  0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
  0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
  0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
  0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
  0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
  0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
  0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
  0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
  0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
  0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
  0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
  0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
  0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
  0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
  0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
  0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
  0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
  0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
  0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
  0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
  0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
  0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
  0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
/**
 * AES encryption T-table Te3 (FIPS 197): Te0 rotated right by three bytes,
 * i.e. entry pattern S[x]*{01,01,03,02}.
 */
__constant u32 te3[256] =
  0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
  0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
  0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
  0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
  0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
  0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
  0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
  0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
  0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
  0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
  0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
  0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
  0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
  0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
  0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
  0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
  0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
  0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
  0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
  0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
  0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
  0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
  0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
  0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
  0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
  0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
  0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
  0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
  0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
  0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
  0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
  0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
  0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
  0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
  0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
  0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
  0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
  0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
  0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
  0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
  0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
  0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
  0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
  0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
  0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
  0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
  0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
  0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
  0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
  0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
  0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
  0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
  0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
  0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
  0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
  0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
  0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
  0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
  0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
  0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
  0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
  0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
  0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
  0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
/**
 * AES S-box table Te4: each entry is the SubBytes value S[x] replicated
 * into all four bytes (e.g. S[0x00] = 0x63 -> 0x63636363). Used with
 * byte masks for the final round and for key-schedule SubWord.
 */
__constant u32 te4[256] =
  0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
  0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
  0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
  0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
  0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
  0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
  0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
  0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
  0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
  0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
  0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
  0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
  0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
  0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
  0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
  0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
  0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
  0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
  0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
  0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
  0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
  0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
  0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
  0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
  0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
  0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
  0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
  0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
  0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
  0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
  0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
  0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
  0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
  0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
  0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
  0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
  0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
  0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
  0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
  0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
  0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
  0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
  0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
  0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
  0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
  0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
  0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
  0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
  0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
  0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
  0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
  0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
  0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
  0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
  0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
  0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
  0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
  0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
  0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
  0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
  0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
  0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
  0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
  0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
/**
 * AES key-schedule round constants Rcon[1..10] (FIPS 197, section 5.2),
 * stored in the most significant byte: 0x01, 0x02, ..., 0x80, 0x1b, 0x36.
 */
__constant u32 rcon[] =
  0x01000000, 0x02000000, 0x04000000, 0x08000000,
  0x10000000, 0x20000000, 0x40000000, 0x80000000,
  0x1b000000, 0x36000000,
/**
 * AES-128 key expansion (FIPS 197, section 5.2).
 *
 * userkey : 4 x u32 cipher key words (byte-swapped into big-endian order).
 * rek     : output round-key schedule; words 0..3 are the key itself and
 *           each loop iteration appends 4 more words (10 rounds).
 * s_te0..s_te4 : AES T-tables copied to __local memory by the caller.
 */
static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
  rek[0] = swap_workaround (userkey[0]);
  rek[1] = swap_workaround (userkey[1]);
  rek[2] = swap_workaround (userkey[2]);
  rek[3] = swap_workaround (userkey[3]);

  // One iteration per round key; j indexes the last generated 4-word group.
  for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
    u32 temp = rek[j + 3];

    // RotWord + SubWord in one shot: each masked T-table lookup extracts
    // one S-box output byte (the masks select the S[x]*01 byte lane).
    temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
         ^ (s_te3[(temp >>  8) & 0xff] & 0x00ff0000)
         ^ (s_te0[(temp >>  0) & 0xff] & 0x0000ff00)
         ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);

    rek[j + 4] = rek[j + 0] // continued: XORed with temp and rcon[i] per FIPS 197

    // Remaining words of the group chain-XOR off the previous word.
    rek[j + 5] = rek[j + 1] ^ rek[j + 4];
    rek[j + 6] = rek[j + 2] ^ rek[j + 5];
    rek[j + 7] = rek[j + 3] ^ rek[j + 6];
// Encrypt one 16-byte block with AES-128 using five T-tables held in __local
// memory (s_te0..s_te3 for the main rounds, s_te4 for the final-round S-box).
// `rek` holds the expanded round keys (words 0..43, of which 0..39 plus the
// final-round keys are used here). Input and output are byte-swapped at the
// boundaries via swap_workaround. Local declarations (in_swap[], t0..t3) and
// the closing brace fall on lines elided from this excerpt.
759 static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
763 in_swap[0] = swap_workaround (in[0]);
764 in_swap[1] = swap_workaround (in[1]);
765 in_swap[2] = swap_workaround (in[2]);
766 in_swap[3] = swap_workaround (in[3]);
// Initial AddRoundKey with rek[0..3].
768 u32 s0 = in_swap[0] ^ rek[0];
769 u32 s1 = in_swap[1] ^ rek[1];
770 u32 s2 = in_swap[2] ^ rek[2];
771 u32 s3 = in_swap[3] ^ rek[3];
// Rounds 1..9, fully unrolled, ping-ponging between (s0..s3) and (t0..t3).
// Each line fuses SubBytes+ShiftRows+MixColumns via the four T-tables and
// adds the next round key.
778 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
779 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
780 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
781 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
782 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
783 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
784 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
785 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
786 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
787 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
788 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
789 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
790 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
791 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
792 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
793 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
794 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
795 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
796 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
797 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
798 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
799 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
800 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
801 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
802 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
803 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
804 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
805 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
806 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
807 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
808 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
809 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
810 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
811 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
812 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
813 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
// Final round: SubBytes via s_te4 (masked per byte lane, no MixColumns).
// NOTE(review): the trailing "^ rek[40..43]" of each out[] expression appears
// to fall on lines elided from this excerpt — confirm against the full source.
815 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
816 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
817 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
818 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
821 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
822 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
823 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
824 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
827 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
828 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
829 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
830 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
833 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
834 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
835 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
836 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
// Return the ciphertext in the caller's byte order.
839 out[0] = swap_workaround (out[0]);
840 out[1] = swap_workaround (out[1]);
841 out[2] = swap_workaround (out[2]);
842 out[3] = swap_workaround (out[3]);
// Append the 8 bytes in append[0..1] into the 64-byte message block
// (block0..block3, 16 little-endian words) starting at byte offset block_len.
// The dispatch on block_len (a switch with one case per byte offset 0..31)
// lies on lines elided from this excerpt; only the per-offset assignment arms
// are visible. Within each word, a non-zero byte offset is handled by
// shifting append[] left by 8*offset and carrying the spilled high bytes into
// the next word(s).
845 static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2])
// --- append starting inside block0 ---
850 block0[0] = append[0];
851 block0[1] = append[1];
855 block0[0] = block0[0] | append[0] << 8;
856 block0[1] = append[0] >> 24 | append[1] << 8;
857 block0[2] = append[1] >> 24;
861 block0[0] = block0[0] | append[0] << 16;
862 block0[1] = append[0] >> 16 | append[1] << 16;
863 block0[2] = append[1] >> 16;
867 block0[0] = block0[0] | append[0] << 24;
868 block0[1] = append[0] >> 8 | append[1] << 24;
869 block0[2] = append[1] >> 8;
873 block0[1] = append[0];
874 block0[2] = append[1];
878 block0[1] = block0[1] | append[0] << 8;
879 block0[2] = append[0] >> 24 | append[1] << 8;
880 block0[3] = append[1] >> 24;
884 block0[1] = block0[1] | append[0] << 16;
885 block0[2] = append[0] >> 16 | append[1] << 16;
886 block0[3] = append[1] >> 16;
890 block0[1] = block0[1] | append[0] << 24;
891 block0[2] = append[0] >> 8 | append[1] << 24;
892 block0[3] = append[1] >> 8;
896 block0[2] = append[0];
897 block0[3] = append[1];
901 block0[2] = block0[2] | append[0] << 8;
902 block0[3] = append[0] >> 24 | append[1] << 8;
903 block1[0] = append[1] >> 24;
907 block0[2] = block0[2] | append[0] << 16;
908 block0[3] = append[0] >> 16 | append[1] << 16;
909 block1[0] = append[1] >> 16;
913 block0[2] = block0[2] | append[0] << 24;
914 block0[3] = append[0] >> 8 | append[1] << 24;
915 block1[0] = append[1] >> 8;
// --- append straddling into / inside block1 ---
919 block0[3] = append[0];
920 block1[0] = append[1];
924 block0[3] = block0[3] | append[0] << 8;
925 block1[0] = append[0] >> 24 | append[1] << 8;
926 block1[1] = append[1] >> 24;
930 block0[3] = block0[3] | append[0] << 16;
931 block1[0] = append[0] >> 16 | append[1] << 16;
932 block1[1] = append[1] >> 16;
936 block0[3] = block0[3] | append[0] << 24;
937 block1[0] = append[0] >> 8 | append[1] << 24;
938 block1[1] = append[1] >> 8;
942 block1[0] = append[0];
943 block1[1] = append[1];
947 block1[0] = block1[0] | append[0] << 8;
948 block1[1] = append[0] >> 24 | append[1] << 8;
949 block1[2] = append[1] >> 24;
953 block1[0] = block1[0] | append[0] << 16;
954 block1[1] = append[0] >> 16 | append[1] << 16;
955 block1[2] = append[1] >> 16;
959 block1[0] = block1[0] | append[0] << 24;
960 block1[1] = append[0] >> 8 | append[1] << 24;
961 block1[2] = append[1] >> 8;
965 block1[1] = append[0];
966 block1[2] = append[1];
970 block1[1] = block1[1] | append[0] << 8;
971 block1[2] = append[0] >> 24 | append[1] << 8;
972 block1[3] = append[1] >> 24;
976 block1[1] = block1[1] | append[0] << 16;
977 block1[2] = append[0] >> 16 | append[1] << 16;
978 block1[3] = append[1] >> 16;
982 block1[1] = block1[1] | append[0] << 24;
983 block1[2] = append[0] >> 8 | append[1] << 24;
984 block1[3] = append[1] >> 8;
988 block1[2] = append[0];
989 block1[3] = append[1];
993 block1[2] = block1[2] | append[0] << 8;
994 block1[3] = append[0] >> 24 | append[1] << 8;
995 block2[0] = append[1] >> 24;
999 block1[2] = block1[2] | append[0] << 16;
1000 block1[3] = append[0] >> 16 | append[1] << 16;
1001 block2[0] = append[1] >> 16;
1005 block1[2] = block1[2] | append[0] << 24;
1006 block1[3] = append[0] >> 8 | append[1] << 24;
1007 block2[0] = append[1] >> 8;
// --- append straddling into / inside block2 ---
1011 block1[3] = append[0];
1012 block2[0] = append[1];
1016 block1[3] = block1[3] | append[0] << 8;
1017 block2[0] = append[0] >> 24 | append[1] << 8;
1018 block2[1] = append[1] >> 24;
1022 block1[3] = block1[3] | append[0] << 16;
1023 block2[0] = append[0] >> 16 | append[1] << 16;
1024 block2[1] = append[1] >> 16;
1028 block1[3] = block1[3] | append[0] << 24;
1029 block2[0] = append[0] >> 8 | append[1] << 24;
1030 block2[1] = append[1] >> 8;
1034 block2[0] = append[0];
1035 block2[1] = append[1];
1039 block2[0] = block2[0] | append[0] << 8;
1040 block2[1] = append[0] >> 24 | append[1] << 8;
1041 block2[2] = append[1] >> 24;
1045 block2[0] = block2[0] | append[0] << 16;
1046 block2[1] = append[0] >> 16 | append[1] << 16;
1047 block2[2] = append[1] >> 16;
1051 block2[0] = block2[0] | append[0] << 24;
1052 block2[1] = append[0] >> 8 | append[1] << 24;
1053 block2[2] = append[1] >> 8;
1057 block2[1] = append[0];
1058 block2[2] = append[1];
1062 block2[1] = block2[1] | append[0] << 8;
1063 block2[2] = append[0] >> 24 | append[1] << 8;
1064 block2[3] = append[1] >> 24;
1068 block2[1] = block2[1] | append[0] << 16;
1069 block2[2] = append[0] >> 16 | append[1] << 16;
1070 block2[3] = append[1] >> 16;
1074 block2[1] = block2[1] | append[0] << 24;
1075 block2[2] = append[0] >> 8 | append[1] << 24;
1076 block2[3] = append[1] >> 8;
1080 block2[2] = append[0];
1081 block2[3] = append[1];
1085 block2[2] = block2[2] | append[0] << 8;
1086 block2[3] = append[0] >> 24 | append[1] << 8;
1087 block3[0] = append[1] >> 24;
1091 block2[2] = block2[2] | append[0] << 16;
1092 block2[3] = append[0] >> 16 | append[1] << 16;
1093 block3[0] = append[1] >> 16;
1097 block2[2] = block2[2] | append[0] << 24;
1098 block2[3] = append[0] >> 8 | append[1] << 24;
1099 block3[0] = append[1] >> 8;
// --- append straddling into / inside block3 ---
1103 block2[3] = append[0];
1104 block3[0] = append[1];
1108 block2[3] = block2[3] | append[0] << 8;
1109 block3[0] = append[0] >> 24 | append[1] << 8;
1110 block3[1] = append[1] >> 24;
1114 block2[3] = block2[3] | append[0] << 16;
1115 block3[0] = append[0] >> 16 | append[1] << 16;
1116 block3[1] = append[1] >> 16;
1120 block2[3] = block2[3] | append[0] << 24;
1121 block3[0] = append[0] >> 8 | append[1] << 24;
1122 block3[1] = append[1] >> 8;
1126 block3[0] = append[0];
1127 block3[1] = append[1];
1131 block3[0] = block3[0] | append[0] << 8;
1132 block3[1] = append[0] >> 24 | append[1] << 8;
1133 block3[2] = append[1] >> 24;
1137 block3[0] = block3[0] | append[0] << 16;
1138 block3[1] = append[0] >> 16 | append[1] << 16;
1139 block3[2] = append[1] >> 16;
1143 block3[0] = block3[0] | append[0] << 24;
1144 block3[1] = append[0] >> 8 | append[1] << 24;
1145 block3[2] = append[1] >> 8;
1149 block3[1] = append[0];
1150 block3[2] = append[1];
1154 block3[1] = block3[1] | append[0] << 8;
1155 block3[2] = append[0] >> 24 | append[1] << 8;
1156 block3[3] = append[1] >> 24;
1160 block3[1] = block3[1] | append[0] << 16;
1161 block3[2] = append[0] >> 16 | append[1] << 16;
1162 block3[3] = append[1] >> 16;
1166 block3[1] = block3[1] | append[0] << 24;
1167 block3[2] = append[0] >> 8 | append[1] << 24;
1168 block3[3] = append[1] >> 8;
1172 block3[2] = append[0];
1173 block3[3] = append[1];
// Size constants, all in bytes unless suffixed with "4" (then in u32 words).
1178 #define AESSZ 16 // AES_BLOCK_SIZE
// Bytes of keystream consumed per hash invocation: one SHA-256 message block
// worth per SHA-256 round, one 128-byte block for SHA-384/SHA-512.
1184 #define WORDSZ256 64
1185 #define WORDSZ384 128
1186 #define WORDSZ512 128
1188 #define PWMAXSZ 32 // oclHashcat password length limit
// Worst-case digest and message-block sizes (BLSZ512 defined on a line
// elided from this excerpt).
1189 #define BLMAXSZ BLSZ512
1190 #define WORDMAXSZ WORDSZ512
// Word-count equivalents of the byte sizes above.
1192 #define PWMAXSZ4 (PWMAXSZ / 4)
1193 #define BLMAXSZ4 (BLMAXSZ / 4)
1194 #define WORDMAXSZ4 (WORDMAXSZ / 4)
1195 #define AESSZ4 (AESSZ / 4)
// Build the scratch buffer sc = password || block (digest) || first 16 bytes
// repeated, so later unaligned 16-byte reads never run off the end.
// The fast path (pw_len a multiple of 4) copies whole words; otherwise the
// misalignment pm is bridged with amd_bytealign. The declarations of idx/pm4
// and the if/else structure fall on lines elided from this excerpt.
1197 static void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len)
1199 const u32 bd = bl_len / 4;
1201 const u32 pm = pw_len % 4;
1202 const u32 pd = pw_len / 4;
// Aligned path: straight word copies, then replicate sc[0..3] at the tail.
1208 for (u32 i = 0; i < pd; i++) sc[idx++] = pw[i];
1209 for (u32 i = 0; i < bd; i++) sc[idx++] = bl[i];
1210 for (u32 i = 0; i < 4; i++) sc[idx++] = sc[i];
// Unaligned path: splice bl (and then the wrap-around words) into the byte
// stream at offset pm using amd_bytealign over adjacent word pairs.
1218 for (i = 0; i < pd; i++) sc[idx++] = pw[i];
1220 | amd_bytealign (bl[0], 0, pm4);
1221 for (i = 1; i < bd; i++) sc[idx++] = amd_bytealign (bl[i], bl[i - 1], pm4);
1222 sc[idx++] = amd_bytealign (sc[0], bl[i - 1], pm4);
1223 for (i = 1; i < 4; i++) sc[idx++] = amd_bytealign (sc[i], sc[i - 1], pm4);
1224 sc[idx++] = amd_bytealign ( 0, sc[i - 1], pm4);
1228 static void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, const u32 pwbl_len)
1230 const u32 m = offset % pwbl_len;
1232 const u32 om = m % 4;
1233 const u32 od = m / 4;
1235 pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
1236 pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
1237 pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
1238 pt[3] = amd_bytealign (sc[od + 4], sc[od + 3], om);
// Generate W_len bytes of hash input into ctx->W32 by AES-encrypting
// successive 16-byte chunks of the scratch stream, chaining through iv.
// NOTE(review): the lines elided between the plaintext build and the encrypt
// call presumably XOR pt with iv (CBC chaining) — confirm against the full
// source.
1241 static void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
// k walks bytes, wk walks u32 words of the output buffer.
1243 for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4)
1247 make_pt_with_offset (pt, offset + k, sc, pwbl_len);
// The ciphertext becomes both the next chaining value and the output words.
1254 AES128_encrypt (pt, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1256 ctx->W32[wk + 0] = iv[0];
1257 ctx->W32[wk + 1] = iv[1];
1258 ctx->W32[wk + 2] = iv[2];
1259 ctx->W32[wk + 3] = iv[3];
// One round of the SHA-2-family hardening loop: derive an AES key from the
// current digest, stream-encrypt the repeated (password || digest) buffer,
// hash the resulting stream with SHA-256/384/512 (selected by the first 16
// bytes of keystream mod 3), and leave the new digest in ctx. Returns a byte
// (ex) extracted near the end of the final block; the return statement and
// several declarations/case labels fall on lines elided from this excerpt.
1263 static u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
1265 // make scratch buffer
1267 u32 sc[PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
1269 make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
1271 // make sure pwbl_len is calculcated before it gets changed
1273 const u32 pwbl_len = pw_len + ctx->dgst_len;
// CBC IV = second half of the current (SHA-256-sized) digest.
1279 iv[0] = ctx->dgst32[4];
1280 iv[1] = ctx->dgst32[5];
1281 iv[2] = ctx->dgst32[6];
1282 iv[3] = ctx->dgst32[7];
// AES key = first half of the digest (expanded into rek).
1288 AES128_ExpandKey (ctx->dgst32, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1290 // first call is special as the hash depends on the result of it
1291 // but since we do not know about the outcome at this time
1292 // we must use the max
1294 make_w_with_offset (ctx, WORDMAXSZ, 0, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1296 // now we can find out hash to use
// Sum the first 16 keystream bytes; (sum % 3) picks the hash (switch elided).
1300 for (u32 i = 0; i < 4; i++)
1302 sum += (ctx->W32[i] >> 24) & 0xff;
1303 sum += (ctx->W32[i] >> 16) & 0xff;
1304 sum += (ctx->W32[i] >> 8) & 0xff;
1305 sum += (ctx->W32[i] >> 0) & 0xff;
// Case 0: SHA-256 — init IV, set sizes, absorb the 128 pre-generated bytes
// as two 64-byte blocks.
1312 case 0: ctx->dgst32[0] = SHA256M_A;
1313 ctx->dgst32[1] = SHA256M_B;
1314 ctx->dgst32[2] = SHA256M_C;
1315 ctx->dgst32[3] = SHA256M_D;
1316 ctx->dgst32[4] = SHA256M_E;
1317 ctx->dgst32[5] = SHA256M_F;
1318 ctx->dgst32[6] = SHA256M_G;
1319 ctx->dgst32[7] = SHA256M_H;
1320 ctx->dgst_len = BLSZ256;
1321 ctx->W_len = WORDSZ256;
1322 sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1323 sha256_transform (&ctx->W32[16], &ctx->W32[20], &ctx->W32[24], &ctx->W32[28], ctx->dgst32);
// Case 1: SHA-384 — one 128-byte block per transform.
1325 case 1: ctx->dgst64[0] = SHA384M_A;
1326 ctx->dgst64[1] = SHA384M_B;
1327 ctx->dgst64[2] = SHA384M_C;
1328 ctx->dgst64[3] = SHA384M_D;
1329 ctx->dgst64[4] = SHA384M_E;
1330 ctx->dgst64[5] = SHA384M_F;
1331 ctx->dgst64[6] = SHA384M_G;
1332 ctx->dgst64[7] = SHA384M_H;
1333 ctx->dgst_len = BLSZ384;
1334 ctx->W_len = WORDSZ384;
1335 sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
// Case 2: SHA-512.
1337 case 2: ctx->dgst64[0] = SHA512M_A;
1338 ctx->dgst64[1] = SHA512M_B;
1339 ctx->dgst64[2] = SHA512M_C;
1340 ctx->dgst64[3] = SHA512M_D;
1341 ctx->dgst64[4] = SHA512M_E;
1342 ctx->dgst64[5] = SHA512M_F;
1343 ctx->dgst64[6] = SHA512M_G;
1344 ctx->dgst64[7] = SHA512M_H;
1345 ctx->dgst_len = BLSZ512;
1346 ctx->W_len = WORDSZ512;
1347 sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
// Total bytes to hash: 64 repetitions of (password || digest).
1353 const u32 final_len = pwbl_len * 64;
// Process full blocks; stop early enough that the tail fits one final block.
1355 const u32 iter_max = ctx->W_len - (ctx->W_len / 8);
1360 for (offset = WORDMAXSZ, left = final_len - offset; left >= iter_max; offset += ctx->W_len, left -= ctx->W_len)
1362 make_w_with_offset (ctx, ctx->W_len, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1364 switch (ctx->dgst_len)
1366 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1368 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1370 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 8], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
// NOTE(review): the line above is transcribed from the excerpt; the original
// passes &ctx->W64[ 4] as the second argument — verify against full source.
// Tail handling when leftover data still needs one extra generated chunk
// (SHA-384/512 only): generate 64 more bytes, append 0x80 terminator, write
// the 128-bit length. Zeroing stores between these lines are elided.
1379 switch (ctx->dgst_len)
1381 case BLSZ384: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1382 ctx->W64[ 8] = 0x80;
1389 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1390 ex = ctx->W64[7] >> 56;
1392 case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1393 ctx->W64[ 8] = 0x80;
1400 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1401 ex = ctx->W64[7] >> 56;
// Alternate tail (no extra chunk needed): pad directly into a fresh block.
// ex captures the last data byte before padding; zeroing stores elided.
1407 switch (ctx->dgst_len)
1409 case BLSZ256: ex = ctx->W32[15] >> 24;
1410 ctx->W32[ 0] = 0x80;
1425 ctx->W32[15] = swap_workaround (final_len * 8);
1427 case BLSZ384: ex = ctx->W64[15] >> 56;
1428 ctx->W64[ 0] = 0x80;
1443 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
1445 case BLSZ512: ex = ctx->W64[15] >> 56;
1446 ctx->W64[ 0] = 0x80;
1461 ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
// Final transform, then byte-swap the digest into memory order and zero the
// unused trailing digest words (SHA-384 truncates to 6 u64 words).
1466 switch (ctx->dgst_len)
1468 case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
1469 ctx->dgst32[ 0] = swap_workaround (ctx->dgst32[0]);
1470 ctx->dgst32[ 1] = swap_workaround (ctx->dgst32[1]);
1471 ctx->dgst32[ 2] = swap_workaround (ctx->dgst32[2]);
1472 ctx->dgst32[ 3] = swap_workaround (ctx->dgst32[3]);
1473 ctx->dgst32[ 4] = swap_workaround (ctx->dgst32[4]);
1474 ctx->dgst32[ 5] = swap_workaround (ctx->dgst32[5]);
1475 ctx->dgst32[ 6] = swap_workaround (ctx->dgst32[6]);
1476 ctx->dgst32[ 7] = swap_workaround (ctx->dgst32[7]);
1477 ctx->dgst32[ 8] = 0;
1478 ctx->dgst32[ 9] = 0;
1479 ctx->dgst32[10] = 0;
1480 ctx->dgst32[11] = 0;
1481 ctx->dgst32[12] = 0;
1482 ctx->dgst32[13] = 0;
1483 ctx->dgst32[14] = 0;
1484 ctx->dgst32[15] = 0;
1486 case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1487 ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]);
1488 ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]);
1489 ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]);
1490 ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]);
1491 ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]);
1492 ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]);
1496 case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
1497 ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]);
1498 ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]);
1499 ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]);
1500 ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]);
1501 ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]);
1502 ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]);
1503 ctx->dgst64[6] = swap_workaround (ctx->dgst64[6]);
1504 ctx->dgst64[7] = swap_workaround (ctx->dgst64[7]);
// Init kernel: per candidate, compute digest = SHA-256 (password || salt) and
// seed the per-GID tmps state with it (SHA-256 selected as the starting hash).
// Block-buffer declarations and salt padding between the visible lines are
// elided from this excerpt.
1511 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1517 const u32 gid = get_global_id (0);
1519 if (gid >= gid_max) return;
// Load up to 16 bytes of password material.
1523 w0[0] = pws[gid].i[0];
1524 w0[1] = pws[gid].i[1];
1525 w0[2] = pws[gid].i[2];
1526 w0[3] = pws[gid].i[3];
1528 const u32 pw_len = pws[gid].pw_len;
// 8-byte salt.
1536 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1537 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1539 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1545 u32 block_len = pw_len;
// Build the single SHA-256 message block: pw || salt || 0x80 || len bits.
1575 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1577 block_len += salt_len;
1579 append_0x80_2 (block0, block1, block_len);
1581 block3[3] = swap_workaround (block_len * 8);
1585 digest[0] = SHA256M_A;
1586 digest[1] = SHA256M_B;
1587 digest[2] = SHA256M_C;
1588 digest[3] = SHA256M_D;
1589 digest[4] = SHA256M_E;
1590 digest[5] = SHA256M_F;
1591 digest[6] = SHA256M_G;
1592 digest[7] = SHA256M_H;
1594 sha256_transform (block0, block1, block2, block3, digest);
// Store in byte-swapped (memory) order, matching do_round's convention.
1596 digest[0] = swap_workaround (digest[0]);
1597 digest[1] = swap_workaround (digest[1]);
1598 digest[2] = swap_workaround (digest[2]);
1599 digest[3] = swap_workaround (digest[3]);
1600 digest[4] = swap_workaround (digest[4]);
1601 digest[5] = swap_workaround (digest[5]);
1602 digest[6] = swap_workaround (digest[6]);
1603 digest[7] = swap_workaround (digest[7]);
1605 tmps[gid].dgst32[0] = digest[0];
1606 tmps[gid].dgst32[1] = digest[1];
1607 tmps[gid].dgst32[2] = digest[2];
1608 tmps[gid].dgst32[3] = digest[3];
1609 tmps[gid].dgst32[4] = digest[4];
1610 tmps[gid].dgst32[5] = digest[5];
1611 tmps[gid].dgst32[6] = digest[6];
1612 tmps[gid].dgst32[7] = digest[7];
1613 tmps[gid].dgst_len = BLSZ256;
1614 tmps[gid].W_len = WORDSZ256;
// Loop kernel: run loop_cnt iterations of do_round per candidate, persisting
// the hashing context in tmps between kernel launches. After round 64 the
// algorithm becomes data-dependent: a byte (ex) from the last round decides
// how many extra rounds (ex + 32 total target) are still required.
1617 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1619 const u32 gid = get_global_id (0);
1620 const u32 lid = get_local_id (0);
// Stage the AES T-tables into __local memory: 64 work-items x 4 entries each
// cover all 256 entries per table. te0..te4 are presumably __constant tables
// defined earlier in the file (not visible in this excerpt).
1626 const u32 lid4 = lid * 4;
1628 __local u32 s_te0[256];
1629 __local u32 s_te1[256];
1630 __local u32 s_te2[256];
1631 __local u32 s_te3[256];
1632 __local u32 s_te4[256];
1634 s_te0[lid4 + 0] = te0[lid4 + 0];
1635 s_te0[lid4 + 1] = te0[lid4 + 1];
1636 s_te0[lid4 + 2] = te0[lid4 + 2];
1637 s_te0[lid4 + 3] = te0[lid4 + 3];
1639 s_te1[lid4 + 0] = te1[lid4 + 0];
1640 s_te1[lid4 + 1] = te1[lid4 + 1];
1641 s_te1[lid4 + 2] = te1[lid4 + 2];
1642 s_te1[lid4 + 3] = te1[lid4 + 3];
1644 s_te2[lid4 + 0] = te2[lid4 + 0];
1645 s_te2[lid4 + 1] = te2[lid4 + 1];
1646 s_te2[lid4 + 2] = te2[lid4 + 2];
1647 s_te2[lid4 + 3] = te2[lid4 + 3];
1649 s_te3[lid4 + 0] = te3[lid4 + 0];
1650 s_te3[lid4 + 1] = te3[lid4 + 1];
1651 s_te3[lid4 + 2] = te3[lid4 + 2];
1652 s_te3[lid4 + 3] = te3[lid4 + 3];
1654 s_te4[lid4 + 0] = te4[lid4 + 0];
1655 s_te4[lid4 + 1] = te4[lid4 + 1];
1656 s_te4[lid4 + 2] = te4[lid4 + 2];
1657 s_te4[lid4 + 3] = te4[lid4 + 3];
// All work-items (including those about to bail on gid_max) reach this
// barrier before any table reads.
1659 barrier (CLK_LOCAL_MEM_FENCE);
1661 if (gid >= gid_max) return;
1669 w0[0] = pws[gid].i[0];
1670 w0[1] = pws[gid].i[1];
1671 w0[2] = pws[gid].i[2];
1672 w0[3] = pws[gid].i[3];
1674 const u32 pw_len = pws[gid].pw_len;
// Restore the persisted context (dgst64 aliases dgst32 via the ctx union).
1682 ctx.dgst64[0] = tmps[gid].dgst64[0];
1683 ctx.dgst64[1] = tmps[gid].dgst64[1];
1684 ctx.dgst64[2] = tmps[gid].dgst64[2];
1685 ctx.dgst64[3] = tmps[gid].dgst64[3];
1686 ctx.dgst64[4] = tmps[gid].dgst64[4];
1687 ctx.dgst64[5] = tmps[gid].dgst64[5];
1688 ctx.dgst64[6] = tmps[gid].dgst64[6];
1689 ctx.dgst64[7] = tmps[gid].dgst64[7];
1690 ctx.dgst_len = tmps[gid].dgst_len;
1691 ctx.W_len = tmps[gid].W_len;
1695 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1697 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
// On the launch that completes round 64, finish the data-dependent tail
// (until round index reaches ex + 32) in this same launch, since the host
// cannot know ex in advance.
1700 if ((loop_pos + loop_cnt) == 64)
1702 for (u32 i = 64; i < ex + 32; i++)
1704 ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
// Persist the context for the next launch / the comp kernel.
1708 tmps[gid].dgst64[0] = ctx.dgst64[0];
1709 tmps[gid].dgst64[1] = ctx.dgst64[1];
1710 tmps[gid].dgst64[2] = ctx.dgst64[2];
1711 tmps[gid].dgst64[3] = ctx.dgst64[3];
1712 tmps[gid].dgst64[4] = ctx.dgst64[4];
1713 tmps[gid].dgst64[5] = ctx.dgst64[5];
1714 tmps[gid].dgst64[6] = ctx.dgst64[6];
1715 tmps[gid].dgst64[7] = ctx.dgst64[7];
1716 tmps[gid].dgst_len = ctx.dgst_len;
1717 tmps[gid].W_len = ctx.W_len;
// Comparison kernel: read the final digest words from tmps, byte-swap them
// into comparison order, and run the shared multi-hash compare code pulled in
// via VECT_COMPARE_M (selected by vector width in the file header). The
// kernel's closing brace lies beyond this excerpt.
1720 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pdf17l8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1726 const u32 gid = get_global_id (0);
1728 if (gid >= gid_max) return;
1730 const u32 lid = get_local_id (0);
// DGST_R0..R3 select which digest words the compare template checks.
1736 const u32x r0 = swap_workaround (tmps[gid].dgst32[DGST_R0]);
1737 const u32x r1 = swap_workaround (tmps[gid].dgst32[DGST_R1]);
1738 const u32x r2 = swap_workaround (tmps[gid].dgst32[DGST_R2]);
1739 const u32x r3 = swap_workaround (tmps[gid].dgst32[DGST_R3]);
// Shared compare template expects r0..r3, lid, gid in scope.
1743 #include VECT_COMPARE_M