2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
21 #define COMPARE_M "check_multi_vect1_comp4.c"
25 #define COMPARE_M "check_multi_vect2_comp4.c"
29 #define COMPARE_M "check_multi_vect4_comp4.c"
32 static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
58 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
59 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
60 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
61 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
62 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
63 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
64 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
65 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
66 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
67 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
68 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
69 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
70 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
71 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
72 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
73 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
75 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
76 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
77 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
78 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
79 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
80 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
81 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
82 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
83 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
84 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
85 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
86 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
87 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
88 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
89 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
90 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
92 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
93 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
94 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
95 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
96 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
97 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
98 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
99 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
100 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
101 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
102 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
103 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
104 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
105 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
106 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
107 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
109 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
110 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
111 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
112 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
113 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
114 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
115 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
116 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
117 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
118 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
119 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
120 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
121 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
122 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
123 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
124 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
132 static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4])
134 w0[0] = w0[0] ^ 0x36363636;
135 w0[1] = w0[1] ^ 0x36363636;
136 w0[2] = w0[2] ^ 0x36363636;
137 w0[3] = w0[3] ^ 0x36363636;
138 w1[0] = w1[0] ^ 0x36363636;
139 w1[1] = w1[1] ^ 0x36363636;
140 w1[2] = w1[2] ^ 0x36363636;
141 w1[3] = w1[3] ^ 0x36363636;
142 w2[0] = w2[0] ^ 0x36363636;
143 w2[1] = w2[1] ^ 0x36363636;
144 w2[2] = w2[2] ^ 0x36363636;
145 w2[3] = w2[3] ^ 0x36363636;
146 w3[0] = w3[0] ^ 0x36363636;
147 w3[1] = w3[1] ^ 0x36363636;
148 w3[2] = w3[2] ^ 0x36363636;
149 w3[3] = w3[3] ^ 0x36363636;
156 md5_transform (w0, w1, w2, w3, ipad);
158 w0[0] = w0[0] ^ 0x6a6a6a6a;
159 w0[1] = w0[1] ^ 0x6a6a6a6a;
160 w0[2] = w0[2] ^ 0x6a6a6a6a;
161 w0[3] = w0[3] ^ 0x6a6a6a6a;
162 w1[0] = w1[0] ^ 0x6a6a6a6a;
163 w1[1] = w1[1] ^ 0x6a6a6a6a;
164 w1[2] = w1[2] ^ 0x6a6a6a6a;
165 w1[3] = w1[3] ^ 0x6a6a6a6a;
166 w2[0] = w2[0] ^ 0x6a6a6a6a;
167 w2[1] = w2[1] ^ 0x6a6a6a6a;
168 w2[2] = w2[2] ^ 0x6a6a6a6a;
169 w2[3] = w2[3] ^ 0x6a6a6a6a;
170 w3[0] = w3[0] ^ 0x6a6a6a6a;
171 w3[1] = w3[1] ^ 0x6a6a6a6a;
172 w3[2] = w3[2] ^ 0x6a6a6a6a;
173 w3[3] = w3[3] ^ 0x6a6a6a6a;
180 md5_transform (w0, w1, w2, w3, opad);
183 static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4])
190 md5_transform (w0, w1, w2, w3, digest);
206 w3[2] = (64 + 16) * 8;
214 md5_transform (w0, w1, w2, w3, digest);
217 static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
245 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
246 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
247 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
248 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
249 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
250 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
251 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
252 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
253 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
254 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
255 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
256 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
257 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
258 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
259 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
260 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
261 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
262 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
263 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
264 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
269 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
270 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
271 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
272 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
273 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
274 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
275 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
276 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
277 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
278 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
279 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
280 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
281 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
282 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
283 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
284 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
285 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
286 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
287 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
288 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
293 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
294 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
295 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
296 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
297 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
298 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
299 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
300 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
301 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
302 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
303 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
304 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
305 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
306 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
307 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
308 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
309 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
310 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
311 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
312 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
317 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
318 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
319 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
320 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
321 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
322 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
323 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
324 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
325 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
326 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
327 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
328 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
329 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
330 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
331 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
332 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
333 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
334 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
335 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
336 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
345 static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5])
347 w0[0] = w0[0] ^ 0x36363636;
348 w0[1] = w0[1] ^ 0x36363636;
349 w0[2] = w0[2] ^ 0x36363636;
350 w0[3] = w0[3] ^ 0x36363636;
351 w1[0] = w1[0] ^ 0x36363636;
352 w1[1] = w1[1] ^ 0x36363636;
353 w1[2] = w1[2] ^ 0x36363636;
354 w1[3] = w1[3] ^ 0x36363636;
355 w2[0] = w2[0] ^ 0x36363636;
356 w2[1] = w2[1] ^ 0x36363636;
357 w2[2] = w2[2] ^ 0x36363636;
358 w2[3] = w2[3] ^ 0x36363636;
359 w3[0] = w3[0] ^ 0x36363636;
360 w3[1] = w3[1] ^ 0x36363636;
361 w3[2] = w3[2] ^ 0x36363636;
362 w3[3] = w3[3] ^ 0x36363636;
370 sha1_transform (w0, w1, w2, w3, ipad);
372 w0[0] = w0[0] ^ 0x6a6a6a6a;
373 w0[1] = w0[1] ^ 0x6a6a6a6a;
374 w0[2] = w0[2] ^ 0x6a6a6a6a;
375 w0[3] = w0[3] ^ 0x6a6a6a6a;
376 w1[0] = w1[0] ^ 0x6a6a6a6a;
377 w1[1] = w1[1] ^ 0x6a6a6a6a;
378 w1[2] = w1[2] ^ 0x6a6a6a6a;
379 w1[3] = w1[3] ^ 0x6a6a6a6a;
380 w2[0] = w2[0] ^ 0x6a6a6a6a;
381 w2[1] = w2[1] ^ 0x6a6a6a6a;
382 w2[2] = w2[2] ^ 0x6a6a6a6a;
383 w2[3] = w2[3] ^ 0x6a6a6a6a;
384 w3[0] = w3[0] ^ 0x6a6a6a6a;
385 w3[1] = w3[1] ^ 0x6a6a6a6a;
386 w3[2] = w3[2] ^ 0x6a6a6a6a;
387 w3[3] = w3[3] ^ 0x6a6a6a6a;
395 sha1_transform (w0, w1, w2, w3, opad);
398 static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5])
406 sha1_transform (w0, w1, w2, w3, digest);
423 w3[3] = (64 + 20) * 8;
431 sha1_transform (w0, w1, w2, w3, digest);
434 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global wpa_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
440 const u32 gid = get_global_id (0);
442 if (gid >= gid_max) return;
446 w0[0] = pws[gid].i[ 0];
447 w0[1] = pws[gid].i[ 1];
448 w0[2] = pws[gid].i[ 2];
449 w0[3] = pws[gid].i[ 3];
453 w1[0] = pws[gid].i[ 4];
454 w1[1] = pws[gid].i[ 5];
455 w1[2] = pws[gid].i[ 6];
456 w1[3] = pws[gid].i[ 7];
460 w2[0] = pws[gid].i[ 8];
461 w2[1] = pws[gid].i[ 9];
462 w2[2] = pws[gid].i[10];
463 w2[3] = pws[gid].i[11];
467 w3[0] = pws[gid].i[12];
468 w3[1] = pws[gid].i[13];
469 w3[2] = pws[gid].i[14];
470 w3[3] = pws[gid].i[15];
476 u32 salt_len = salt_bufs[salt_pos].salt_len;
481 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
482 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
483 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
484 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
485 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
486 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
487 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
488 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
494 w0[0] = swap_workaround (w0[0]);
495 w0[1] = swap_workaround (w0[1]);
496 w0[2] = swap_workaround (w0[2]);
497 w0[3] = swap_workaround (w0[3]);
498 w1[0] = swap_workaround (w1[0]);
499 w1[1] = swap_workaround (w1[1]);
500 w1[2] = swap_workaround (w1[2]);
501 w1[3] = swap_workaround (w1[3]);
502 w2[0] = swap_workaround (w2[0]);
503 w2[1] = swap_workaround (w2[1]);
504 w2[2] = swap_workaround (w2[2]);
505 w2[3] = swap_workaround (w2[3]);
506 w3[0] = swap_workaround (w3[0]);
507 w3[1] = swap_workaround (w3[1]);
508 w3[2] = swap_workaround (w3[2]);
509 w3[3] = swap_workaround (w3[3]);
514 hmac_sha1_pad (w0, w1, w2, w3, ipad, opad);
516 tmps[gid].ipad[0] = ipad[0];
517 tmps[gid].ipad[1] = ipad[1];
518 tmps[gid].ipad[2] = ipad[2];
519 tmps[gid].ipad[3] = ipad[3];
520 tmps[gid].ipad[4] = ipad[4];
522 tmps[gid].opad[0] = opad[0];
523 tmps[gid].opad[1] = opad[1];
524 tmps[gid].opad[2] = opad[2];
525 tmps[gid].opad[3] = opad[3];
526 tmps[gid].opad[4] = opad[4];
528 for (u32 i = 0, j = 1; i < 8; i += 5, j += 1)
530 w0[0] = salt_buf0[0];
531 w0[1] = salt_buf0[1];
532 w0[2] = salt_buf0[2];
533 w0[3] = salt_buf0[3];
534 w1[0] = salt_buf1[0];
535 w1[1] = salt_buf1[1];
536 w1[2] = salt_buf1[2];
537 w1[3] = salt_buf1[3];
548 append_0x01_3 (w0, w1, w2, salt_len + 3);
550 append_0x02_3 (w0, w1, w2, salt_len + 3);
552 append_0x80_3 (w0, w1, w2, salt_len + 4);
554 w0[0] = swap_workaround (w0[0]);
555 w0[1] = swap_workaround (w0[1]);
556 w0[2] = swap_workaround (w0[2]);
557 w0[3] = swap_workaround (w0[3]);
558 w1[0] = swap_workaround (w1[0]);
559 w1[1] = swap_workaround (w1[1]);
560 w1[2] = swap_workaround (w1[2]);
561 w1[3] = swap_workaround (w1[3]);
562 w2[0] = swap_workaround (w2[0]);
563 w2[1] = swap_workaround (w2[1]);
569 w3[3] = (64 + salt_len + 4) * 8;
573 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst);
575 tmps[gid].dgst[i + 0] = dgst[0];
576 tmps[gid].dgst[i + 1] = dgst[1];
577 tmps[gid].dgst[i + 2] = dgst[2];
578 tmps[gid].dgst[i + 3] = dgst[3];
579 tmps[gid].dgst[i + 4] = dgst[4];
581 tmps[gid].out[i + 0] = dgst[0];
582 tmps[gid].out[i + 1] = dgst[1];
583 tmps[gid].out[i + 2] = dgst[2];
584 tmps[gid].out[i + 3] = dgst[3];
585 tmps[gid].out[i + 4] = dgst[4];
589 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global wpa_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
591 const u32 gid = get_global_id (0);
593 if (gid >= gid_max) return;
598 ipad[0] = tmps[gid].ipad[0];
599 ipad[1] = tmps[gid].ipad[1];
600 ipad[2] = tmps[gid].ipad[2];
601 ipad[3] = tmps[gid].ipad[3];
602 ipad[4] = tmps[gid].ipad[4];
604 opad[0] = tmps[gid].opad[0];
605 opad[1] = tmps[gid].opad[1];
606 opad[2] = tmps[gid].opad[2];
607 opad[3] = tmps[gid].opad[3];
608 opad[4] = tmps[gid].opad[4];
610 for (u32 i = 0; i < 8; i += 5)
615 dgst[0] = tmps[gid].dgst[i + 0];
616 dgst[1] = tmps[gid].dgst[i + 1];
617 dgst[2] = tmps[gid].dgst[i + 2];
618 dgst[3] = tmps[gid].dgst[i + 3];
619 dgst[4] = tmps[gid].dgst[i + 4];
621 out[0] = tmps[gid].out[i + 0];
622 out[1] = tmps[gid].out[i + 1];
623 out[2] = tmps[gid].out[i + 2];
624 out[3] = tmps[gid].out[i + 3];
625 out[4] = tmps[gid].out[i + 4];
627 for (u32 j = 0; j < loop_cnt; j++)
649 w3[3] = (64 + 20) * 8;
651 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst);
660 tmps[gid].dgst[i + 0] = dgst[0];
661 tmps[gid].dgst[i + 1] = dgst[1];
662 tmps[gid].dgst[i + 2] = dgst[2];
663 tmps[gid].dgst[i + 3] = dgst[3];
664 tmps[gid].dgst[i + 4] = dgst[4];
666 tmps[gid].out[i + 0] = out[0];
667 tmps[gid].out[i + 1] = out[1];
668 tmps[gid].out[i + 2] = out[2];
669 tmps[gid].out[i + 3] = out[3];
670 tmps[gid].out[i + 4] = out[4];
674 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global wpa_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
676 const u32 gid = get_global_id (0);
678 if (gid >= gid_max) return;
680 const u32 lid = get_local_id (0);
687 w0[0] = tmps[gid].out[0];
688 w0[1] = tmps[gid].out[1];
689 w0[2] = tmps[gid].out[2];
690 w0[3] = tmps[gid].out[3];
691 w1[0] = tmps[gid].out[4];
692 w1[1] = tmps[gid].out[5];
693 w1[2] = tmps[gid].out[6];
694 w1[3] = tmps[gid].out[7];
707 hmac_sha1_pad (w0, w1, w2, w3, ipad, opad);
709 w0[0] = wpa_bufs[salt_pos].pke[ 0];
710 w0[1] = wpa_bufs[salt_pos].pke[ 1];
711 w0[2] = wpa_bufs[salt_pos].pke[ 2];
712 w0[3] = wpa_bufs[salt_pos].pke[ 3];
713 w1[0] = wpa_bufs[salt_pos].pke[ 4];
714 w1[1] = wpa_bufs[salt_pos].pke[ 5];
715 w1[2] = wpa_bufs[salt_pos].pke[ 6];
716 w1[3] = wpa_bufs[salt_pos].pke[ 7];
717 w2[0] = wpa_bufs[salt_pos].pke[ 8];
718 w2[1] = wpa_bufs[salt_pos].pke[ 9];
719 w2[2] = wpa_bufs[salt_pos].pke[10];
720 w2[3] = wpa_bufs[salt_pos].pke[11];
721 w3[0] = wpa_bufs[salt_pos].pke[12];
722 w3[1] = wpa_bufs[salt_pos].pke[13];
723 w3[2] = wpa_bufs[salt_pos].pke[14];
724 w3[3] = wpa_bufs[salt_pos].pke[15];
726 sha1_transform (w0, w1, w2, w3, ipad);
728 w0[0] = wpa_bufs[salt_pos].pke[16];
729 w0[1] = wpa_bufs[salt_pos].pke[17];
730 w0[2] = wpa_bufs[salt_pos].pke[18];
731 w0[3] = wpa_bufs[salt_pos].pke[19];
732 w1[0] = wpa_bufs[salt_pos].pke[20];
733 w1[1] = wpa_bufs[salt_pos].pke[21];
734 w1[2] = wpa_bufs[salt_pos].pke[22];
735 w1[3] = wpa_bufs[salt_pos].pke[23];
736 w2[0] = wpa_bufs[salt_pos].pke[24];
743 w3[3] = (64 + 100) * 8;
747 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest);
750 w0[0] = swap_workaround (digest[0]);
751 w0[1] = swap_workaround (digest[1]);
752 w0[2] = swap_workaround (digest[2]);
753 w0[3] = swap_workaround (digest[3]);
767 hmac_md5_pad (w0, w1, w2, w3, ipad, opad);
769 int eapol_size = wpa_bufs[salt_pos].eapol_size;
774 for (eapol_left = eapol_size, eapol_off = 0; eapol_left >= 56; eapol_left -= 64, eapol_off += 16)
776 w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0];
777 w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1];
778 w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2];
779 w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3];
780 w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4];
781 w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5];
782 w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6];
783 w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7];
784 w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8];
785 w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9];
786 w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10];
787 w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11];
788 w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12];
789 w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13];
790 w3[2] = wpa_bufs[salt_pos].eapol[eapol_off + 14];
791 w3[3] = wpa_bufs[salt_pos].eapol[eapol_off + 15];
793 md5_transform (w0, w1, w2, w3, ipad);
796 w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0];
797 w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1];
798 w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2];
799 w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3];
800 w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4];
801 w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5];
802 w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6];
803 w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7];
804 w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8];
805 w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9];
806 w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10];
807 w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11];
808 w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12];
809 w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13];
810 w3[2] = (64 + eapol_size) * 8;
815 hmac_md5_run (w0, w1, w2, w3, ipad, opad, digest1);
823 const u32 r0 = digest1[DGST_R0];
824 const u32 r1 = digest1[DGST_R1];
825 const u32 r2 = digest1[DGST_R2];
826 const u32 r3 = digest1[DGST_R3];
849 hmac_sha1_pad (w0, w1, w2, w3, ipad, opad);
851 int eapol_size = wpa_bufs[salt_pos].eapol_size;
856 for (eapol_left = eapol_size, eapol_off = 0; eapol_left >= 56; eapol_left -= 64, eapol_off += 16)
858 w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0];
859 w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1];
860 w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2];
861 w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3];
862 w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4];
863 w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5];
864 w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6];
865 w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7];
866 w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8];
867 w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9];
868 w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10];
869 w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11];
870 w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12];
871 w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13];
872 w3[2] = wpa_bufs[salt_pos].eapol[eapol_off + 14];
873 w3[3] = wpa_bufs[salt_pos].eapol[eapol_off + 15];
875 sha1_transform (w0, w1, w2, w3, ipad);
878 w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0];
879 w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1];
880 w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2];
881 w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3];
882 w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4];
883 w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5];
884 w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6];
885 w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7];
886 w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8];
887 w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9];
888 w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10];
889 w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11];
890 w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12];
891 w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13];
893 w3[3] = (64 + eapol_size) * 8;
897 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest2);
905 const u32 r0 = digest2[DGST_R0];
906 const u32 r1 = digest2[DGST_R1];
907 const u32 r2 = digest2[DGST_R2];
908 const u32 r3 = digest2[DGST_R3];