2 * Author......: Jens Steube <jens.steube@gmail.com>
8 //too much register pressure
9 //#define NEW_SIMD_CODE
11 #include "inc_hash_constants.h"
12 #include "inc_vendor.cl"
19 #include "inc_hash_functions.cl"
20 #include "inc_types.cl"
21 #include "inc_common.cl"
24 #include "inc_simd.cl"
34 void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j)
39 rc4_key->S[i] = rc4_key->S[j];
43 void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
48 __local u32 *ptr = (__local u32 *) rc4_key->S;
53 for (u32 i = 0; i < 64; i++)
60 for (u32 i = 0; i < 16; i++)
68 j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++;
69 j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++;
70 j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++;
71 j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++;
75 j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++;
76 j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++;
77 j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++;
78 j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++;
82 j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++;
83 j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++;
84 j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++;
85 j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++;
89 j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++;
90 j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++;
91 j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++;
92 j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++;
96 u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
101 for (u32 k = 0; k < 4; k++)
110 swap (rc4_key, i, j);
112 idx = rc4_key->S[i] + rc4_key->S[j];
114 xor4 |= rc4_key->S[idx] << 0;
119 swap (rc4_key, i, j);
121 idx = rc4_key->S[i] + rc4_key->S[j];
123 xor4 |= rc4_key->S[idx] << 8;
128 swap (rc4_key, i, j);
130 idx = rc4_key->S[i] + rc4_key->S[j];
132 xor4 |= rc4_key->S[idx] << 16;
137 swap (rc4_key, i, j);
139 idx = rc4_key->S[i] + rc4_key->S[j];
141 xor4 |= rc4_key->S[idx] << 24;
143 out[k] = in[k] ^ xor4;
149 void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
173 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
174 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
175 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
176 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
177 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
178 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
179 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
180 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
181 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
182 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
183 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
184 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
185 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
186 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
187 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
188 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
190 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
191 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
192 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
193 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
194 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
195 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
196 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
197 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
198 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
199 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
200 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
201 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
202 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
203 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
204 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
205 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
207 MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
208 MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
209 MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
210 MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
211 MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
212 MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
213 MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
214 MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
215 MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
216 MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
217 MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
218 MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
219 MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
220 MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
221 MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
222 MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
224 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
225 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
226 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
227 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
228 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
229 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
230 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
231 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
232 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
233 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
234 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
235 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
236 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
237 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
238 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
239 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
247 void gen336 (u32 digest_pre[4], u32 salt_buf[4], u32 digest[4])
254 digest_t0[0] = digest_pre[0];
255 digest_t0[1] = digest_pre[1] & 0xff;
257 digest_t1[0] = digest_pre[0] << 8;
258 digest_t1[1] = digest_pre[0] >> 24 | digest_pre[1] << 8;
260 digest_t2[0] = digest_pre[0] << 16;
261 digest_t2[1] = digest_pre[0] >> 16 | digest_pre[1] << 16;
263 digest_t3[0] = digest_pre[0] << 24;
264 digest_t3[1] = digest_pre[0] >> 8 | digest_pre[1] << 24;
271 salt_buf_t0[0] = salt_buf[0];
272 salt_buf_t0[1] = salt_buf[1];
273 salt_buf_t0[2] = salt_buf[2];
274 salt_buf_t0[3] = salt_buf[3];
276 salt_buf_t1[0] = salt_buf[0] << 8;
277 salt_buf_t1[1] = salt_buf[0] >> 24 | salt_buf[1] << 8;
278 salt_buf_t1[2] = salt_buf[1] >> 24 | salt_buf[2] << 8;
279 salt_buf_t1[3] = salt_buf[2] >> 24 | salt_buf[3] << 8;
280 salt_buf_t1[4] = salt_buf[3] >> 24;
282 salt_buf_t2[0] = salt_buf[0] << 16;
283 salt_buf_t2[1] = salt_buf[0] >> 16 | salt_buf[1] << 16;
284 salt_buf_t2[2] = salt_buf[1] >> 16 | salt_buf[2] << 16;
285 salt_buf_t2[3] = salt_buf[2] >> 16 | salt_buf[3] << 16;
286 salt_buf_t2[4] = salt_buf[3] >> 16;
288 salt_buf_t3[0] = salt_buf[0] << 24;
289 salt_buf_t3[1] = salt_buf[0] >> 8 | salt_buf[1] << 24;
290 salt_buf_t3[2] = salt_buf[1] >> 8 | salt_buf[2] << 24;
291 salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24;
292 salt_buf_t3[4] = salt_buf[3] >> 8;
299 // generate the 16 * 21 buffer
319 w0_t[0] = digest_t0[0];
320 w0_t[1] = digest_t0[1];
323 w0_t[1] |= salt_buf_t1[0];
324 w0_t[2] = salt_buf_t1[1];
325 w0_t[3] = salt_buf_t1[2];
326 w1_t[0] = salt_buf_t1[3];
327 w1_t[1] = salt_buf_t1[4];
330 w1_t[1] |= digest_t1[0];
331 w1_t[2] = digest_t1[1];
334 w1_t[2] |= salt_buf_t2[0];
335 w1_t[3] = salt_buf_t2[1];
336 w2_t[0] = salt_buf_t2[2];
337 w2_t[1] = salt_buf_t2[3];
338 w2_t[2] = salt_buf_t2[4];
341 w2_t[2] |= digest_t2[0];
342 w2_t[3] = digest_t2[1];
345 w2_t[3] |= salt_buf_t3[0];
346 w3_t[0] = salt_buf_t3[1];
347 w3_t[1] = salt_buf_t3[2];
348 w3_t[2] = salt_buf_t3[3];
349 w3_t[3] = salt_buf_t3[4];
353 w3_t[3] |= digest_t3[0];
355 md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
375 w0_t[0] = digest_t3[1];
378 w0_t[1] = salt_buf_t0[0];
379 w0_t[2] = salt_buf_t0[1];
380 w0_t[3] = salt_buf_t0[2];
381 w1_t[0] = salt_buf_t0[3];
384 w1_t[1] = digest_t0[0];
385 w1_t[2] = digest_t0[1];
388 w1_t[2] |= salt_buf_t1[0];
389 w1_t[3] = salt_buf_t1[1];
390 w2_t[0] = salt_buf_t1[2];
391 w2_t[1] = salt_buf_t1[3];
392 w2_t[2] = salt_buf_t1[4];
395 w2_t[2] |= digest_t1[0];
396 w2_t[3] = digest_t1[1];
399 w2_t[3] |= salt_buf_t2[0];
400 w3_t[0] = salt_buf_t2[1];
401 w3_t[1] = salt_buf_t2[2];
402 w3_t[2] = salt_buf_t2[3];
403 w3_t[3] = salt_buf_t2[4];
406 w3_t[3] |= digest_t2[0];
408 md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
428 w0_t[0] = digest_t2[1];
431 w0_t[0] |= salt_buf_t3[0];
432 w0_t[1] = salt_buf_t3[1];
433 w0_t[2] = salt_buf_t3[2];
434 w0_t[3] = salt_buf_t3[3];
435 w1_t[0] = salt_buf_t3[4];
438 w1_t[0] |= digest_t3[0];
439 w1_t[1] = digest_t3[1];
442 w1_t[2] = salt_buf_t0[0];
443 w1_t[3] = salt_buf_t0[1];
444 w2_t[0] = salt_buf_t0[2];
445 w2_t[1] = salt_buf_t0[3];
448 w2_t[2] = digest_t0[0];
449 w2_t[3] = digest_t0[1];
452 w2_t[3] |= salt_buf_t1[0];
453 w3_t[0] = salt_buf_t1[1];
454 w3_t[1] = salt_buf_t1[2];
455 w3_t[2] = salt_buf_t1[3];
456 w3_t[3] = salt_buf_t1[4];
459 w3_t[3] |= digest_t1[0];
461 md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
481 w0_t[0] = digest_t1[1];
484 w0_t[0] |= salt_buf_t2[0];
485 w0_t[1] = salt_buf_t2[1];
486 w0_t[2] = salt_buf_t2[2];
487 w0_t[3] = salt_buf_t2[3];
488 w1_t[0] = salt_buf_t2[4];
491 w1_t[0] |= digest_t2[0];
492 w1_t[1] = digest_t2[1];
495 w1_t[1] |= salt_buf_t3[0];
496 w1_t[2] = salt_buf_t3[1];
497 w1_t[3] = salt_buf_t3[2];
498 w2_t[0] = salt_buf_t3[3];
499 w2_t[1] = salt_buf_t3[4];
502 w2_t[1] |= digest_t3[0];
503 w2_t[2] = digest_t3[1];
506 w2_t[3] = salt_buf_t0[0];
507 w3_t[0] = salt_buf_t0[1];
508 w3_t[1] = salt_buf_t0[2];
509 w3_t[2] = salt_buf_t0[3];
512 w3_t[3] = digest_t0[0];
514 md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
534 w0_t[0] = digest_t0[1];
537 w0_t[0] |= salt_buf_t1[0];
538 w0_t[1] = salt_buf_t1[1];
539 w0_t[2] = salt_buf_t1[2];
540 w0_t[3] = salt_buf_t1[3];
541 w1_t[0] = salt_buf_t1[4];
544 w1_t[0] |= digest_t1[0];
545 w1_t[1] = digest_t1[1];
548 w1_t[1] |= salt_buf_t2[0];
549 w1_t[2] = salt_buf_t2[1];
550 w1_t[3] = salt_buf_t2[2];
551 w2_t[0] = salt_buf_t2[3];
552 w2_t[1] = salt_buf_t2[4];
555 w2_t[1] |= digest_t2[0];
556 w2_t[2] = digest_t2[1];
559 w2_t[2] |= salt_buf_t3[0];
560 w2_t[3] = salt_buf_t3[1];
561 w3_t[0] = salt_buf_t3[2];
562 w3_t[1] = salt_buf_t3[3];
563 w3_t[2] = salt_buf_t3[4];
566 w3_t[2] |= digest_t3[0];
567 w3_t[3] = digest_t3[1];
569 md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
571 w0_t[0] = salt_buf_t0[0];
572 w0_t[1] = salt_buf_t0[1];
573 w0_t[2] = salt_buf_t0[2];
574 w0_t[3] = salt_buf_t0[3];
585 w3_t[2] = 21 * 16 * 8;
588 md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
591 __kernel void m09700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
597 const u32 lid = get_local_id (0);
603 const u32 gid = get_global_id (0);
605 if (gid >= gid_max) return;
610 pw_buf0[0] = pws[gid].i[ 0];
611 pw_buf0[1] = pws[gid].i[ 1];
612 pw_buf0[2] = pws[gid].i[ 2];
613 pw_buf0[3] = pws[gid].i[ 3];
614 pw_buf1[0] = pws[gid].i[ 4];
615 pw_buf1[1] = pws[gid].i[ 5];
616 pw_buf1[2] = pws[gid].i[ 6];
617 pw_buf1[3] = pws[gid].i[ 7];
619 const u32 pw_len = pws[gid].pw_len;
625 __local RC4_KEY rc4_keys[64];
627 __local RC4_KEY *rc4_key = &rc4_keys[lid];
635 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
636 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
637 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
638 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
644 const u32 version = oldoffice01_bufs[salt_pos].version;
646 u32 encryptedVerifier[4];
648 encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0];
649 encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1];
650 encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2];
651 encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3];
657 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
664 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
666 append_0x80_2x4_VV (w0, w1, out_len);
672 make_unicode (w1, w2, w3);
673 make_unicode (w0, w0, w1);
675 w3[2] = out_len * 8 * 2;
680 digest_pre[0] = MD5M_A;
681 digest_pre[1] = MD5M_B;
682 digest_pre[2] = MD5M_C;
683 digest_pre[3] = MD5M_D;
685 md5_transform (w0, w1, w2, w3, digest_pre);
687 digest_pre[0] &= 0xffffffff;
688 digest_pre[1] &= 0x000000ff;
689 digest_pre[2] &= 0x00000000;
690 digest_pre[3] &= 0x00000000;
699 gen336 (digest_pre, salt_buf, digest);
701 // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable!
704 w0[1] = digest[1] & 0xff;
725 md5_transform (w0, w1, w2, w3, digest);
736 rc4_init_16 (rc4_key, key);
740 u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
764 md5_transform (w0, w1, w2, w3, digest);
766 rc4_next_16 (rc4_key, 16, j, digest, out);
768 COMPARE_M_SIMD (out[0], out[1], out[2], out[3]);
772 __kernel void m09700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
776 __kernel void m09700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
780 __kernel void m09700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
786 const u32 lid = get_local_id (0);
792 const u32 gid = get_global_id (0);
794 if (gid >= gid_max) return;
799 pw_buf0[0] = pws[gid].i[ 0];
800 pw_buf0[1] = pws[gid].i[ 1];
801 pw_buf0[2] = pws[gid].i[ 2];
802 pw_buf0[3] = pws[gid].i[ 3];
803 pw_buf1[0] = pws[gid].i[ 4];
804 pw_buf1[1] = pws[gid].i[ 5];
805 pw_buf1[2] = pws[gid].i[ 6];
806 pw_buf1[3] = pws[gid].i[ 7];
808 const u32 pw_len = pws[gid].pw_len;
814 __local RC4_KEY rc4_keys[64];
816 __local RC4_KEY *rc4_key = &rc4_keys[lid];
824 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
825 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
826 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
827 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
833 const u32 version = oldoffice01_bufs[salt_pos].version;
835 u32 encryptedVerifier[4];
837 encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0];
838 encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1];
839 encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2];
840 encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3];
846 const u32 search[4] =
848 digests_buf[digests_offset].digest_buf[DGST_R0],
849 digests_buf[digests_offset].digest_buf[DGST_R1],
850 digests_buf[digests_offset].digest_buf[DGST_R2],
851 digests_buf[digests_offset].digest_buf[DGST_R3]
858 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
865 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
867 append_0x80_2x4_VV (w0, w1, out_len);
873 make_unicode (w1, w2, w3);
874 make_unicode (w0, w0, w1);
876 w3[2] = out_len * 8 * 2;
881 digest_pre[0] = MD5M_A;
882 digest_pre[1] = MD5M_B;
883 digest_pre[2] = MD5M_C;
884 digest_pre[3] = MD5M_D;
886 md5_transform (w0, w1, w2, w3, digest_pre);
888 digest_pre[0] &= 0xffffffff;
889 digest_pre[1] &= 0x000000ff;
890 digest_pre[2] &= 0x00000000;
891 digest_pre[3] &= 0x00000000;
900 gen336 (digest_pre, salt_buf, digest);
902 // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable!
905 w0[1] = digest[1] & 0xff;
926 md5_transform (w0, w1, w2, w3, digest);
937 rc4_init_16 (rc4_key, key);
941 u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
965 md5_transform (w0, w1, w2, w3, digest);
967 rc4_next_16 (rc4_key, 16, j, digest, out);
969 COMPARE_S_SIMD (out[0], out[1], out[2], out[3]);
973 __kernel void m09700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
977 __kernel void m09700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)