2 * Author......: Jens Steube <jens.steube@gmail.com>
8 //too much register pressure
9 //#define NEW_SIMD_CODE
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
19 #include "include/kernel_functions.c"
20 #include "OpenCL/types_ocl.c"
21 #include "OpenCL/common.c"
22 #include "OpenCL/simd.c"
24 static void memcat64c_be (u32x block[16], const u32 offset, u32x carry[16])
26 const u32 mod = offset & 3;
27 const u32 div = offset / 4;
48 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
50 tmp00 = __byte_perm (carry[ 0], 0, selector);
51 tmp01 = __byte_perm (carry[ 1], carry[ 0], selector);
52 tmp02 = __byte_perm (carry[ 2], carry[ 1], selector);
53 tmp03 = __byte_perm (carry[ 3], carry[ 2], selector);
54 tmp04 = __byte_perm (carry[ 4], carry[ 3], selector);
55 tmp05 = __byte_perm (carry[ 5], carry[ 4], selector);
56 tmp06 = __byte_perm (carry[ 6], carry[ 5], selector);
57 tmp07 = __byte_perm (carry[ 7], carry[ 6], selector);
58 tmp08 = __byte_perm (carry[ 8], carry[ 7], selector);
59 tmp09 = __byte_perm (carry[ 9], carry[ 8], selector);
60 tmp10 = __byte_perm (carry[10], carry[ 9], selector);
61 tmp11 = __byte_perm (carry[11], carry[10], selector);
62 tmp12 = __byte_perm (carry[12], carry[11], selector);
63 tmp13 = __byte_perm (carry[13], carry[12], selector);
64 tmp14 = __byte_perm (carry[14], carry[13], selector);
65 tmp15 = __byte_perm (carry[15], carry[14], selector);
66 tmp16 = __byte_perm ( 0, carry[15], selector);
69 #if defined IS_AMD || defined IS_GENERIC
70 tmp00 = amd_bytealign ( 0, carry[ 0], offset);
71 tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset);
72 tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset);
73 tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset);
74 tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset);
75 tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset);
76 tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset);
77 tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset);
78 tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset);
79 tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset);
80 tmp10 = amd_bytealign (carry[ 9], carry[10], offset);
81 tmp11 = amd_bytealign (carry[10], carry[11], offset);
82 tmp12 = amd_bytealign (carry[11], carry[12], offset);
83 tmp13 = amd_bytealign (carry[12], carry[13], offset);
84 tmp14 = amd_bytealign (carry[13], carry[14], offset);
85 tmp15 = amd_bytealign (carry[14], carry[15], offset);
86 tmp16 = amd_bytealign (carry[15], 0, offset);
129 case 0: block[ 0] |= tmp00;
147 case 1: block[ 1] |= tmp00;
165 case 2: block[ 2] |= tmp00;
183 case 3: block[ 3] |= tmp00;
201 case 4: block[ 4] |= tmp00;
219 case 5: block[ 5] |= tmp00;
237 case 6: block[ 6] |= tmp00;
255 case 7: block[ 7] |= tmp00;
273 case 8: block[ 8] |= tmp00;
291 case 9: block[ 9] |= tmp00;
309 case 10: block[10] |= tmp00;
327 case 11: block[11] |= tmp00;
345 case 12: block[12] |= tmp00;
363 case 13: block[13] |= tmp00;
381 case 14: block[14] |= tmp00;
399 case 15: block[15] |= tmp00;
420 __kernel void m13500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
426 const u32 lid = get_local_id (0);
432 const u32 gid = get_global_id (0);
434 if (gid >= gid_max) return;
439 pw_buf0[0] = pws[gid].i[0];
440 pw_buf0[1] = pws[gid].i[1];
441 pw_buf0[2] = pws[gid].i[2];
442 pw_buf0[3] = pws[gid].i[3];
443 pw_buf1[0] = pws[gid].i[4];
444 pw_buf1[1] = pws[gid].i[5];
445 pw_buf1[2] = pws[gid].i[6];
446 pw_buf1[3] = pws[gid].i[7];
448 const u32 pw_l_len = pws[gid].pw_len;
454 const u32 pc_offset = esalt_bufs[salt_pos].pc_offset;
458 pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0];
459 pc_digest[1] = esalt_bufs[salt_pos].pc_digest[1];
460 pc_digest[2] = esalt_bufs[salt_pos].pc_digest[2];
461 pc_digest[3] = esalt_bufs[salt_pos].pc_digest[3];
462 pc_digest[4] = esalt_bufs[salt_pos].pc_digest[4];
469 salt_buf0[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 0]);
470 salt_buf0[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 1]);
471 salt_buf0[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 2]);
472 salt_buf0[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 3]);
473 salt_buf1[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 4]);
474 salt_buf1[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 5]);
475 salt_buf1[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 6]);
476 salt_buf1[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 7]);
477 salt_buf2[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 8]);
478 salt_buf2[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 9]);
479 salt_buf2[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 10]);
480 salt_buf2[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 11]);
481 salt_buf3[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 12]);
482 salt_buf3[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 13]);
483 salt_buf3[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 14]);
484 salt_buf3[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 15]);
486 const u32 salt_len = esalt_bufs[salt_pos].salt_len;
492 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
494 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
496 const u32x pw_len = pw_l_len + pw_r_len;
499 * concat password candidate
502 u32x wordl0[4] = { 0 };
503 u32x wordl1[4] = { 0 };
504 u32x wordl2[4] = { 0 };
505 u32x wordl3[4] = { 0 };
507 wordl0[0] = pw_buf0[0];
508 wordl0[1] = pw_buf0[1];
509 wordl0[2] = pw_buf0[2];
510 wordl0[3] = pw_buf0[3];
511 wordl1[0] = pw_buf1[0];
512 wordl1[1] = pw_buf1[1];
513 wordl1[2] = pw_buf1[2];
514 wordl1[3] = pw_buf1[3];
516 u32x wordr0[4] = { 0 };
517 u32x wordr1[4] = { 0 };
518 u32x wordr2[4] = { 0 };
519 u32x wordr3[4] = { 0 };
521 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
522 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
523 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
524 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
525 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
526 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
527 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
528 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
530 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
532 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
536 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
544 w0[0] = wordl0[0] | wordr0[0];
545 w0[1] = wordl0[1] | wordr0[1];
546 w0[2] = wordl0[2] | wordr0[2];
547 w0[3] = wordl0[3] | wordr0[3];
548 w1[0] = wordl1[0] | wordr1[0];
549 w1[1] = wordl1[1] | wordr1[1];
550 w1[2] = wordl1[2] | wordr1[2];
551 w1[3] = wordl1[3] | wordr1[3];
553 append_0x80_2x4_VV (w0, w1, pw_len);
555 make_unicode (w1, w2, w3);
556 make_unicode (w0, w0, w1);
558 const u32x pw_len2 = pw_len * 2;
560 const u32x pw_salt_len = pw_len2 + salt_len;
563 * prepend salt -- can't stay outside the loop this time
568 carry[ 0] = swap32 (w0[0]);
569 carry[ 1] = swap32 (w0[1]);
570 carry[ 2] = swap32 (w0[2]);
571 carry[ 3] = swap32 (w0[3]);
572 carry[ 4] = swap32 (w1[0]);
573 carry[ 5] = swap32 (w1[1]);
574 carry[ 6] = swap32 (w1[2]);
575 carry[ 7] = swap32 (w1[3]);
576 carry[ 8] = swap32 (w2[0]);
577 carry[ 9] = swap32 (w2[1]);
578 carry[10] = swap32 (w2[2]);
579 carry[11] = swap32 (w2[3]);
580 carry[12] = swap32 (w3[0]);
581 carry[13] = swap32 (w3[1]);
582 carry[14] = swap32 (w3[2]);
583 carry[15] = swap32 (w3[3]);
587 w[ 0] = salt_buf0[0];
588 w[ 1] = salt_buf0[1];
589 w[ 2] = salt_buf0[2];
590 w[ 3] = salt_buf0[3];
591 w[ 4] = salt_buf1[0];
592 w[ 5] = salt_buf1[1];
593 w[ 6] = salt_buf1[2];
594 w[ 7] = salt_buf1[3];
595 w[ 8] = salt_buf2[0];
596 w[ 9] = salt_buf2[1];
597 w[10] = salt_buf2[2];
598 w[11] = salt_buf2[3];
599 w[12] = salt_buf3[0];
600 w[13] = salt_buf3[1];
601 w[14] = salt_buf3[2];
602 w[15] = salt_buf3[3];
604 memcat64c_be (w, salt_len & 0x3f, carry);
606 u32x a = pc_digest[0];
607 u32x b = pc_digest[1];
608 u32x c = pc_digest[2];
609 u32x d = pc_digest[3];
610 u32x e = pc_digest[4];
612 if (((salt_len & 0x3f) + pw_len2) >= 56)
634 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
635 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
636 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
637 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
638 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
639 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
640 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
641 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
642 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
643 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
644 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
645 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
646 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
647 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
648 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
649 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
650 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
651 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
652 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
653 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
658 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
659 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
660 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
661 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
662 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
663 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
664 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
665 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
666 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
667 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
668 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
669 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
670 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
671 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
672 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
673 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
674 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
675 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
676 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
677 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
682 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
683 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
684 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
685 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
686 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
687 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
688 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
689 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
690 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
691 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
692 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
693 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
694 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
695 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
696 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
697 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
698 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
699 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
700 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
701 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
706 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
707 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
708 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
709 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
710 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
711 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
712 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
713 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
714 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
715 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
716 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
717 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
718 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
719 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
720 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
721 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
722 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
723 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
724 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
725 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
770 u32x wf_t = pw_salt_len * 8;
781 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
782 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
783 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
784 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
785 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
786 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
787 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
788 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
789 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
790 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
791 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
792 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
793 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
794 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
795 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
796 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
797 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
798 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
799 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
800 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
805 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
806 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
807 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
808 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
809 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
810 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
811 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
812 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
813 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
814 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
815 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
816 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
817 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
818 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
819 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
820 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
821 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
822 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
823 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
824 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
829 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
830 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
831 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
832 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
833 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
834 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
835 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
836 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
837 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
838 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
839 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
840 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
841 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
842 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
843 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
844 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
845 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
846 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
847 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
848 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
853 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
854 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
855 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
856 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
857 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
858 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
859 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
860 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
861 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
862 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
863 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
864 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
865 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
866 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
867 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
868 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
869 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
870 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
871 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
872 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
880 COMPARE_M_SIMD (d, e, c, b);
884 __kernel void m13500_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
888 __kernel void m13500_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
892 __kernel void m13500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
898 const u32 lid = get_local_id (0);
904 const u32 gid = get_global_id (0);
906 if (gid >= gid_max) return;
911 pw_buf0[0] = pws[gid].i[0];
912 pw_buf0[1] = pws[gid].i[1];
913 pw_buf0[2] = pws[gid].i[2];
914 pw_buf0[3] = pws[gid].i[3];
915 pw_buf1[0] = pws[gid].i[4];
916 pw_buf1[1] = pws[gid].i[5];
917 pw_buf1[2] = pws[gid].i[6];
918 pw_buf1[3] = pws[gid].i[7];
920 const u32 pw_l_len = pws[gid].pw_len;
926 const u32 pc_offset = esalt_bufs[salt_pos].pc_offset;
930 pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0];
931 pc_digest[1] = esalt_bufs[salt_pos].pc_digest[1];
932 pc_digest[2] = esalt_bufs[salt_pos].pc_digest[2];
933 pc_digest[3] = esalt_bufs[salt_pos].pc_digest[3];
934 pc_digest[4] = esalt_bufs[salt_pos].pc_digest[4];
941 salt_buf0[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 0]);
942 salt_buf0[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 1]);
943 salt_buf0[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 2]);
944 salt_buf0[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 3]);
945 salt_buf1[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 4]);
946 salt_buf1[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 5]);
947 salt_buf1[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 6]);
948 salt_buf1[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 7]);
949 salt_buf2[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 8]);
950 salt_buf2[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 9]);
951 salt_buf2[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 10]);
952 salt_buf2[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 11]);
953 salt_buf3[0] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 12]);
954 salt_buf3[1] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 13]);
955 salt_buf3[2] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 14]);
956 salt_buf3[3] = swap32_S (esalt_bufs[salt_pos].salt_buf[pc_offset + 15]);
958 const u32 salt_len = esalt_bufs[salt_pos].salt_len;
964 const u32 search[4] =
966 digests_buf[digests_offset].digest_buf[DGST_R0],
967 digests_buf[digests_offset].digest_buf[DGST_R1],
968 digests_buf[digests_offset].digest_buf[DGST_R2],
969 digests_buf[digests_offset].digest_buf[DGST_R3]
976 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
978 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
980 const u32x pw_len = pw_l_len + pw_r_len;
983 * concat password candidate
986 u32x wordl0[4] = { 0 };
987 u32x wordl1[4] = { 0 };
988 u32x wordl2[4] = { 0 };
989 u32x wordl3[4] = { 0 };
991 wordl0[0] = pw_buf0[0];
992 wordl0[1] = pw_buf0[1];
993 wordl0[2] = pw_buf0[2];
994 wordl0[3] = pw_buf0[3];
995 wordl1[0] = pw_buf1[0];
996 wordl1[1] = pw_buf1[1];
997 wordl1[2] = pw_buf1[2];
998 wordl1[3] = pw_buf1[3];
1000 u32x wordr0[4] = { 0 };
1001 u32x wordr1[4] = { 0 };
1002 u32x wordr2[4] = { 0 };
1003 u32x wordr3[4] = { 0 };
1005 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
1006 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
1007 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
1008 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
1009 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
1010 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
1011 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
1012 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
1014 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
1016 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
1020 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
1028 w0[0] = wordl0[0] | wordr0[0];
1029 w0[1] = wordl0[1] | wordr0[1];
1030 w0[2] = wordl0[2] | wordr0[2];
1031 w0[3] = wordl0[3] | wordr0[3];
1032 w1[0] = wordl1[0] | wordr1[0];
1033 w1[1] = wordl1[1] | wordr1[1];
1034 w1[2] = wordl1[2] | wordr1[2];
1035 w1[3] = wordl1[3] | wordr1[3];
1037 append_0x80_2x4_VV (w0, w1, pw_len);
1039 make_unicode (w1, w2, w3);
1040 make_unicode (w0, w0, w1);
1042 const u32x pw_len2 = pw_len * 2;
1044 const u32x pw_salt_len = pw_len2 + salt_len;
1047 * prepend salt -- can't stay outside the loop this time
1052 carry[ 0] = swap32 (w0[0]);
1053 carry[ 1] = swap32 (w0[1]);
1054 carry[ 2] = swap32 (w0[2]);
1055 carry[ 3] = swap32 (w0[3]);
1056 carry[ 4] = swap32 (w1[0]);
1057 carry[ 5] = swap32 (w1[1]);
1058 carry[ 6] = swap32 (w1[2]);
1059 carry[ 7] = swap32 (w1[3]);
1060 carry[ 8] = swap32 (w2[0]);
1061 carry[ 9] = swap32 (w2[1]);
1062 carry[10] = swap32 (w2[2]);
1063 carry[11] = swap32 (w2[3]);
1064 carry[12] = swap32 (w3[0]);
1065 carry[13] = swap32 (w3[1]);
1066 carry[14] = swap32 (w3[2]);
1067 carry[15] = swap32 (w3[3]);
1071 w[ 0] = salt_buf0[0];
1072 w[ 1] = salt_buf0[1];
1073 w[ 2] = salt_buf0[2];
1074 w[ 3] = salt_buf0[3];
1075 w[ 4] = salt_buf1[0];
1076 w[ 5] = salt_buf1[1];
1077 w[ 6] = salt_buf1[2];
1078 w[ 7] = salt_buf1[3];
1079 w[ 8] = salt_buf2[0];
1080 w[ 9] = salt_buf2[1];
1081 w[10] = salt_buf2[2];
1082 w[11] = salt_buf2[3];
1083 w[12] = salt_buf3[0];
1084 w[13] = salt_buf3[1];
1085 w[14] = salt_buf3[2];
1086 w[15] = salt_buf3[3];
1088 memcat64c_be (w, salt_len & 0x3f, carry);
1090 u32x a = pc_digest[0];
1091 u32x b = pc_digest[1];
1092 u32x c = pc_digest[2];
1093 u32x d = pc_digest[3];
1094 u32x e = pc_digest[4];
1096 if (((salt_len & 0x3f) + pw_len2) >= 56)
1118 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
1119 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
1120 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
1121 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
1122 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
1123 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
1124 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
1125 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
1126 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
1127 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
1128 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
1129 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
1130 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
1131 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
1132 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
1133 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
1134 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
1135 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
1136 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
1137 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
1142 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
1143 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
1144 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
1145 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
1146 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
1147 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
1148 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
1149 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
1150 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
1151 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
1152 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
1153 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
1154 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
1155 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
1156 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
1157 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
1158 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
1159 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
1160 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
1161 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
1166 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
1167 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
1168 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
1169 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
1170 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
1171 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
1172 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
1173 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
1174 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
1175 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
1176 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
1177 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
1178 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
1179 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
1180 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
1181 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
1182 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
1183 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
1184 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
1185 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
1190 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
1191 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
1192 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
1193 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
1194 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
1195 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
1196 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
1197 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
1198 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
1199 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
1200 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
1201 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
1202 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
1203 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
1204 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
1205 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
1206 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
1207 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
1208 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
1209 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
1254 u32x wf_t = pw_salt_len * 8;
1265 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
1266 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
1267 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
1268 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
1269 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
1270 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
1271 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
1272 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
1273 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
1274 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
1275 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
1276 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
1277 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
1278 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
1279 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
1280 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
1281 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
1282 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
1283 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
1284 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
1289 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
1290 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
1291 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
1292 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
1293 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
1294 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
1295 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
1296 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
1297 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
1298 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
1299 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
1300 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
1301 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
1302 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
1303 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
1304 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
1305 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
1306 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
1307 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
1308 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
1313 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
1314 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
1315 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
1316 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
1317 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
1318 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
1319 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
1320 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
1321 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
1322 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
1323 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
1324 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
1325 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
1326 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
1327 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
1328 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
1329 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
1330 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
1331 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
1332 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
1337 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
1338 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
1339 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
1340 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
1341 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
1342 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
1343 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
1344 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
1345 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
1346 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
1347 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
1348 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
1349 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
1350 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
1351 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
1352 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
1353 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
1354 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
1355 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
1356 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
1364 COMPARE_S_SIMD (d, e, c, b);
1368 __kernel void m13500_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1372 __kernel void m13500_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)