 * Author......: Jens Steube <jens.steube@gmail.com>

#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"

#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
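
// k_sha256 holds the 64 SHA-256 round constants (the fractional parts of the
// cube roots of the first 64 primes, per FIPS 180-4).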
__constant u32 k_sha256[64] =
{
  SHA256C00, SHA256C01, SHA256C02, SHA256C03,
  SHA256C04, SHA256C05, SHA256C06, SHA256C07,
  SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
  SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
  SHA256C10, SHA256C11, SHA256C12, SHA256C13,
  SHA256C14, SHA256C15, SHA256C16, SHA256C17,
  SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
  SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
  SHA256C20, SHA256C21, SHA256C22, SHA256C23,
  SHA256C24, SHA256C25, SHA256C26, SHA256C27,
  SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
  SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
  SHA256C30, SHA256C31, SHA256C32, SHA256C33,
  SHA256C34, SHA256C35, SHA256C36, SHA256C37,
  SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
  SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
void sha256_transform (const u32 w[16], u32 digest[8])

  u32 w0_t = swap32 (w[ 0]);
  u32 w1_t = swap32 (w[ 1]);
  u32 w2_t = swap32 (w[ 2]);
  u32 w3_t = swap32 (w[ 3]);
  u32 w4_t = swap32 (w[ 4]);
  u32 w5_t = swap32 (w[ 5]);
  u32 w6_t = swap32 (w[ 6]);
  u32 w7_t = swap32 (w[ 7]);
  u32 w8_t = swap32 (w[ 8]);
  u32 w9_t = swap32 (w[ 9]);
  u32 wa_t = swap32 (w[10]);
  u32 wb_t = swap32 (w[11]);
  u32 wc_t = swap32 (w[12]);
  u32 wd_t = swap32 (w[13]);
  u32 we_t = swap32 (w[14]);
  u32 wf_t = swap32 (w[15]);
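
  // the input block is stored as little-endian words; swap32 converts each to
  // the big-endian word order SHA-256 is specified over.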
#define ROUND_EXPAND() \
  w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
  w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
  w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
  w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
  w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
  w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
  w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
  w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
  w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
  w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
  wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
  wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
  wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
  wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
  we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
  wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
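
// ROUND_EXPAND advances the message schedule by 16 words in place:
// w[i] = sigma1(w[i-2]) + w[i-7] + sigma0(w[i-15]) + w[i-16].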
#define ROUND_STEP(i) \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i +  0]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i +  1]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i +  2]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i +  3]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i +  4]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i +  5]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i +  6]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i +  7]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i +  8]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i +  9]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
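
// one ROUND_STEP covers 16 rounds; rotating the a..h argument order from step
// to step replaces the usual end-of-round register rotation with pure renaming.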
  for (int i = 16; i < 64; i += 16)
  {
    ROUND_EXPAND (); ROUND_STEP (i);
  }
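
// sha256_transform_no14 is the same compression function, but with w[14] (the
// high word of the 64-bit message length) treated as zero -- always true for
// the short messages this kernel produces.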
void sha256_transform_no14 (const u32 w[16], u32 digest[8])

  sha256_transform (w_t, digest);
void init_ctx (u32 digest[8])

  digest[0] = SHA256M_A;
  digest[1] = SHA256M_B;
  digest[2] = SHA256M_C;
  digest[3] = SHA256M_D;
  digest[4] = SHA256M_E;
  digest[5] = SHA256M_F;
  digest[6] = SHA256M_G;
  digest[7] = SHA256M_H;
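
  // SHA256M_A..SHA256M_H are the standard SHA-256 initial hash values H0..H7.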
void bzero16 (u32 block[16])

void bswap8 (u32 block[16])

  block[ 0] = swap32 (block[ 0]);
  block[ 1] = swap32 (block[ 1]);
  block[ 2] = swap32 (block[ 2]);
  block[ 3] = swap32 (block[ 3]);
  block[ 4] = swap32 (block[ 4]);
  block[ 5] = swap32 (block[ 5]);
  block[ 6] = swap32 (block[ 6]);
  block[ 7] = swap32 (block[ 7]);
u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len)

  const u32 mod = block_len & 3;
  const u32 div = block_len / 4;
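
  // div: index of the word the append starts in; mod: byte offset inside it.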
#if defined IS_AMD || defined IS_GENERIC
  const int offset_minus_4 = 4 - block_len;

  tmp0 = amd_bytealign (append[0],         0, offset_minus_4);
  tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
  tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
  tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
  tmp4 = amd_bytealign (        0, append[3], offset_minus_4);
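
  // amd_bytealign funnel-shifts adjacent append words so the 16 appended bytes
  // land at byte offset mod; tmp0..tmp4 are the five words they can straddle.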
  const int offset_minus_4 = 4 - (block_len & 3);

  const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;

  tmp0 = __byte_perm (        0, append[0], selector);
  tmp1 = __byte_perm (append[0], append[1], selector);
  tmp2 = __byte_perm (append[1], append[2], selector);
  tmp3 = __byte_perm (append[2], append[3], selector);
  tmp4 = __byte_perm (append[3],         0, selector);
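
  // __byte_perm selects bytes out of the two source words; shifting the nibble
  // table 0x76543210 by the byte offset yields a selector that concatenates
  // the words at exactly the needed boundary.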
    case  0: block[ 0] |= tmp0;
    case  1: block[ 1] |= tmp0;
    case  2: block[ 2] |= tmp0;
    case  3: block[ 3] |= tmp0;
    case  4: block[ 4] |= tmp0;
    case  5: block[ 5] |= tmp0;
    case  6: block[ 6] |= tmp0;
    case  7: block[ 7] |= tmp0;
    case  8: block[ 8] |= tmp0;
    case  9: block[ 9] |= tmp0;
    case 10: block[10] |= tmp0;
    case 11: block[11] |= tmp0;
    case 12: block[12] |= tmp0;
    case 13: block[13] |= tmp0;
    case 14: block[14] |= tmp0;
    case 15: block[15] |= tmp0;

  u32 new_len = block_len + append_len;
u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len, u32 digest[8])

  const u32 mod = block_len & 3;
  const u32 div = block_len / 4;
#if defined IS_AMD || defined IS_GENERIC
  const int offset_minus_4 = 4 - block_len;

  tmp0 = amd_bytealign (append[0],         0, offset_minus_4);
  tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
  tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
  tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
  tmp4 = amd_bytealign (        0, append[3], offset_minus_4);

  const int offset_minus_4 = 4 - (block_len & 3);

  const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;

  tmp0 = __byte_perm (        0, append[0], selector);
  tmp1 = __byte_perm (append[0], append[1], selector);
  tmp2 = __byte_perm (append[1], append[2], selector);
  tmp3 = __byte_perm (append[2], append[3], selector);
  tmp4 = __byte_perm (append[3],         0, selector);
  u32 carry[4] = { 0, 0, 0, 0 };
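
  // memcat16c: like memcat16, but appended bytes that spill past the 64-byte
  // block boundary collect in carry[]; after the transform they seed the next
  // block.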
    case  0: block[ 0] |= tmp0;
    case  1: block[ 1] |= tmp0;
    case  2: block[ 2] |= tmp0;
    case  3: block[ 3] |= tmp0;
    case  4: block[ 4] |= tmp0;
    case  5: block[ 5] |= tmp0;
    case  6: block[ 6] |= tmp0;
    case  7: block[ 7] |= tmp0;
    case  8: block[ 8] |= tmp0;
    case  9: block[ 9] |= tmp0;
    case 10: block[10] |= tmp0;
    case 11: block[11] |= tmp0;
    case 12: block[12] |= tmp0;
    case 13: block[13] |= tmp0;
    case 14: block[14] |= tmp0;
    case 15: block[15] |= tmp0;

  u32 new_len = block_len + append_len;

  sha256_transform (block, digest);
u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len)

  const u32 mod = block_len & 3;
  const u32 div = block_len / 4;
#if defined IS_AMD || defined IS_GENERIC
  const int offset_minus_4 = 4 - block_len;

  tmp0 = amd_bytealign (append[0],         0, offset_minus_4);
  tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
  tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
  tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
  tmp4 = amd_bytealign (        0, append[3], offset_minus_4);

  const int offset_minus_4 = 4 - (block_len & 3);

  const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;

  tmp0 = __byte_perm (        0, append[0], selector);
  tmp1 = __byte_perm (append[0], append[1], selector);
  tmp2 = __byte_perm (append[1], append[2], selector);
  tmp3 = __byte_perm (append[2], append[3], selector);
  tmp4 = __byte_perm (append[3],         0, selector);
    case  0: block[ 0] |= tmp0;
    case  1: block[ 1] |= tmp0;
    case  2: block[ 2] |= tmp0;
    case  3: block[ 3] |= tmp0;
    case  4: block[ 4] |= tmp0;
    case  5: block[ 5] |= tmp0;
    case  6: block[ 6] |= tmp0;
    case  7: block[ 7] |= tmp0;
    case  8: block[ 8] |= tmp0;
    case  9: block[ 9] |= tmp0;
    case 10: block[10] |= tmp0;
    case 11: block[11] |= tmp0;
    case 12: block[12] |= tmp0;
    case 13: block[13] |= tmp0;
    case 14: block[14] |= tmp0;
    case 15: block[15] |= tmp0;

  return block_len + append_len;
u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len)

  const u32 mod = block_len & 3;
  const u32 div = block_len / 4;
#if defined IS_AMD || defined IS_GENERIC
  const int offset_minus_4 = 4 - block_len;

  tmp0 = amd_bytealign (append[0],         0, offset_minus_4);
  tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
  tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
  tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
  tmp4 = amd_bytealign (     0x80, append[3], offset_minus_4);
  const int offset_minus_4 = 4 - (block_len & 3);

  const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;

  tmp0 = __byte_perm (        0, append[0], selector);
  tmp1 = __byte_perm (append[0], append[1], selector);
  tmp2 = __byte_perm (append[1], append[2], selector);
  tmp3 = __byte_perm (append[2], append[3], selector);
  tmp4 = __byte_perm (append[3],      0x80, selector);
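
  // identical to memcat20 except the 0x80 padding marker is shifted in
  // immediately behind the appended bytes.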
    case  0: block[ 0] |= tmp0;
    case  1: block[ 1] |= tmp0;
    case  2: block[ 2] |= tmp0;
    case  3: block[ 3] |= tmp0;
    case  4: block[ 4] |= tmp0;
    case  5: block[ 5] |= tmp0;
    case  6: block[ 6] |= tmp0;
    case  7: block[ 7] |= tmp0;
    case  8: block[ 8] |= tmp0;
    case  9: block[ 9] |= tmp0;
    case 10: block[10] |= tmp0;
    case 11: block[11] |= tmp0;
    case 12: block[12] |= tmp0;
    case 13: block[13] |= tmp0;
    case 14: block[14] |= tmp0;
    case 15: block[15] |= tmp0;

  return block_len + append_len;
__kernel void m07400_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  w0[0] = pws[gid].i[0];
  w0[1] = pws[gid].i[1];
  w0[2] = pws[gid].i[2];
  w0[3] = pws[gid].i[3];

  const u32 pw_len = pws[gid].pw_len;

  salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
  salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
  salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
  salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];

  u32 salt_len = salt_bufs[salt_pos].salt_len;
  u32 block_len;     // never reaches > 64
  u32 transform_len; // required for w[15] = len * 8
  /* Prepare for the real work. */

  block_len = memcat16 (block, block_len, w0, pw_len);

  block_len = memcat16 (block, block_len, salt_buf, salt_len);

  block_len = memcat16 (block, block_len, w0, pw_len);

  append_0x80_1x16 (block, block_len);

  block[15] = swap32 (block_len * 8);
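
  // pw . salt . pw is at most 48 bytes here (pw_len and salt_len are each
  // capped at 16 in this kernel), so digest B fits a single block: store the
  // bit length and transform once.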
  init_ctx (alt_result);

  sha256_transform (block, alt_result);
  u32 alt_result_tmp[8];

  alt_result_tmp[0] = alt_result[0];
  alt_result_tmp[1] = alt_result[1];
  alt_result_tmp[2] = alt_result[2];
  alt_result_tmp[3] = alt_result[3];
  alt_result_tmp[4] = 0;
  alt_result_tmp[5] = 0;
  alt_result_tmp[6] = 0;
  alt_result_tmp[7] = 0;

  truncate_block (alt_result_tmp, pw_len);
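
  // only pw_len bytes of digest B get appended below, so the copy is cut down
  // to the key length (pw_len <= 16, hence four words are enough).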
  /* Add the key string. */

  block_len = memcat16 (block, block_len, w0, pw_len);

  /* The last part is the salt string. This must be at most 16
     characters and it ends at the first `$' character (for
     compatibility with existing implementations). */

  block_len = memcat16 (block, block_len, salt_buf, salt_len);

  /* Now get result of this (32 bytes) and add it to the other
     context. */

  block_len = memcat16 (block, block_len, alt_result_tmp, pw_len);

  transform_len = block_len;
  /* Take the binary representation of the length of the key and for every
     1 add the alternate sum, for every 0 the key. */

  alt_result_tmp[0] = alt_result[0];
  alt_result_tmp[1] = alt_result[1];
  alt_result_tmp[2] = alt_result[2];
  alt_result_tmp[3] = alt_result[3];
  alt_result_tmp[4] = alt_result[4];
  alt_result_tmp[5] = alt_result[5];
  alt_result_tmp[6] = alt_result[6];
  alt_result_tmp[7] = alt_result[7];

  init_ctx (alt_result);
  for (u32 j = pw_len; j; j >>= 1)

      block_len = memcat16c (block, block_len, &alt_result_tmp[0], 16, alt_result);
      block_len = memcat16c (block, block_len, &alt_result_tmp[4], 16, alt_result);

      block_len = memcat16c (block, block_len, w0, pw_len, alt_result);

      transform_len += pw_len;
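
  // for every 1-bit of pw_len the 32-byte alternate sum goes in (as two
  // 16-byte halves), for every 0-bit the key does; transform_len tracks the
  // true message length across the block flushes done by memcat16c.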
  append_0x80_1x16 (block, block_len);

  sha256_transform (block, alt_result);

  block[15] = swap32 (transform_len * 8);

  sha256_transform (block, alt_result);
  tmps[gid].alt_result[0] = alt_result[0];
  tmps[gid].alt_result[1] = alt_result[1];
  tmps[gid].alt_result[2] = alt_result[2];
  tmps[gid].alt_result[3] = alt_result[3];
  tmps[gid].alt_result[4] = alt_result[4];
  tmps[gid].alt_result[5] = alt_result[5];
  tmps[gid].alt_result[6] = alt_result[6];
  tmps[gid].alt_result[7] = alt_result[7];
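
  // digest A is parked in tmps[]; the loop kernel resumes from there.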
  /* Start computation of P byte sequence. */

  /* For every character in the password add the entire password. */

  for (u32 j = 0; j < pw_len; j++)

    block_len = memcat16c (block, block_len, w0, pw_len, p_bytes);

    transform_len += pw_len;

  /* Finish the digest. */

  append_0x80_1x16 (block, block_len);

  sha256_transform (block, p_bytes);

  block[15] = swap32 (transform_len * 8);

  sha256_transform (block, p_bytes);

  truncate_block (p_bytes, pw_len);
  tmps[gid].p_bytes[0] = p_bytes[0];
  tmps[gid].p_bytes[1] = p_bytes[1];
  tmps[gid].p_bytes[2] = p_bytes[2];
  tmps[gid].p_bytes[3] = p_bytes[3];
  /* Start computation of S byte sequence. */

  /* Add the entire salt 16 + (first byte of digest A) times. */

  for (u32 j = 0; j < 16 + (alt_result[0] & 0xff); j++)
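    // alt_result is held as little-endian words at this point, so the first
    // byte of digest A is the low byte of alt_result[0].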
    block_len = memcat16c (block, block_len, salt_buf, salt_len, s_bytes);

    transform_len += salt_len;
  /* Finish the digest. */

  append_0x80_1x16 (block, block_len);

  if (block_len >= 56)

    sha256_transform (block, s_bytes);

  block[15] = swap32 (transform_len * 8);

  sha256_transform (block, s_bytes);

  truncate_block (s_bytes, salt_len);
  tmps[gid].s_bytes[0] = s_bytes[0];
  tmps[gid].s_bytes[1] = s_bytes[1];
  tmps[gid].s_bytes[2] = s_bytes[2];
  tmps[gid].s_bytes[3] = s_bytes[3];
__kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  p_bytes[0] = tmps[gid].p_bytes[0];
  p_bytes[1] = tmps[gid].p_bytes[1];
  p_bytes[2] = tmps[gid].p_bytes[2];
  p_bytes[3] = tmps[gid].p_bytes[3];

  p_bytes_x80[0] = tmps[gid].p_bytes[0];
  p_bytes_x80[1] = tmps[gid].p_bytes[1];
  p_bytes_x80[2] = tmps[gid].p_bytes[2];
  p_bytes_x80[3] = tmps[gid].p_bytes[3];

  append_0x80_1x4 (p_bytes_x80, pw_len);

  s_bytes[0] = tmps[gid].s_bytes[0];
  s_bytes[1] = tmps[gid].s_bytes[1];
  s_bytes[2] = tmps[gid].s_bytes[2];
  s_bytes[3] = tmps[gid].s_bytes[3];

  alt_result[0] = tmps[gid].alt_result[0];
  alt_result[1] = tmps[gid].alt_result[1];
  alt_result[2] = tmps[gid].alt_result[2];
  alt_result[3] = tmps[gid].alt_result[3];
  alt_result[4] = tmps[gid].alt_result[4];
  alt_result[5] = tmps[gid].alt_result[5];
  alt_result[6] = tmps[gid].alt_result[6];
  alt_result[7] = tmps[gid].alt_result[7];

  u32 salt_len = salt_bufs[salt_pos].salt_len;
  /* Repeatedly run the collected hash value through SHA256 to burn
     CPU cycles. */

  for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
    bzero16 (&block[ 0]);
    bzero16 (&block[16]);

    const u32 j1 = (j & 1) ? 1 : 0;
    const u32 j3 = (j % 3) ? 1 : 0;
    const u32 j7 = (j % 7) ? 1 : 0;
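
    // sha256crypt round pattern: start from p_bytes on odd rounds and from the
    // previous digest on even ones; add s_bytes unless j is divisible by 3,
    // add p_bytes unless divisible by 7, then finish with whichever of
    // digest/p_bytes did not open the round.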
      block[0] = p_bytes[0];
      block[1] = p_bytes[1];
      block[2] = p_bytes[2];
      block[3] = p_bytes[3];

      block[0] = alt_result[0];
      block[1] = alt_result[1];
      block[2] = alt_result[2];
      block[3] = alt_result[3];
      block[4] = alt_result[4];
      block[5] = alt_result[5];
      block[6] = alt_result[6];
      block[7] = alt_result[7];

      block_len = memcat20 (block, block_len, s_bytes, salt_len);

      block_len = memcat20 (block, block_len, p_bytes, pw_len);

      block_len = memcat20     (block, block_len, &alt_result[0], 16);
      block_len = memcat20_x80 (block, block_len, &alt_result[4], 16);

      block_len = memcat20 (block, block_len, p_bytes_x80, pw_len);
    if (block_len >= 56)

      sha256_transform (block, tmp);

      block[ 0] = block[16];
      block[ 1] = block[17];
      block[ 2] = block[18];
      block[ 3] = block[19];
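
      // the concatenation ran past one block: transform the first 64 bytes,
      // then slide the overflow words down to start the second block.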
    block[15] = swap32 (block_len * 8);

    sha256_transform_no14 (block, tmp);
    alt_result[0] = tmp[0];
    alt_result[1] = tmp[1];
    alt_result[2] = tmp[2];
    alt_result[3] = tmp[3];
    alt_result[4] = tmp[4];
    alt_result[5] = tmp[5];
    alt_result[6] = tmp[6];
    alt_result[7] = tmp[7];
  tmps[gid].alt_result[0] = alt_result[0];
  tmps[gid].alt_result[1] = alt_result[1];
  tmps[gid].alt_result[2] = alt_result[2];
  tmps[gid].alt_result[3] = alt_result[3];
  tmps[gid].alt_result[4] = alt_result[4];
  tmps[gid].alt_result[5] = alt_result[5];
  tmps[gid].alt_result[6] = alt_result[6];
  tmps[gid].alt_result[7] = alt_result[7];
__kernel void m07400_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 lid = get_local_id (0);

  const u32 r0 = tmps[gid].alt_result[0];
  const u32 r1 = tmps[gid].alt_result[1];
  const u32 r2 = tmps[gid].alt_result[2];
  const u32 r3 = tmps[gid].alt_result[3];
// this is basically a much cleaner version, but apparently it costs more than
// half the speed :(

#define PUTCHAR32_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
#define GETCHAR32_BE(a,p)   ((u8 *)(a))[(p) ^ 3]
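
// XORing the byte index with 3 mirrors addressing within each 32-bit word, so
// byte 0 of a word is its most significant byte: big-endian byte order on top
// of a little-endian word array.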
void sha256_transform (const u32 w[16], u32 digest[8])

#define ROUND_EXPAND() \
  w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
  w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
  w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
  w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
  w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
  w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
  w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
  w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
  w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
  w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
  wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
  wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
  wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
  wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
  we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
  wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
#define ROUND_STEP(i) \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i +  0]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i +  1]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i +  2]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i +  3]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i +  4]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i +  5]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i +  6]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i +  7]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i +  8]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i +  9]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
  SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
  for (int i = 16; i < 64; i += 16)
  {
    ROUND_EXPAND (); ROUND_STEP (i);
  }
void sha256_init (sha256_ctx_t *sha256_ctx)

  sha256_ctx->state[0] = SHA256M_A;
  sha256_ctx->state[1] = SHA256M_B;
  sha256_ctx->state[2] = SHA256M_C;
  sha256_ctx->state[3] = SHA256M_D;
  sha256_ctx->state[4] = SHA256M_E;
  sha256_ctx->state[5] = SHA256M_F;
  sha256_ctx->state[6] = SHA256M_G;
  sha256_ctx->state[7] = SHA256M_H;

  sha256_ctx->len = 0;
void sha256_update (sha256_ctx_t *sha256_ctx, const u32 *buf, int len)

  int pos = sha256_ctx->len & 0x3f;

  sha256_ctx->len += len;

  if ((pos + len) < 64)

    for (int i = 0; i < len; i++)
    {
      PUTCHAR32_BE (sha256_ctx->buf, pos++, GETCHAR32_BE (buf, i));
    }

    for (int i = 0; i < cnt; i++)
    {
      PUTCHAR32_BE (sha256_ctx->buf, pos++, GETCHAR32_BE (buf, i));
    }

    sha256_transform (sha256_ctx->buf, sha256_ctx->state);

    for (int i = 0; i < len; i++)
    {
      PUTCHAR32_BE (sha256_ctx->buf, i, GETCHAR32_BE (buf, cnt + i));
    }
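
  // streaming update: top up the partial block (cnt bytes complete it),
  // transform, then buffer the remaining tail bytes for the next call.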
void sha256_final (sha256_ctx_t *sha256_ctx)

  int pos = sha256_ctx->len & 0x3f;

  for (int i = pos; i < 64; i++)
  {
    PUTCHAR32_BE (sha256_ctx->buf, i, 0);
  }

  PUTCHAR32_BE (sha256_ctx->buf, pos, 0x80);

    sha256_transform (sha256_ctx->buf, sha256_ctx->state);
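
    // the 0x80 marker left no room for the length words, so this full block
    // is flushed and a fresh, zeroed block carries the length instead.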
  sha256_ctx->buf[ 0] = 0;
  sha256_ctx->buf[ 1] = 0;
  sha256_ctx->buf[ 2] = 0;
  sha256_ctx->buf[ 3] = 0;
  sha256_ctx->buf[ 4] = 0;
  sha256_ctx->buf[ 5] = 0;
  sha256_ctx->buf[ 6] = 0;
  sha256_ctx->buf[ 7] = 0;
  sha256_ctx->buf[ 8] = 0;
  sha256_ctx->buf[ 9] = 0;
  sha256_ctx->buf[10] = 0;
  sha256_ctx->buf[11] = 0;
  sha256_ctx->buf[12] = 0;
  sha256_ctx->buf[13] = 0;
  sha256_ctx->buf[14] = 0;
  sha256_ctx->buf[15] = 0;
  sha256_ctx->buf[15] = sha256_ctx->len * 8;
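
  // buf's u32 words already hold big-endian byte order (courtesy of
  // PUTCHAR32_BE), so the bit length is stored as a plain u32, no swap needed.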
  sha256_transform (sha256_ctx->buf, sha256_ctx->state);
__kernel void m07400_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  pw[0] = swap32 (pws[gid].i[0]);
  pw[1] = swap32 (pws[gid].i[1]);
  pw[2] = swap32 (pws[gid].i[2]);
  pw[3] = swap32 (pws[gid].i[3]);

  const u32 pw_len = pws[gid].pw_len;

  salt[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
  salt[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
  salt[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
  salt[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);

  u32 salt_len = salt_bufs[salt_pos].salt_len;
  sha256_ctx_t sha256_ctx;

  sha256_init (&sha256_ctx);

  sha256_update (&sha256_ctx, pw, pw_len);
  sha256_update (&sha256_ctx, salt, salt_len);
  sha256_update (&sha256_ctx, pw, pw_len);

  sha256_final (&sha256_ctx);
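
  // digest B = SHA256 (pw . salt . pw), this time via the byte-oriented ctx
  // API rather than the hand-rolled memcat path of the first variant.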
  tmp[0] = sha256_ctx.state[0];
  tmp[1] = sha256_ctx.state[1];
  tmp[2] = sha256_ctx.state[2];
  tmp[3] = sha256_ctx.state[3];
  tmp[4] = sha256_ctx.state[4];
  tmp[5] = sha256_ctx.state[5];
  tmp[6] = sha256_ctx.state[6];
  tmp[7] = sha256_ctx.state[7];

  sha256_init (&sha256_ctx);

  sha256_update (&sha256_ctx, pw, pw_len);
  sha256_update (&sha256_ctx, salt, salt_len);
  sha256_update (&sha256_ctx, tmp, pw_len);

  for (u32 j = pw_len; j; j >>= 1)

      sha256_update (&sha256_ctx, tmp, 32);

      sha256_update (&sha256_ctx, pw, pw_len);

  sha256_final (&sha256_ctx);
  tmps[gid].alt_result[0] = sha256_ctx.state[0];
  tmps[gid].alt_result[1] = sha256_ctx.state[1];
  tmps[gid].alt_result[2] = sha256_ctx.state[2];
  tmps[gid].alt_result[3] = sha256_ctx.state[3];
  tmps[gid].alt_result[4] = sha256_ctx.state[4];
  tmps[gid].alt_result[5] = sha256_ctx.state[5];
  tmps[gid].alt_result[6] = sha256_ctx.state[6];
  tmps[gid].alt_result[7] = sha256_ctx.state[7];
  sha256_init (&sha256_ctx);

  for (u32 j = 0; j < pw_len; j++)
  {
    sha256_update (&sha256_ctx, pw, pw_len);
  }

  sha256_final (&sha256_ctx);

  tmps[gid].p_bytes[0] = sha256_ctx.state[0];
  tmps[gid].p_bytes[1] = sha256_ctx.state[1];
  tmps[gid].p_bytes[2] = sha256_ctx.state[2];
  tmps[gid].p_bytes[3] = sha256_ctx.state[3];
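
  // only the first 16 bytes of the P sequence are kept -- with pw_len <= 16
  // the later bytes are never consumed.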
  sha256_init (&sha256_ctx);

  for (u32 j = 0; j < 16 + ((tmps[gid].alt_result[0] >> 24) & 0xff); j++)
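    // state words are big-endian in this variant, so the first byte of
    // digest A is the high byte of alt_result[0].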
    sha256_update (&sha256_ctx, salt, salt_len);

  sha256_final (&sha256_ctx);

  tmps[gid].s_bytes[0] = sha256_ctx.state[0];
  tmps[gid].s_bytes[1] = sha256_ctx.state[1];
  tmps[gid].s_bytes[2] = sha256_ctx.state[2];
  tmps[gid].s_bytes[3] = sha256_ctx.state[3];
__kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  p_bytes0[0] = tmps[gid].p_bytes[0];
  p_bytes0[1] = tmps[gid].p_bytes[1];
  p_bytes0[2] = tmps[gid].p_bytes[2];
  p_bytes0[3] = tmps[gid].p_bytes[3];

  const u32 pw_len = pws[gid].pw_len;

  s_bytes0[0] = tmps[gid].s_bytes[0];
  s_bytes0[1] = tmps[gid].s_bytes[1];
  s_bytes0[2] = tmps[gid].s_bytes[2];
  s_bytes0[3] = tmps[gid].s_bytes[3];

  const u32 salt_len = salt_bufs[salt_pos].salt_len;
  wpc_len[0] = 32     +        0 +      0 + pw_len;
  wpc_len[1] = pw_len +        0 +      0 + 32;
  wpc_len[2] = 32     + salt_len +      0 + pw_len;
  wpc_len[3] = pw_len + salt_len +      0 + 32;
  wpc_len[4] = 32     +        0 + pw_len + pw_len;
  wpc_len[5] = pw_len +        0 + pw_len + 32;
  wpc_len[6] = 32     + salt_len + pw_len + pw_len;
  wpc_len[7] = pw_len + salt_len + pw_len + 32;
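
  // eight precomputed buffer layouts, one per (j%2, j%3, j%7) pattern: bit 0
  // flips whether the digest sits at the front or the back, bit 1 adds the
  // salt, bit 2 adds an extra password copy. Lengths are fixed per layout, so
  // the buffers, padding and length words can all be built once outside the
  // hot loop.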
  u32 wpc[8][32] = { { 0 } };

  for (u32 i = 0; i < 8; i++)

      for (u32 j = 0; j < pw_len; j++)
      {
        PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (p_bytes0, j));
      }

      for (u32 j = 0; j < salt_len; j++)
      {
        PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (s_bytes0, j));
      }

      for (u32 j = 0; j < pw_len; j++)
      {
        PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (p_bytes0, j));
      }

      for (u32 j = 0; j < pw_len; j++)
      {
        PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (p_bytes0, j));
      }

    PUTCHAR32_BE (wpc[i], block_len, 0x80);

      wpc[i][15] = block_len * 8;

      wpc[i][31] = block_len * 8;
  alt_result[0] = tmps[gid].alt_result[0];
  alt_result[1] = tmps[gid].alt_result[1];
  alt_result[2] = tmps[gid].alt_result[2];
  alt_result[3] = tmps[gid].alt_result[3];
  alt_result[4] = tmps[gid].alt_result[4];
  alt_result[5] = tmps[gid].alt_result[5];
  alt_result[6] = tmps[gid].alt_result[6];
  alt_result[7] = tmps[gid].alt_result[7];
  /* Repeatedly run the collected hash value through SHA256 to burn
     CPU cycles. */

  for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
    const u32 j1 = (j & 1) ? 1 : 0;
    const u32 j3 = (j % 3) ? 2 : 0;
    const u32 j7 = (j % 7) ? 4 : 0;

    const u32 pc = j1 + j3 + j7;
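
    // pc packs the three pattern tests into a 3-bit index (j1 = 1, j3 = 2,
    // j7 = 4) selecting the matching precomputed buffer.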
    block[ 0] = wpc[pc][ 0];
    block[ 1] = wpc[pc][ 1];
    block[ 2] = wpc[pc][ 2];
    block[ 3] = wpc[pc][ 3];
    block[ 4] = wpc[pc][ 4];
    block[ 5] = wpc[pc][ 5];
    block[ 6] = wpc[pc][ 6];
    block[ 7] = wpc[pc][ 7];
    block[ 8] = wpc[pc][ 8];
    block[ 9] = wpc[pc][ 9];
    block[10] = wpc[pc][10];
    block[11] = wpc[pc][11];
    block[12] = wpc[pc][12];
    block[13] = wpc[pc][13];
    block[14] = wpc[pc][14];
    block[15] = wpc[pc][15];
    block[16] = wpc[pc][16];
    block[17] = wpc[pc][17];
    block[18] = wpc[pc][18];
    block[19] = wpc[pc][19];
    block[20] = wpc[pc][20];
    block[21] = wpc[pc][21];
    block[22] = wpc[pc][22];
    block[23] = wpc[pc][23];
    block[24] = wpc[pc][24];
    block[25] = wpc[pc][25];
    block[26] = wpc[pc][26];
    block[27] = wpc[pc][27];
    block[28] = wpc[pc][28];
    block[29] = wpc[pc][29];
    block[30] = wpc[pc][30];
    block[31] = wpc[pc][31];

    const u32 block_len = wpc_len[pc];
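
    // splice the 32-byte running digest into the chosen layout: at the tail
    // (byte offset block_len - 32) on odd rounds, at the head on even rounds.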
      for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
      {
        PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));
      }

      block[0] = alt_result[0];
      block[1] = alt_result[1];
      block[2] = alt_result[2];
      block[3] = alt_result[3];
      block[4] = alt_result[4];
      block[5] = alt_result[5];
      block[6] = alt_result[6];
      block[7] = alt_result[7];
    alt_result[0] = SHA256M_A;
    alt_result[1] = SHA256M_B;
    alt_result[2] = SHA256M_C;
    alt_result[3] = SHA256M_D;
    alt_result[4] = SHA256M_E;
    alt_result[5] = SHA256M_F;
    alt_result[6] = SHA256M_G;
    alt_result[7] = SHA256M_H;

    sha256_transform (block, alt_result);

    if (block_len >= 56)
    {
      sha256_transform (block + 16, alt_result);
    }
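
    // two-block messages need a second transform over words 16..31; padding
    // and length words were already baked into wpc[][].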
  tmps[gid].alt_result[0] = alt_result[0];
  tmps[gid].alt_result[1] = alt_result[1];
  tmps[gid].alt_result[2] = alt_result[2];
  tmps[gid].alt_result[3] = alt_result[3];
  tmps[gid].alt_result[4] = alt_result[4];
  tmps[gid].alt_result[5] = alt_result[5];
  tmps[gid].alt_result[6] = alt_result[6];
  tmps[gid].alt_result[7] = alt_result[7];
__kernel void m07400_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 lid = get_local_id (0);

  const u32 r0 = swap32 (tmps[gid].alt_result[0]);
  const u32 r1 = swap32 (tmps[gid].alt_result[1]);
  const u32 r2 = swap32 (tmps[gid].alt_result[2]);
  const u32 r3 = swap32 (tmps[gid].alt_result[3]);
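
  // this variant keeps its state big-endian, so the words are swapped back
  // before the shared compare code (COMPARE_M) sees them.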