2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "inc_hash_constants.h"
9 #include "inc_vendor.cl"
16 #include "inc_hash_functions.cl"
17 #include "inc_types.cl"
18 #include "inc_common.cl"
20 #define COMPARE_S "inc_comp_single.cl"
21 #define COMPARE_M "inc_comp_multi.cl"
// SHA-256 round constants K[0..63], one per compression step.
// The SHA256Cxx values are supplied by inc_hash_constants.h.
23 __constant u32 k_sha256[64] =
25 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
26 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
27 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
28 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
29 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
30 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
31 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
32 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
33 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
34 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
35 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
36 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
37 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
38 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
39 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
40 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
// One SHA-256 compression over a single 64-byte block.
// w      - message block as 16 u32 words; each word is byte-swapped
//          (swap32) on load, so the caller supplies little-endian words.
// digest - running 8-word SHA-256 state, updated in place.
// NOTE(review): the working-variable declarations (a..h, loaded from
// digest[]) and the final state feedback (digest[i] += ...) are in lines
// not visible in this excerpt.
45 void sha256_transform (const u32 w[16], u32 digest[8])
56 u32 w0_t = swap32 (w[ 0]);
57 u32 w1_t = swap32 (w[ 1]);
58 u32 w2_t = swap32 (w[ 2]);
59 u32 w3_t = swap32 (w[ 3]);
60 u32 w4_t = swap32 (w[ 4]);
61 u32 w5_t = swap32 (w[ 5]);
62 u32 w6_t = swap32 (w[ 6]);
63 u32 w7_t = swap32 (w[ 7]);
64 u32 w8_t = swap32 (w[ 8]);
65 u32 w9_t = swap32 (w[ 9]);
66 u32 wa_t = swap32 (w[10]);
67 u32 wb_t = swap32 (w[11]);
68 u32 wc_t = swap32 (w[12]);
69 u32 wd_t = swap32 (w[13]);
70 u32 we_t = swap32 (w[14]);
71 u32 wf_t = swap32 (w[15]);
// Message-schedule expansion: derives the next 16 schedule words W[t]
// in place from the previous 16 (SHA256_EXPAND from inc_hash_functions.cl).
73 #define ROUND_EXPAND() \
75 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
76 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
77 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
78 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
79 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
80 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
81 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
82 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
83 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
84 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
85 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
86 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
87 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
88 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
89 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
90 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
93 #define ROUND_STEP(i) \
95 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
96 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
97 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
98 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
99 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
100 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
101 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
102 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
103 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
104 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
105 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
106 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
107 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
108 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
109 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
110 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
118 for (int i = 16; i < 64; i += 16)
120 ROUND_EXPAND (); ROUND_STEP (i);
// Variant of sha256_transform used when word 14 of the block can be
// ignored; delegates to sha256_transform on a local copy w_t.
// NOTE(review): the construction of w_t (presumably a copy of w with
// w_t[14] forced to zero, per the "_no14" name) is not visible in this
// excerpt - confirm against the full source.
133 void sha256_transform_no14 (const u32 w[16], u32 digest[8])
154 sha256_transform (w_t, digest);
// Reset an 8-word digest to the SHA-256 initial state (H0..H7,
// SHA256M_* constants from inc_hash_constants.h).
157 void init_ctx (u32 digest[8])
159 digest[0] = SHA256M_A;
160 digest[1] = SHA256M_B;
161 digest[2] = SHA256M_C;
162 digest[3] = SHA256M_D;
163 digest[4] = SHA256M_E;
164 digest[5] = SHA256M_F;
165 digest[6] = SHA256M_G;
166 digest[7] = SHA256M_H;
// Zero all 16 words (64 bytes) of a block buffer.
// NOTE(review): the body is not visible in this excerpt.
169 void bzero16 (u32 block[16])
// Byte-swap the first 8 words of the block (the name reflects the 8
// swaps, even though the parameter is declared as a 16-word block).
// Words 8..15 are deliberately left untouched.
189 void bswap8 (u32 block[16])
191 block[ 0] = swap32 (block[ 0]);
192 block[ 1] = swap32 (block[ 1]);
193 block[ 2] = swap32 (block[ 2]);
194 block[ 3] = swap32 (block[ 3]);
195 block[ 4] = swap32 (block[ 4]);
196 block[ 5] = swap32 (block[ 5]);
197 block[ 6] = swap32 (block[ 6]);
198 block[ 7] = swap32 (block[ 7]);
// Append up to 16 bytes (append[], append_len) into the 64-byte block
// at byte offset block_len; existing bytes are preserved because the
// shifted fragments are OR-ed into the block words.  Returns the new
// block length.
// NOTE(review): the switch-case bodies placing tmp1..tmp4 into the
// following words are not visible in this excerpt.
201 u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len)
203 const u32 mod = block_len & 3;
204 const u32 div = block_len / 4;
// Split the 16 appended bytes into five word-aligned fragments
// tmp0..tmp4 according to the sub-word offset.
212 #if defined IS_AMD || defined IS_GENERIC
// NOTE(review): this path passes 4 - block_len (full length), while the
// NV path below masks with & 3; presumably amd_bytealign only consumes
// the low bits of the shift operand - confirm against the extension spec.
213 const int offset_minus_4 = 4 - block_len;
215 tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
216 tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
217 tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
218 tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
219 tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
// NV path: __byte_perm with a nibble selector derived from the sub-word
// offset performs the same byte realignment.
232 const int offset_minus_4 = 4 - (block_len & 3);
234 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
236 tmp0 = __byte_perm ( 0, append[0], selector);
237 tmp1 = __byte_perm (append[0], append[1], selector);
238 tmp2 = __byte_perm (append[1], append[2], selector);
239 tmp3 = __byte_perm (append[2], append[3], selector);
240 tmp4 = __byte_perm (append[3], 0, selector);
// div is the index of the first block word touched by the append.
245 case 0: block[ 0] |= tmp0;
251 case 1: block[ 1] |= tmp0;
257 case 2: block[ 2] |= tmp0;
263 case 3: block[ 3] |= tmp0;
269 case 4: block[ 4] |= tmp0;
275 case 5: block[ 5] |= tmp0;
281 case 6: block[ 6] |= tmp0;
287 case 7: block[ 7] |= tmp0;
293 case 8: block[ 8] |= tmp0;
299 case 9: block[ 9] |= tmp0;
305 case 10: block[10] |= tmp0;
311 case 11: block[11] |= tmp0;
317 case 12: block[12] |= tmp0;
322 case 13: block[13] |= tmp0;
326 case 14: block[14] |= tmp0;
329 case 15: block[15] |= tmp0;
// New logical length after the append.
333 u32 new_len = block_len + append_len;
// Like memcat16, but additionally handles overflow past the 64-byte
// block: words shifted beyond index 15 are collected in carry[] and,
// when the block fills, sha256_transform is run on it with `digest`
// (see the call at the bottom).
// NOTE(review): the carry[] assignments inside the switch and the
// post-transform refill of block[] from carry[] are not visible in this
// excerpt.
338 u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len, u32 digest[8])
340 const u32 mod = block_len & 3;
341 const u32 div = block_len / 4;
// Split the appended bytes into word-aligned fragments tmp0..tmp4.
349 #if defined IS_AMD || defined IS_GENERIC
350 const int offset_minus_4 = 4 - block_len;
352 tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
353 tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
354 tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
355 tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
356 tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
// NV path: __byte_perm-based realignment, selector from sub-word offset.
369 const int offset_minus_4 = 4 - (block_len & 3);
371 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
373 tmp0 = __byte_perm ( 0, append[0], selector);
374 tmp1 = __byte_perm (append[0], append[1], selector);
375 tmp2 = __byte_perm (append[1], append[2], selector);
376 tmp3 = __byte_perm (append[2], append[3], selector);
377 tmp4 = __byte_perm (append[3], 0, selector);
// Receives any words that spill past block[15].
380 u32 carry[4] = { 0, 0, 0, 0 };
384 case 0: block[ 0] |= tmp0;
390 case 1: block[ 1] |= tmp0;
396 case 2: block[ 2] |= tmp0;
402 case 3: block[ 3] |= tmp0;
408 case 4: block[ 4] |= tmp0;
414 case 5: block[ 5] |= tmp0;
420 case 6: block[ 6] |= tmp0;
426 case 7: block[ 7] |= tmp0;
432 case 8: block[ 8] |= tmp0;
438 case 9: block[ 9] |= tmp0;
444 case 10: block[10] |= tmp0;
450 case 11: block[11] |= tmp0;
456 case 12: block[12] |= tmp0;
462 case 13: block[13] |= tmp0;
468 case 14: block[14] |= tmp0;
474 case 15: block[15] |= tmp0;
482 u32 new_len = block_len + append_len;
// Block filled: compress it into the caller's running digest.
488 sha256_transform (block, digest);
// memcat16 variant for a 20-word (80-byte) block, used by the loop
// kernel where one message can span more than 64 bytes.  Appends up to
// 16 bytes at byte offset block_len and returns the new length.
// NOTE(review): the switch bodies placing tmp1..tmp4 into the following
// words (including the spill into block[16..19]) are not visible here.
501 u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len)
503 const u32 mod = block_len & 3;
504 const u32 div = block_len / 4;
// Split the appended bytes into word-aligned fragments tmp0..tmp4.
512 #if defined IS_AMD || defined IS_GENERIC
513 const int offset_minus_4 = 4 - block_len;
515 tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
516 tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
517 tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
518 tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
519 tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
// NV path: __byte_perm-based realignment.
532 const int offset_minus_4 = 4 - (block_len & 3);
534 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
536 tmp0 = __byte_perm ( 0, append[0], selector);
537 tmp1 = __byte_perm (append[0], append[1], selector);
538 tmp2 = __byte_perm (append[1], append[2], selector);
539 tmp3 = __byte_perm (append[2], append[3], selector);
540 tmp4 = __byte_perm (append[3], 0, selector);
545 case 0: block[ 0] |= tmp0;
551 case 1: block[ 1] |= tmp0;
557 case 2: block[ 2] |= tmp0;
563 case 3: block[ 3] |= tmp0;
569 case 4: block[ 4] |= tmp0;
575 case 5: block[ 5] |= tmp0;
581 case 6: block[ 6] |= tmp0;
587 case 7: block[ 7] |= tmp0;
593 case 8: block[ 8] |= tmp0;
599 case 9: block[ 9] |= tmp0;
605 case 10: block[10] |= tmp0;
611 case 11: block[11] |= tmp0;
617 case 12: block[12] |= tmp0;
623 case 13: block[13] |= tmp0;
629 case 14: block[14] |= tmp0;
635 case 15: block[15] |= tmp0;
643 return block_len + append_len;
// Like memcat20, but appends a 0x80 padding byte immediately after the
// appended data (note the 0x80 fed into the tmp4 fragment in both
// paths).  Returns the new length (the 0x80 is not counted).
646 u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len)
648 const u32 mod = block_len & 3;
649 const u32 div = block_len / 4;
// Split the appended bytes (plus trailing 0x80) into word-aligned
// fragments tmp0..tmp4.
657 #if defined IS_AMD || defined IS_GENERIC
658 const int offset_minus_4 = 4 - block_len;
660 tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
661 tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
662 tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
663 tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
664 tmp4 = amd_bytealign ( 0x80, append[3], offset_minus_4);
// NV path: __byte_perm-based realignment; 0x80 enters via tmp4.
677 const int offset_minus_4 = 4 - (block_len & 3);
679 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
681 tmp0 = __byte_perm ( 0, append[0], selector);
682 tmp1 = __byte_perm (append[0], append[1], selector);
683 tmp2 = __byte_perm (append[1], append[2], selector);
684 tmp3 = __byte_perm (append[2], append[3], selector);
685 tmp4 = __byte_perm (append[3], 0x80, selector);
690 case 0: block[ 0] |= tmp0;
696 case 1: block[ 1] |= tmp0;
702 case 2: block[ 2] |= tmp0;
708 case 3: block[ 3] |= tmp0;
714 case 4: block[ 4] |= tmp0;
720 case 5: block[ 5] |= tmp0;
726 case 6: block[ 6] |= tmp0;
732 case 7: block[ 7] |= tmp0;
738 case 8: block[ 8] |= tmp0;
744 case 9: block[ 9] |= tmp0;
750 case 10: block[10] |= tmp0;
756 case 11: block[11] |= tmp0;
762 case 12: block[12] |= tmp0;
768 case 13: block[13] |= tmp0;
774 case 14: block[14] |= tmp0;
780 case 15: block[15] |= tmp0;
788 return block_len + append_len;
// Init kernel for hashcat mode 7400 (sha256crypt, $5$).  Per work item:
// loads the candidate password and salt, computes the sha256crypt
// "alternate" digest and the derived P and S byte sequences, and stores
// the intermediates in tmps[gid] for the loop kernel.  The in-source
// comments below are carried over from the sha256crypt reference
// implementation.
791 __kernel void m07400_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
797 const u32 gid = get_global_id (0);
// Guard against padding work items beyond the batch size.
799 if (gid >= gid_max) return;
// First 16 bytes of the candidate password.
803 w0[0] = pws[gid].i[0];
804 w0[1] = pws[gid].i[1];
805 w0[2] = pws[gid].i[2];
806 w0[3] = pws[gid].i[3];
808 const u32 pw_len = pws[gid].pw_len;
// First 16 bytes of the salt.
816 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
817 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
818 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
819 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
821 u32 salt_len = salt_bufs[salt_pos].salt_len;
827 u32 block_len; // never reaches > 64
828 u32 transform_len; // required for w[15] = len * 8
836 /* Prepare for the real work. */
// Alternate digest: SHA256(password . salt . password).
844 block_len = memcat16 (block, block_len, w0, pw_len);
848 block_len = memcat16 (block, block_len, salt_buf, salt_len);
852 block_len = memcat16 (block, block_len, w0, pw_len);
854 append_0x80_1x16 (block, block_len);
// SHA-256 length padding: message bit-length in the last word.
856 block[15] = swap32 (block_len * 8);
858 init_ctx (alt_result);
860 sha256_transform (block, alt_result);
// Keep only the first pw_len bytes of the alternate digest for the
// next concatenation (truncate_block below).
868 u32 alt_result_tmp[8];
870 alt_result_tmp[0] = alt_result[0];
871 alt_result_tmp[1] = alt_result[1];
872 alt_result_tmp[2] = alt_result[2];
873 alt_result_tmp[3] = alt_result[3];
874 alt_result_tmp[4] = 0;
875 alt_result_tmp[5] = 0;
876 alt_result_tmp[6] = 0;
877 alt_result_tmp[7] = 0;
879 truncate_block (alt_result_tmp, pw_len);
881 /* Add the key string. */
883 block_len = memcat16 (block, block_len, w0, pw_len);
885 /* The last part is the salt string. This must be at most 8
886 characters and it ends at the first `$' character (for
887 compatibility with existing implementations). */
889 block_len = memcat16 (block, block_len, salt_buf, salt_len);
891 /* Now get result of this (32 bytes) and add it to the other
894 block_len = memcat16 (block, block_len, alt_result_tmp, pw_len);
896 transform_len = block_len;
898 /* Take the binary representation of the length of the key and for every
899 1 add the alternate sum, for every 0 the key. */
// Save the full alternate digest before re-initializing the state.
901 alt_result_tmp[0] = alt_result[0];
902 alt_result_tmp[1] = alt_result[1];
903 alt_result_tmp[2] = alt_result[2];
904 alt_result_tmp[3] = alt_result[3];
905 alt_result_tmp[4] = alt_result[4];
906 alt_result_tmp[5] = alt_result[5];
907 alt_result_tmp[6] = alt_result[6];
908 alt_result_tmp[7] = alt_result[7];
910 init_ctx (alt_result);
// Walk the bits of pw_len; memcat16c streams data through the block,
// compressing into alt_result whenever the block fills.
912 for (u32 j = pw_len; j; j >>= 1)
916 block_len = memcat16c (block, block_len, &alt_result_tmp[0], 16, alt_result);
917 block_len = memcat16c (block, block_len, &alt_result_tmp[4], 16, alt_result);
923 block_len = memcat16c (block, block_len, w0, pw_len, alt_result);
925 transform_len += pw_len;
// Finalize digest A: pad and compress the remaining data.
929 append_0x80_1x16 (block, block_len);
933 sha256_transform (block, alt_result);
938 block[15] = swap32 (transform_len * 8);
940 sha256_transform (block, alt_result);
// Persist digest A for the loop kernel.
944 tmps[gid].alt_result[0] = alt_result[0];
945 tmps[gid].alt_result[1] = alt_result[1];
946 tmps[gid].alt_result[2] = alt_result[2];
947 tmps[gid].alt_result[3] = alt_result[3];
948 tmps[gid].alt_result[4] = alt_result[4];
949 tmps[gid].alt_result[5] = alt_result[5];
950 tmps[gid].alt_result[6] = alt_result[6];
951 tmps[gid].alt_result[7] = alt_result[7];
953 /* Start computation of P byte sequence. */
961 /* For every character in the password add the entire password. */
965 for (u32 j = 0; j < pw_len; j++)
967 block_len = memcat16c (block, block_len, w0, pw_len, p_bytes);
969 transform_len += pw_len;
972 /* Finish the digest. */
974 append_0x80_1x16 (block, block_len);
978 sha256_transform (block, p_bytes);
983 block[15] = swap32 (transform_len * 8);
985 sha256_transform (block, p_bytes);
// P sequence is only used up to pw_len bytes.
989 truncate_block (p_bytes, pw_len);
991 tmps[gid].p_bytes[0] = p_bytes[0];
992 tmps[gid].p_bytes[1] = p_bytes[1];
993 tmps[gid].p_bytes[2] = p_bytes[2];
994 tmps[gid].p_bytes[3] = p_bytes[3];
996 /* Start computation of S byte sequence. */
1004 /* For every character in the password add the entire password. */
// Iteration count: 16 + low byte of alt_result[0].
1008 for (u32 j = 0; j < 16 + (alt_result[0] & 0xff); j++)
1010 block_len = memcat16c (block, block_len, salt_buf, salt_len, s_bytes);
1012 transform_len += salt_len;
1015 /* Finish the digest. */
1017 append_0x80_1x16 (block, block_len);
// The 0x80 may have pushed into the length area; flush first if so.
1019 if (block_len >= 56)
1021 sha256_transform (block, s_bytes);
1026 block[15] = swap32 (transform_len * 8);
1028 sha256_transform (block, s_bytes);
// S sequence is only used up to salt_len bytes.
1032 truncate_block (s_bytes, salt_len);
1034 tmps[gid].s_bytes[0] = s_bytes[0];
1035 tmps[gid].s_bytes[1] = s_bytes[1];
1036 tmps[gid].s_bytes[2] = s_bytes[2];
1037 tmps[gid].s_bytes[3] = s_bytes[3];
// Loop kernel for mode 7400: runs loop_cnt sha256crypt rounds starting
// at round index loop_pos.  Each round hashes a combination of the
// previous digest, the P sequence, and the S sequence, selected by the
// round number's parity and residues mod 3 and mod 7 (per the
// sha256crypt reference scheme).
1040 __kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1046 const u32 gid = get_global_id (0);
1048 if (gid >= gid_max) return;
1050 const u32 pw_len = pws[gid].pw_len;
// P byte sequence from the init kernel.
1058 p_bytes[0] = tmps[gid].p_bytes[0];
1059 p_bytes[1] = tmps[gid].p_bytes[1];
1060 p_bytes[2] = tmps[gid].p_bytes[2];
1061 p_bytes[3] = tmps[gid].p_bytes[3];
// Copy of P with the 0x80 padding byte pre-appended, for rounds where
// P is the final component of the message.
1065 p_bytes_x80[0] = tmps[gid].p_bytes[0];
1066 p_bytes_x80[1] = tmps[gid].p_bytes[1];
1067 p_bytes_x80[2] = tmps[gid].p_bytes[2];
1068 p_bytes_x80[3] = tmps[gid].p_bytes[3];
1070 append_0x80_1x4 (p_bytes_x80, pw_len);
// S byte sequence from the init kernel.
1074 s_bytes[0] = tmps[gid].s_bytes[0];
1075 s_bytes[1] = tmps[gid].s_bytes[1];
1076 s_bytes[2] = tmps[gid].s_bytes[2];
1077 s_bytes[3] = tmps[gid].s_bytes[3];
// Running digest from the previous kernel invocation.
1081 alt_result[0] = tmps[gid].alt_result[0];
1082 alt_result[1] = tmps[gid].alt_result[1];
1083 alt_result[2] = tmps[gid].alt_result[2];
1084 alt_result[3] = tmps[gid].alt_result[3];
1085 alt_result[4] = tmps[gid].alt_result[4];
1086 alt_result[5] = tmps[gid].alt_result[5];
1087 alt_result[6] = tmps[gid].alt_result[6];
1088 alt_result[7] = tmps[gid].alt_result[7];
1090 u32 salt_len = salt_bufs[salt_pos].salt_len;
1092 /* Repeatedly run the collected hash value through SHA256 to burn
1095 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
// Clear both 64-byte halves of the 80-byte working block.
1103 bzero16 (&block[ 0]);
1104 bzero16 (&block[16]);
// Round-number selectors per the sha256crypt reference implementation.
1108 const u32 j1 = (j & 1) ? 1 : 0;
1109 const u32 j3 = (j % 3) ? 1 : 0;
1110 const u32 j7 = (j % 7) ? 1 : 0;
// Odd rounds start with the P sequence ...
1114 block[0] = p_bytes[0];
1115 block[1] = p_bytes[1];
1116 block[2] = p_bytes[2];
1117 block[3] = p_bytes[3];
// ... even rounds start with the previous digest.
1123 block[0] = alt_result[0];
1124 block[1] = alt_result[1];
1125 block[2] = alt_result[2];
1126 block[3] = alt_result[3];
1127 block[4] = alt_result[4];
1128 block[5] = alt_result[5];
1129 block[6] = alt_result[6];
1130 block[7] = alt_result[7];
// Conditionally mix in S (j % 3) and P (j % 7).
1137 block_len = memcat20 (block, block_len, s_bytes, salt_len);
1142 block_len = memcat20 (block, block_len, p_bytes, pw_len);
// Finish with the component opposite the one the round started with;
// the _x80 variants carry the 0x80 padding byte.
1147 block_len = memcat20 (block, block_len, &alt_result[0], 16);
1148 block_len = memcat20_x80 (block, block_len, &alt_result[4], 16);
1152 block_len = memcat20 (block, block_len, p_bytes_x80, pw_len);
// Message longer than one padded block: compress the first 64 bytes
// and shift the second half down.
1155 if (block_len >= 56)
1157 sha256_transform (block, tmp);
1159 block[ 0] = block[16];
1160 block[ 1] = block[17];
1161 block[ 2] = block[18];
1162 block[ 3] = block[19];
// Bit length of the whole message in the final word.
1177 block[15] = swap32 (block_len * 8);
1179 sha256_transform_no14 (block, tmp);
1183 alt_result[0] = tmp[0];
1184 alt_result[1] = tmp[1];
1185 alt_result[2] = tmp[2];
1186 alt_result[3] = tmp[3];
1187 alt_result[4] = tmp[4];
1188 alt_result[5] = tmp[5];
1189 alt_result[6] = tmp[6];
1190 alt_result[7] = tmp[7];
// Persist the digest for the next loop invocation / comp kernel.
1193 tmps[gid].alt_result[0] = alt_result[0];
1194 tmps[gid].alt_result[1] = alt_result[1];
1195 tmps[gid].alt_result[2] = alt_result[2];
1196 tmps[gid].alt_result[3] = alt_result[3];
1197 tmps[gid].alt_result[4] = alt_result[4];
1198 tmps[gid].alt_result[5] = alt_result[5];
1199 tmps[gid].alt_result[6] = alt_result[6];
1200 tmps[gid].alt_result[7] = alt_result[7];
// Comparison kernel for mode 7400: loads the first four words of the
// final digest as r0..r3 for matching against the target hashes.
// NOTE(review): the comparison itself (COMPARE_S / COMPARE_M include)
// is not visible in this excerpt.
1203 __kernel void m07400_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1209 const u32 gid = get_global_id (0);
1211 if (gid >= gid_max) return;
// lid is used by the comparison include.
1213 const u32 lid = get_local_id (0);
1215 const u32 r0 = tmps[gid].alt_result[0];
1216 const u32 r1 = tmps[gid].alt_result[1];
1217 const u32 r2 = tmps[gid].alt_result[2];
1218 const u32 r3 = tmps[gid].alt_result[3];
1227 // this is basically a much cleaner version, but apparently drops speeds by over 100% :(
// Byte accessors that treat a u32 array as a big-endian byte stream:
// the `^ 3` flips the byte index within each 32-bit word, so byte 0 is
// the most significant byte of word 0 on a little-endian device.
1229 #define PUTCHAR32_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
1230 #define GETCHAR32_BE(a,p) ((u8 *)(a))[(p) ^ 3]
// SHA-256 compression for the ctx-based ("cleaner") variant below.
// NOTE(review): the working-variable setup, the schedule-word loads,
// and the final state feedback are in lines not visible in this
// excerpt; the expansion and round macros match the first variant.
1240 void sha256_transform (const u32 w[16], u32 digest[8])
// Message-schedule expansion (SHA256_EXPAND from inc_hash_functions.cl).
1268 #define ROUND_EXPAND() \
1270 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
1271 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
1272 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
1273 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
1274 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
1275 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
1276 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
1277 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
1278 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
1279 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
1280 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
1281 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
1282 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
1283 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
1284 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
1285 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
1288 #define ROUND_STEP(i) \
1290 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
1291 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
1292 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
1293 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
1294 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
1295 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
1296 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
1297 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
1298 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
1299 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
1300 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
1301 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
1302 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
1303 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
1304 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
1305 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
1313 for (int i = 16; i < 64; i += 16)
1315 ROUND_EXPAND (); ROUND_STEP (i);
// Reset a streaming SHA-256 context: initial state H0..H7 and a byte
// count of zero.
1328 void sha256_init (sha256_ctx_t *sha256_ctx)
1330 sha256_ctx->state[0] = SHA256M_A;
1331 sha256_ctx->state[1] = SHA256M_B;
1332 sha256_ctx->state[2] = SHA256M_C;
1333 sha256_ctx->state[3] = SHA256M_D;
1334 sha256_ctx->state[4] = SHA256M_E;
1335 sha256_ctx->state[5] = SHA256M_F;
1336 sha256_ctx->state[6] = SHA256M_G;
1337 sha256_ctx->state[7] = SHA256M_H;
1339 sha256_ctx->len = 0;
// Streaming update: copy len bytes from buf into the context's 64-byte
// buffer (big-endian byte addressing via PUTCHAR32_BE), compressing
// with sha256_transform whenever the buffer fills.
1342 void sha256_update (sha256_ctx_t *sha256_ctx, const u32 *buf, int len)
// Current byte offset within the 64-byte block.
1344 int pos = sha256_ctx->len & 0x3f;
1346 sha256_ctx->len += len;
// Fast path: everything fits in the current block.
1348 if ((pos + len) < 64)
1350 for (int i = 0; i < len; i++)
1352 PUTCHAR32_BE (sha256_ctx->buf, pos++, GETCHAR32_BE (buf, i));
// Slow path: fill the block (cnt bytes), compress, then copy the rest.
// NOTE(review): the computation of cnt (presumably 64 - pos) and the
// reduction of len by cnt before the loop below are not visible here.
1360 for (int i = 0; i < cnt; i++)
1362 PUTCHAR32_BE (sha256_ctx->buf, pos++, GETCHAR32_BE (buf, i));
1365 sha256_transform (sha256_ctx->buf, sha256_ctx->state);
// Remaining bytes start a fresh block at offset 0.
1369 for (int i = 0; i < len; i++)
1371 PUTCHAR32_BE (sha256_ctx->buf, i, GETCHAR32_BE (buf, cnt + i));
// Finalize the streaming context: append the 0x80 padding byte, zero
// the remainder, write the bit length, and run the final transform(s).
1375 void sha256_final (sha256_ctx_t *sha256_ctx)
1377 int pos = sha256_ctx->len & 0x3f;
// Zero everything after the data, then place the 0x80 marker.
1379 for (int i = pos; i < 64; i++)
1381 PUTCHAR32_BE (sha256_ctx->buf, i, 0);
1384 PUTCHAR32_BE (sha256_ctx->buf, pos, 0x80);
// No room for the 8-byte length field in this block: compress it and
// start a fresh all-zero block.  NOTE(review): the guarding condition
// (presumably pos >= 56) is not visible in this excerpt.
1388 sha256_transform (sha256_ctx->buf, sha256_ctx->state);
1390 sha256_ctx->buf[ 0] = 0;
1391 sha256_ctx->buf[ 1] = 0;
1392 sha256_ctx->buf[ 2] = 0;
1393 sha256_ctx->buf[ 3] = 0;
1394 sha256_ctx->buf[ 4] = 0;
1395 sha256_ctx->buf[ 5] = 0;
1396 sha256_ctx->buf[ 6] = 0;
1397 sha256_ctx->buf[ 7] = 0;
1398 sha256_ctx->buf[ 8] = 0;
1399 sha256_ctx->buf[ 9] = 0;
1400 sha256_ctx->buf[10] = 0;
1401 sha256_ctx->buf[11] = 0;
1402 sha256_ctx->buf[12] = 0;
1403 sha256_ctx->buf[13] = 0;
1404 sha256_ctx->buf[14] = 0;
1405 sha256_ctx->buf[15] = 0;
// Message bit length in the last word, then the final compression.
1408 sha256_ctx->buf[15] = sha256_ctx->len * 8;
1410 sha256_transform (sha256_ctx->buf, sha256_ctx->state);
// Ctx-based variant of the mode 7400 init kernel (see the "cleaner
// version" note above): password and salt words are byte-swapped to
// big-endian up front, and all hashing goes through the streaming
// sha256_init/update/final API.  Produces the same tmps[gid] contents
// (alt_result, p_bytes, s_bytes) as the word-packed variant.
1413 __kernel void m07400_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1419 const u32 gid = get_global_id (0);
1421 if (gid >= gid_max) return;
// Password, byte-swapped for the big-endian byte accessors.
1425 pw[0] = swap32 (pws[gid].i[0]);
1426 pw[1] = swap32 (pws[gid].i[1]);
1427 pw[2] = swap32 (pws[gid].i[2]);
1428 pw[3] = swap32 (pws[gid].i[3]);
1430 const u32 pw_len = pws[gid].pw_len;
// Salt, byte-swapped likewise.
1438 salt[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
1439 salt[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
1440 salt[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
1441 salt[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
1443 u32 salt_len = salt_bufs[salt_pos].salt_len;
1449 sha256_ctx_t sha256_ctx;
// Alternate digest: SHA256(password . salt . password).
1451 sha256_init (&sha256_ctx);
1453 sha256_update (&sha256_ctx, pw, pw_len);
1454 sha256_update (&sha256_ctx, salt, salt_len);
1455 sha256_update (&sha256_ctx, pw, pw_len);
1457 sha256_final (&sha256_ctx);
1461 tmp[0] = sha256_ctx.state[0];
1462 tmp[1] = sha256_ctx.state[1];
1463 tmp[2] = sha256_ctx.state[2];
1464 tmp[3] = sha256_ctx.state[3];
1465 tmp[4] = sha256_ctx.state[4];
1466 tmp[5] = sha256_ctx.state[5];
1467 tmp[6] = sha256_ctx.state[6];
1468 tmp[7] = sha256_ctx.state[7];
// Digest A: password . salt . first pw_len bytes of the alt digest,
// then alt-or-password per bit of pw_len.
1470 sha256_init (&sha256_ctx);
1472 sha256_update (&sha256_ctx, pw, pw_len);
1473 sha256_update (&sha256_ctx, salt, salt_len);
1474 sha256_update (&sha256_ctx, tmp, pw_len);
1476 for (u32 j = pw_len; j; j >>= 1)
1480 sha256_update (&sha256_ctx, tmp, 32);
1484 sha256_update (&sha256_ctx, pw, pw_len);
1488 sha256_final (&sha256_ctx);
1490 tmps[gid].alt_result[0] = sha256_ctx.state[0];
1491 tmps[gid].alt_result[1] = sha256_ctx.state[1];
1492 tmps[gid].alt_result[2] = sha256_ctx.state[2];
1493 tmps[gid].alt_result[3] = sha256_ctx.state[3];
1494 tmps[gid].alt_result[4] = sha256_ctx.state[4];
1495 tmps[gid].alt_result[5] = sha256_ctx.state[5];
1496 tmps[gid].alt_result[6] = sha256_ctx.state[6];
1497 tmps[gid].alt_result[7] = sha256_ctx.state[7];
// P byte sequence: the password hashed pw_len times.
1501 sha256_init (&sha256_ctx);
1503 for (u32 j = 0; j < pw_len; j++)
1505 sha256_update (&sha256_ctx, pw, pw_len);
1508 sha256_final (&sha256_ctx);
1510 tmps[gid].p_bytes[0] = sha256_ctx.state[0];
1511 tmps[gid].p_bytes[1] = sha256_ctx.state[1];
1512 tmps[gid].p_bytes[2] = sha256_ctx.state[2];
1513 tmps[gid].p_bytes[3] = sha256_ctx.state[3];
// S byte sequence: the salt hashed 16 + first-digest-byte times
// (>> 24 extracts the top byte of the big-endian word).
1517 sha256_init (&sha256_ctx);
1519 for (u32 j = 0; j < 16 + ((tmps[gid].alt_result[0] >> 24) & 0xff); j++)
1521 sha256_update (&sha256_ctx, salt, salt_len);
1524 sha256_final (&sha256_ctx);
1526 tmps[gid].s_bytes[0] = sha256_ctx.state[0];
1527 tmps[gid].s_bytes[1] = sha256_ctx.state[1];
1528 tmps[gid].s_bytes[2] = sha256_ctx.state[2];
1529 tmps[gid].s_bytes[3] = sha256_ctx.state[3];
// Ctx-variant loop kernel: instead of rebuilding the round message each
// iteration, it precomputes all 8 possible message layouts (one per
// combination of the j&1, j%3, j%7 selectors) into wpc[8][32], then each
// round just selects a layout, patches in the current digest, and runs
// one or two SHA-256 transforms.
1532 __kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1538 const u32 gid = get_global_id (0);
1540 if (gid >= gid_max) return;
// P byte sequence from the init kernel.
1544 p_bytes0[0] = tmps[gid].p_bytes[0];
1545 p_bytes0[1] = tmps[gid].p_bytes[1];
1546 p_bytes0[2] = tmps[gid].p_bytes[2];
1547 p_bytes0[3] = tmps[gid].p_bytes[3];
1549 const u32 pw_len = pws[gid].pw_len;
// S byte sequence from the init kernel.
1553 s_bytes0[0] = tmps[gid].s_bytes[0];
1554 s_bytes0[1] = tmps[gid].s_bytes[1];
1555 s_bytes0[2] = tmps[gid].s_bytes[2];
1556 s_bytes0[3] = tmps[gid].s_bytes[3];
1558 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// Total message length for each of the 8 layouts: 32 bytes of digest
// plus optional salt (bit 1) and optional password (bit 2), with the
// digest leading on even rounds (bit 0 clear) and trailing on odd.
1562 wpc_len[0] = 32 + 0 + 0 + pw_len;
1563 wpc_len[1] = pw_len + 0 + 0 + 32;
1564 wpc_len[2] = 32 + salt_len + 0 + pw_len;
1565 wpc_len[3] = pw_len + salt_len + 0 + 32;
1566 wpc_len[4] = 32 + 0 + pw_len + pw_len;
1567 wpc_len[5] = pw_len + 0 + pw_len + 32;
1568 wpc_len[6] = 32 + salt_len + pw_len + pw_len;
1569 wpc_len[7] = pw_len + salt_len + pw_len + 32;
// 8 precomputed 128-byte (2-block) message buffers.
1571 u32 wpc[8][32] = { { 0 } };
1573 for (u32 i = 0; i < 8; i++)
// Leading password component (odd-round layouts).
1579 for (u32 j = 0; j < pw_len; j++)
1581 PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (p_bytes0, j));
// Optional salt component (layouts with the j%3 bit set).
1591 for (u32 j = 0; j < salt_len; j++)
1593 PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (s_bytes0, j));
// Optional extra password component (layouts with the j%7 bit set).
1599 for (u32 j = 0; j < pw_len; j++)
1601 PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (p_bytes0, j));
// Trailing password component (even-round layouts).
1611 for (u32 j = 0; j < pw_len; j++)
1613 PUTCHAR32_BE (wpc[i], block_len++, GETCHAR32_BE (p_bytes0, j));
// SHA-256 padding: 0x80 marker plus the bit length in the last word of
// whichever block the message ends in.  NOTE(review): the conditionals
// choosing between [15] and [31] (presumably on block_len < 56) are not
// visible in this excerpt.
1617 PUTCHAR32_BE (wpc[i], block_len, 0x80);
1621 wpc[i][15] = block_len * 8;
1625 wpc[i][31] = block_len * 8;
// Running digest from the previous kernel invocation.
1635 alt_result[0] = tmps[gid].alt_result[0];
1636 alt_result[1] = tmps[gid].alt_result[1];
1637 alt_result[2] = tmps[gid].alt_result[2];
1638 alt_result[3] = tmps[gid].alt_result[3];
1639 alt_result[4] = tmps[gid].alt_result[4];
1640 alt_result[5] = tmps[gid].alt_result[5];
1641 alt_result[6] = tmps[gid].alt_result[6];
1642 alt_result[7] = tmps[gid].alt_result[7];
1645 /* Repeatedly run the collected hash value through SHA256 to burn
1648 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
// Selector bits weighted so pc in [0,7] indexes the wpc layout.
1650 const u32 j1 = (j & 1) ? 1 : 0;
1651 const u32 j3 = (j % 3) ? 2 : 0;
1652 const u32 j7 = (j % 7) ? 4 : 0;
1654 const u32 pc = j1 + j3 + j7;
// Copy the prebuilt 2-block message for this round's layout.
1658 block[ 0] = wpc[pc][ 0];
1659 block[ 1] = wpc[pc][ 1];
1660 block[ 2] = wpc[pc][ 2];
1661 block[ 3] = wpc[pc][ 3];
1662 block[ 4] = wpc[pc][ 4];
1663 block[ 5] = wpc[pc][ 5];
1664 block[ 6] = wpc[pc][ 6];
1665 block[ 7] = wpc[pc][ 7];
1666 block[ 8] = wpc[pc][ 8];
1667 block[ 9] = wpc[pc][ 9];
1668 block[10] = wpc[pc][10];
1669 block[11] = wpc[pc][11];
1670 block[12] = wpc[pc][12];
1671 block[13] = wpc[pc][13];
1672 block[14] = wpc[pc][14];
1673 block[15] = wpc[pc][15];
1674 block[16] = wpc[pc][16];
1675 block[17] = wpc[pc][17];
1676 block[18] = wpc[pc][18];
1677 block[19] = wpc[pc][19];
1678 block[20] = wpc[pc][20];
1679 block[21] = wpc[pc][21];
1680 block[22] = wpc[pc][22];
1681 block[23] = wpc[pc][23];
1682 block[24] = wpc[pc][24];
1683 block[25] = wpc[pc][25];
1684 block[26] = wpc[pc][26];
1685 block[27] = wpc[pc][27];
1686 block[28] = wpc[pc][28];
1687 block[29] = wpc[pc][29];
1688 block[30] = wpc[pc][30];
1689 block[31] = wpc[pc][31];
1691 const u32 block_len = wpc_len[pc];
// Digest trails the message: write its 32 bytes at the end (byte-wise,
// since block_len - 32 need not be word-aligned).
1698 for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
1700 PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));
// Digest leads the message: it occupies words 0..7 exactly.
1705 block[0] = alt_result[0];
1706 block[1] = alt_result[1];
1707 block[2] = alt_result[2];
1708 block[3] = alt_result[3];
1709 block[4] = alt_result[4];
1710 block[5] = alt_result[5];
1711 block[6] = alt_result[6];
1712 block[7] = alt_result[7];
// Fresh SHA-256 state for this round.
1715 alt_result[0] = SHA256M_A;
1716 alt_result[1] = SHA256M_B;
1717 alt_result[2] = SHA256M_C;
1718 alt_result[3] = SHA256M_D;
1719 alt_result[4] = SHA256M_E;
1720 alt_result[5] = SHA256M_F;
1721 alt_result[6] = SHA256M_G;
1722 alt_result[7] = SHA256M_H;
1724 sha256_transform (block, alt_result);
// Message spilled into the second 64-byte block: compress it too.
1726 if (block_len >= 56)
1728 sha256_transform (block + 16, alt_result);
// Persist the digest for the next loop invocation / comp kernel.
1732 tmps[gid].alt_result[0] = alt_result[0];
1733 tmps[gid].alt_result[1] = alt_result[1];
1734 tmps[gid].alt_result[2] = alt_result[2];
1735 tmps[gid].alt_result[3] = alt_result[3];
1736 tmps[gid].alt_result[4] = alt_result[4];
1737 tmps[gid].alt_result[5] = alt_result[5];
1738 tmps[gid].alt_result[6] = alt_result[6];
1739 tmps[gid].alt_result[7] = alt_result[7];
1742 __kernel void m07400_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1748 const u32 gid = get_global_id (0);
1750 if (gid >= gid_max) return;
1752 const u32 lid = get_local_id (0);
1754 const u32 r0 = swap32 (tmps[gid].alt_result[0]);
1755 const u32 r1 = swap32 (tmps[gid].alt_result[1]);
1756 const u32 r2 = swap32 (tmps[gid].alt_result[2]);
1757 const u32 r3 = swap32 (tmps[gid].alt_result[3]);