2 * Author......: Jens Steube <jens.steube@gmail.com>
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
20 #include "OpenCL/types_ocl.c"
21 #include "OpenCL/common.c"
22 #include "OpenCL/simd.c"
25 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
27 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
29 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
31 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
34 static void m04700m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
40 const u32 gid = get_global_id (0);
41 const u32 lid = get_local_id (0);
49 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
51 const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
53 const u32x w0lr = w0l | w0r;
92 MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
93 MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
94 MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
95 MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
96 MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
97 MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
98 MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
99 MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
100 MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
101 MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
102 MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
103 MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
104 MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
105 MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
106 MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
107 MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
109 MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
110 MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
111 MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
112 MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
113 MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
114 MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
115 MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
116 MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
117 MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
118 MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
119 MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
120 MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
121 MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
122 MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
123 MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
124 MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
126 MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
127 MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
128 MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
129 MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
130 MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
131 MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
132 MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
133 MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
134 MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
135 MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
136 MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
137 MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
138 MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
139 MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
140 MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
141 MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
143 MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
144 MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
145 MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
146 MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
147 MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
148 MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
149 MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
150 MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
151 MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
152 MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
153 MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
154 MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
155 MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
156 MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
157 MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
158 MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
169 w0_t[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
170 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
171 w0_t[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
172 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
173 w0_t[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
174 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
175 w0_t[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
176 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
177 w1_t[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
178 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
179 w1_t[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
180 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
181 w1_t[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
182 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
183 w1_t[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
184 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
186 w2_t[0] = 0x80000000;
207 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]);
208 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]);
209 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]);
210 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]);
211 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]);
212 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]);
213 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]);
214 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]);
215 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]);
216 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]);
217 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]);
218 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]);
219 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]);
220 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]);
221 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]);
222 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]);
223 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]);
224 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]);
225 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]);
226 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]);
231 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]);
232 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]);
233 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]);
234 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]);
235 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]);
236 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]);
237 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]);
238 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]);
239 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]);
240 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]);
241 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]);
242 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]);
243 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]);
244 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]);
245 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]);
246 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]);
247 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]);
248 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]);
249 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]);
250 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]);
255 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]);
256 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]);
257 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]);
258 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]);
259 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]);
260 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]);
261 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]);
262 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]);
263 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]);
264 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]);
265 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]);
266 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]);
267 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]);
268 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]);
269 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]);
270 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]);
271 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]);
272 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]);
273 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]);
274 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]);
279 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]);
280 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]);
281 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]);
282 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]);
283 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]);
284 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]);
285 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]);
286 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]);
287 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]);
288 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]);
289 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]);
290 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]);
291 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]);
292 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]);
293 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]);
294 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
295 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
296 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
297 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
298 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
300 COMPARE_M_SIMD (d, e, c, b);
304 static void m04700s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
310 const u32 gid = get_global_id (0);
311 const u32 lid = get_local_id (0);
317 const u32 search[4] =
319 digests_buf[digests_offset].digest_buf[DGST_R0],
320 digests_buf[digests_offset].digest_buf[DGST_R1],
321 digests_buf[digests_offset].digest_buf[DGST_R2],
322 digests_buf[digests_offset].digest_buf[DGST_R3]
329 const u32 e_rev = rotl32_S (search[1], 2u);
337 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
339 const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
341 const u32x w0lr = w0l | w0r;
380 MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
381 MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
382 MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
383 MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
384 MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
385 MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
386 MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
387 MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
388 MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
389 MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
390 MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
391 MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
392 MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
393 MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
394 MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
395 MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
397 MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
398 MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
399 MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
400 MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
401 MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
402 MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
403 MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
404 MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
405 MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
406 MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
407 MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
408 MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
409 MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
410 MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
411 MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
412 MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
414 MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
415 MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
416 MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
417 MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
418 MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
419 MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
420 MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
421 MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
422 MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
423 MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
424 MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
425 MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
426 MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
427 MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
428 MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
429 MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
431 MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
432 MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
433 MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
434 MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
435 MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
436 MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
437 MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
438 MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
439 MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
440 MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
441 MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
442 MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
443 MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
444 MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
445 MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
446 MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
457 w0_t[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
458 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
459 w0_t[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
460 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
461 w0_t[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
462 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
463 w0_t[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
464 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
465 w1_t[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
466 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
467 w1_t[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
468 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
469 w1_t[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
470 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
471 w1_t[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
472 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
474 w2_t[0] = 0x80000000;
495 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]);
496 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]);
497 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]);
498 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]);
499 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]);
500 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]);
501 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]);
502 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]);
503 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]);
504 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]);
505 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]);
506 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]);
507 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]);
508 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]);
509 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]);
510 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]);
511 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]);
512 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]);
513 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]);
514 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]);
519 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]);
520 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]);
521 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]);
522 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]);
523 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]);
524 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]);
525 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]);
526 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]);
527 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]);
528 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]);
529 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]);
530 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]);
531 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]);
532 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]);
533 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]);
534 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]);
535 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]);
536 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]);
537 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]);
538 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]);
543 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]);
544 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]);
545 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]);
546 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]);
547 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]);
548 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]);
549 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]);
550 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]);
551 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]);
552 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]);
553 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]);
554 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]);
555 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]);
556 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]);
557 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]);
558 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]);
559 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]);
560 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]);
561 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]);
562 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]);
567 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]);
568 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]);
569 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]);
570 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]);
571 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]);
572 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]);
573 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]);
574 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]);
575 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]);
576 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]);
577 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]);
578 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]);
579 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]);
580 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]);
581 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]);
582 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
584 if (MATCHES_NONE_VS (e, e_rev)) continue;
586 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
587 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
588 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
589 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
591 COMPARE_S_SIMD (d, e, c, b);
596 __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
602 const u32 gid = get_global_id (0);
603 const u32 lid = get_local_id (0);
604 const u32 lsz = get_local_size (0);
610 __local u32 l_bin2asc[256];
612 for (u32 i = lid; i < 256; i += lsz)
614 const u32 i0 = (i >> 0) & 15;
615 const u32 i1 = (i >> 4) & 15;
617 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
618 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
621 barrier (CLK_LOCAL_MEM_FENCE);
623 if (gid >= gid_max) return;
631 w0[0] = pws[gid].i[ 0];
632 w0[1] = pws[gid].i[ 1];
633 w0[2] = pws[gid].i[ 2];
634 w0[3] = pws[gid].i[ 3];
654 w3[2] = pws[gid].i[14];
657 const u32 pw_len = pws[gid].pw_len;
663 m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, l_bin2asc);
666 __kernel void m04700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
672 const u32 gid = get_global_id (0);
673 const u32 lid = get_local_id (0);
674 const u32 lsz = get_local_size (0);
680 __local u32 l_bin2asc[256];
682 for (u32 i = lid; i < 256; i += lsz)
684 const u32 i0 = (i >> 0) & 15;
685 const u32 i1 = (i >> 4) & 15;
687 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
688 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
691 barrier (CLK_LOCAL_MEM_FENCE);
693 if (gid >= gid_max) return;
701 w0[0] = pws[gid].i[ 0];
702 w0[1] = pws[gid].i[ 1];
703 w0[2] = pws[gid].i[ 2];
704 w0[3] = pws[gid].i[ 3];
708 w1[0] = pws[gid].i[ 4];
709 w1[1] = pws[gid].i[ 5];
710 w1[2] = pws[gid].i[ 6];
711 w1[3] = pws[gid].i[ 7];
724 w3[2] = pws[gid].i[14];
727 const u32 pw_len = pws[gid].pw_len;
733 m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, l_bin2asc);
736 __kernel void m04700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
742 const u32 gid = get_global_id (0);
743 const u32 lid = get_local_id (0);
744 const u32 lsz = get_local_size (0);
750 __local u32 l_bin2asc[256];
752 for (u32 i = lid; i < 256; i += lsz)
754 const u32 i0 = (i >> 0) & 15;
755 const u32 i1 = (i >> 4) & 15;
757 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
758 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
761 barrier (CLK_LOCAL_MEM_FENCE);
763 if (gid >= gid_max) return;
771 w0[0] = pws[gid].i[ 0];
772 w0[1] = pws[gid].i[ 1];
773 w0[2] = pws[gid].i[ 2];
774 w0[3] = pws[gid].i[ 3];
778 w1[0] = pws[gid].i[ 4];
779 w1[1] = pws[gid].i[ 5];
780 w1[2] = pws[gid].i[ 6];
781 w1[3] = pws[gid].i[ 7];
785 w2[0] = pws[gid].i[ 8];
786 w2[1] = pws[gid].i[ 9];
787 w2[2] = pws[gid].i[10];
788 w2[3] = pws[gid].i[11];
792 w3[0] = pws[gid].i[12];
793 w3[1] = pws[gid].i[13];
794 w3[2] = pws[gid].i[14];
795 w3[3] = pws[gid].i[15];
797 const u32 pw_len = pws[gid].pw_len;
803 m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, l_bin2asc);
806 __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
812 const u32 gid = get_global_id (0);
813 const u32 lid = get_local_id (0);
814 const u32 lsz = get_local_size (0);
820 __local u32 l_bin2asc[256];
822 for (u32 i = lid; i < 256; i += lsz)
824 const u32 i0 = (i >> 0) & 15;
825 const u32 i1 = (i >> 4) & 15;
827 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
828 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
831 barrier (CLK_LOCAL_MEM_FENCE);
833 if (gid >= gid_max) return;
841 w0[0] = pws[gid].i[ 0];
842 w0[1] = pws[gid].i[ 1];
843 w0[2] = pws[gid].i[ 2];
844 w0[3] = pws[gid].i[ 3];
864 w3[2] = pws[gid].i[14];
867 const u32 pw_len = pws[gid].pw_len;
873 m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, l_bin2asc);
876 __kernel void m04700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
882 const u32 gid = get_global_id (0);
883 const u32 lid = get_local_id (0);
884 const u32 lsz = get_local_size (0);
890 __local u32 l_bin2asc[256];
892 for (u32 i = lid; i < 256; i += lsz)
894 const u32 i0 = (i >> 0) & 15;
895 const u32 i1 = (i >> 4) & 15;
897 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
898 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
901 barrier (CLK_LOCAL_MEM_FENCE);
903 if (gid >= gid_max) return;
911 w0[0] = pws[gid].i[ 0];
912 w0[1] = pws[gid].i[ 1];
913 w0[2] = pws[gid].i[ 2];
914 w0[3] = pws[gid].i[ 3];
918 w1[0] = pws[gid].i[ 4];
919 w1[1] = pws[gid].i[ 5];
920 w1[2] = pws[gid].i[ 6];
921 w1[3] = pws[gid].i[ 7];
934 w3[2] = pws[gid].i[14];
937 const u32 pw_len = pws[gid].pw_len;
943 m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, l_bin2asc);
946 __kernel void m04700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
952 const u32 gid = get_global_id (0);
953 const u32 lid = get_local_id (0);
954 const u32 lsz = get_local_size (0);
960 __local u32 l_bin2asc[256];
962 for (u32 i = lid; i < 256; i += lsz)
964 const u32 i0 = (i >> 0) & 15;
965 const u32 i1 = (i >> 4) & 15;
967 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
968 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
971 barrier (CLK_LOCAL_MEM_FENCE);
973 if (gid >= gid_max) return;
981 w0[0] = pws[gid].i[ 0];
982 w0[1] = pws[gid].i[ 1];
983 w0[2] = pws[gid].i[ 2];
984 w0[3] = pws[gid].i[ 3];
988 w1[0] = pws[gid].i[ 4];
989 w1[1] = pws[gid].i[ 5];
990 w1[2] = pws[gid].i[ 6];
991 w1[3] = pws[gid].i[ 7];
995 w2[0] = pws[gid].i[ 8];
996 w2[1] = pws[gid].i[ 9];
997 w2[2] = pws[gid].i[10];
998 w2[3] = pws[gid].i[11];
1002 w3[0] = pws[gid].i[12];
1003 w3[1] = pws[gid].i[13];
1004 w3[2] = pws[gid].i[14];
1005 w3[3] = pws[gid].i[15];
1007 const u32 pw_len = pws[gid].pw_len;
1013 m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, l_bin2asc);