2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
12 #include "include/constants.h"
13 #include "include/kernel_vendor.h"
20 #include "include/kernel_functions.c"
22 #include "OpenCL/types_ocl.c"
23 #include "OpenCL/common.c"
24 #include "OpenCL/simd.c"
27 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
29 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
31 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
33 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
35 #define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
38 static void m04700m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
44 const u32 gid = get_global_id (0);
45 const u32 lid = get_local_id (0);
53 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
55 const u32x w0r = ix_create_bft (bfs_buf, il_pos);
57 const u32x w0lr = w0l | w0r;
96 MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
97 MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
98 MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
99 MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
100 MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
101 MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
102 MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
103 MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
104 MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
105 MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
106 MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
107 MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
108 MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
109 MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
110 MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
111 MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
113 MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
114 MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
115 MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
116 MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
117 MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
118 MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
119 MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
120 MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
121 MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
122 MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
123 MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
124 MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
125 MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
126 MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
127 MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
128 MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
130 MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
131 MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
132 MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
133 MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
134 MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
135 MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
136 MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
137 MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
138 MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
139 MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
140 MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
141 MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
142 MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
143 MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
144 MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
145 MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
147 MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
148 MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
149 MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
150 MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
151 MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
152 MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
153 MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
154 MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
155 MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
156 MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
157 MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
158 MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
159 MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
160 MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
161 MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
162 MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
173 w0_t[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
174 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
175 w0_t[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
176 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
177 w0_t[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
178 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
179 w0_t[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
180 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
181 w1_t[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
182 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
183 w1_t[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
184 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
185 w1_t[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
186 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
187 w1_t[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
188 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
190 w2_t[0] = 0x80000000;
211 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]);
212 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]);
213 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]);
214 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]);
215 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]);
216 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]);
217 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]);
218 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]);
219 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]);
220 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]);
221 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]);
222 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]);
223 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]);
224 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]);
225 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]);
226 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]);
227 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]);
228 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]);
229 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]);
230 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]);
235 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]);
236 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]);
237 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]);
238 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]);
239 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]);
240 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]);
241 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]);
242 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]);
243 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]);
244 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]);
245 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]);
246 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]);
247 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]);
248 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]);
249 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]);
250 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]);
251 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]);
252 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]);
253 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]);
254 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]);
259 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]);
260 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]);
261 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]);
262 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]);
263 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]);
264 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]);
265 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]);
266 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]);
267 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]);
268 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]);
269 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]);
270 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]);
271 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]);
272 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]);
273 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]);
274 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]);
275 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]);
276 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]);
277 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]);
278 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]);
283 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]);
284 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]);
285 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]);
286 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]);
287 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]);
288 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]);
289 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]);
290 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]);
291 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]);
292 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]);
293 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]);
294 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]);
295 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]);
296 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]);
297 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]);
298 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
299 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
300 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
301 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
302 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
304 COMPARE_M_SIMD (d, e, c, b);
308 static void m04700s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
314 const u32 gid = get_global_id (0);
315 const u32 lid = get_local_id (0);
321 const u32 search[4] =
323 digests_buf[digests_offset].digest_buf[DGST_R0],
324 digests_buf[digests_offset].digest_buf[DGST_R1],
325 digests_buf[digests_offset].digest_buf[DGST_R2],
326 digests_buf[digests_offset].digest_buf[DGST_R3]
333 const u32 e_rev = rotl32_S (search[1], 2u);
341 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
343 const u32x w0r = ix_create_bft (bfs_buf, il_pos);
345 const u32x w0lr = w0l | w0r;
384 MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
385 MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
386 MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
387 MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
388 MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
389 MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
390 MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
391 MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
392 MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
393 MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
394 MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
395 MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
396 MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
397 MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
398 MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
399 MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
401 MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
402 MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
403 MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
404 MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
405 MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
406 MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
407 MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
408 MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
409 MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
410 MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
411 MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
412 MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
413 MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
414 MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
415 MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
416 MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
418 MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
419 MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
420 MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
421 MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
422 MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
423 MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
424 MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
425 MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
426 MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
427 MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
428 MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
429 MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
430 MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
431 MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
432 MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
433 MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
435 MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
436 MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
437 MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
438 MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
439 MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
440 MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
441 MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
442 MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
443 MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
444 MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
445 MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
446 MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
447 MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
448 MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
449 MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
450 MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
461 w0_t[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
462 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
463 w0_t[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
464 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
465 w0_t[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
466 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
467 w0_t[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
468 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
469 w1_t[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
470 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
471 w1_t[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
472 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
473 w1_t[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
474 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
475 w1_t[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
476 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
478 w2_t[0] = 0x80000000;
499 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]);
500 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]);
501 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]);
502 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]);
503 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]);
504 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]);
505 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]);
506 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]);
507 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]);
508 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]);
509 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]);
510 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]);
511 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]);
512 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]);
513 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]);
514 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]);
515 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]);
516 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]);
517 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]);
518 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]);
523 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]);
524 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]);
525 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]);
526 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]);
527 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]);
528 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]);
529 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]);
530 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]);
531 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]);
532 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]);
533 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]);
534 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]);
535 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]);
536 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]);
537 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]);
538 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]);
539 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]);
540 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]);
541 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]);
542 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]);
547 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]);
548 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]);
549 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]);
550 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]);
551 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]);
552 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]);
553 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]);
554 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]);
555 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]);
556 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]);
557 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]);
558 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]);
559 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]);
560 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]);
561 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]);
562 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]);
563 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]);
564 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]);
565 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]);
566 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]);
571 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]);
572 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]);
573 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]);
574 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]);
575 w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]);
576 w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]);
577 w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]);
578 w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]);
579 w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]);
580 w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]);
581 w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]);
582 w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]);
583 w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]);
584 w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]);
585 w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]);
586 w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
588 if (MATCHES_NONE_VS (e, e_rev)) continue;
590 w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
591 w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
592 w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
593 w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
595 COMPARE_S_SIMD (d, e, c, b);
600 __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
606 const u32 gid = get_global_id (0);
607 const u32 lid = get_local_id (0);
608 const u32 lsz = get_local_size (0);
614 __local u32 l_bin2asc[256];
616 for (u32 i = lid; i < 256; i += lsz)
618 const u32 i0 = (i >> 0) & 15;
619 const u32 i1 = (i >> 4) & 15;
621 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
622 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
625 barrier (CLK_LOCAL_MEM_FENCE);
627 if (gid >= gid_max) return;
635 w0[0] = pws[gid].i[ 0];
636 w0[1] = pws[gid].i[ 1];
637 w0[2] = pws[gid].i[ 2];
638 w0[3] = pws[gid].i[ 3];
658 w3[2] = pws[gid].i[14];
661 const u32 pw_len = pws[gid].pw_len;
667 m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, l_bin2asc);
670 __kernel void m04700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
676 const u32 gid = get_global_id (0);
677 const u32 lid = get_local_id (0);
678 const u32 lsz = get_local_size (0);
684 __local u32 l_bin2asc[256];
686 for (u32 i = lid; i < 256; i += lsz)
688 const u32 i0 = (i >> 0) & 15;
689 const u32 i1 = (i >> 4) & 15;
691 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
692 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
695 barrier (CLK_LOCAL_MEM_FENCE);
697 if (gid >= gid_max) return;
705 w0[0] = pws[gid].i[ 0];
706 w0[1] = pws[gid].i[ 1];
707 w0[2] = pws[gid].i[ 2];
708 w0[3] = pws[gid].i[ 3];
712 w1[0] = pws[gid].i[ 4];
713 w1[1] = pws[gid].i[ 5];
714 w1[2] = pws[gid].i[ 6];
715 w1[3] = pws[gid].i[ 7];
728 w3[2] = pws[gid].i[14];
731 const u32 pw_len = pws[gid].pw_len;
737 m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, l_bin2asc);
740 __kernel void m04700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
746 const u32 gid = get_global_id (0);
747 const u32 lid = get_local_id (0);
748 const u32 lsz = get_local_size (0);
754 __local u32 l_bin2asc[256];
756 for (u32 i = lid; i < 256; i += lsz)
758 const u32 i0 = (i >> 0) & 15;
759 const u32 i1 = (i >> 4) & 15;
761 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
762 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
765 barrier (CLK_LOCAL_MEM_FENCE);
767 if (gid >= gid_max) return;
775 w0[0] = pws[gid].i[ 0];
776 w0[1] = pws[gid].i[ 1];
777 w0[2] = pws[gid].i[ 2];
778 w0[3] = pws[gid].i[ 3];
782 w1[0] = pws[gid].i[ 4];
783 w1[1] = pws[gid].i[ 5];
784 w1[2] = pws[gid].i[ 6];
785 w1[3] = pws[gid].i[ 7];
789 w2[0] = pws[gid].i[ 8];
790 w2[1] = pws[gid].i[ 9];
791 w2[2] = pws[gid].i[10];
792 w2[3] = pws[gid].i[11];
796 w3[0] = pws[gid].i[12];
797 w3[1] = pws[gid].i[13];
798 w3[2] = pws[gid].i[14];
799 w3[3] = pws[gid].i[15];
801 const u32 pw_len = pws[gid].pw_len;
807 m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, l_bin2asc);
810 __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
816 const u32 gid = get_global_id (0);
817 const u32 lid = get_local_id (0);
818 const u32 lsz = get_local_size (0);
824 __local u32 l_bin2asc[256];
826 for (u32 i = lid; i < 256; i += lsz)
828 const u32 i0 = (i >> 0) & 15;
829 const u32 i1 = (i >> 4) & 15;
831 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
832 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
835 barrier (CLK_LOCAL_MEM_FENCE);
837 if (gid >= gid_max) return;
845 w0[0] = pws[gid].i[ 0];
846 w0[1] = pws[gid].i[ 1];
847 w0[2] = pws[gid].i[ 2];
848 w0[3] = pws[gid].i[ 3];
868 w3[2] = pws[gid].i[14];
871 const u32 pw_len = pws[gid].pw_len;
877 m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, l_bin2asc);
880 __kernel void m04700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
886 const u32 gid = get_global_id (0);
887 const u32 lid = get_local_id (0);
888 const u32 lsz = get_local_size (0);
894 __local u32 l_bin2asc[256];
896 for (u32 i = lid; i < 256; i += lsz)
898 const u32 i0 = (i >> 0) & 15;
899 const u32 i1 = (i >> 4) & 15;
901 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
902 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
905 barrier (CLK_LOCAL_MEM_FENCE);
907 if (gid >= gid_max) return;
915 w0[0] = pws[gid].i[ 0];
916 w0[1] = pws[gid].i[ 1];
917 w0[2] = pws[gid].i[ 2];
918 w0[3] = pws[gid].i[ 3];
922 w1[0] = pws[gid].i[ 4];
923 w1[1] = pws[gid].i[ 5];
924 w1[2] = pws[gid].i[ 6];
925 w1[3] = pws[gid].i[ 7];
938 w3[2] = pws[gid].i[14];
941 const u32 pw_len = pws[gid].pw_len;
947 m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, l_bin2asc);
950 __kernel void m04700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
956 const u32 gid = get_global_id (0);
957 const u32 lid = get_local_id (0);
958 const u32 lsz = get_local_size (0);
964 __local u32 l_bin2asc[256];
966 for (u32 i = lid; i < 256; i += lsz)
968 const u32 i0 = (i >> 0) & 15;
969 const u32 i1 = (i >> 4) & 15;
971 l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
972 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
975 barrier (CLK_LOCAL_MEM_FENCE);
977 if (gid >= gid_max) return;
985 w0[0] = pws[gid].i[ 0];
986 w0[1] = pws[gid].i[ 1];
987 w0[2] = pws[gid].i[ 2];
988 w0[3] = pws[gid].i[ 3];
992 w1[0] = pws[gid].i[ 4];
993 w1[1] = pws[gid].i[ 5];
994 w1[2] = pws[gid].i[ 6];
995 w1[3] = pws[gid].i[ 7];
999 w2[0] = pws[gid].i[ 8];
1000 w2[1] = pws[gid].i[ 9];
1001 w2[2] = pws[gid].i[10];
1002 w2[3] = pws[gid].i[11];
1006 w3[0] = pws[gid].i[12];
1007 w3[1] = pws[gid].i[13];
1008 w3[2] = pws[gid].i[14];
1009 w3[3] = pws[gid].i[15];
1011 const u32 pw_len = pws[gid].pw_len;
1017 m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, l_bin2asc);