2 * Author......: Jens Steube <jens.steube@gmail.com>
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
25 #include "include/kernel_functions.c"
27 #include "common_nv.c"
// File names of the comparison snippets that m00030s / m00030m pull in through
// `#include VECT_COMPARE_S` and `#include VECT_COMPARE_M`.
// One pair is listed per vector width (vect1 / vect2 / vect4).
// NOTE(review): the pairs are presumably selected by a vector-size conditional
// around each pair - confirm the guards.
30 #define VECT_COMPARE_S "check_single_vect1_comp4_warp.c"
31 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c"
35 #define VECT_COMPARE_S "check_single_vect2_comp4_warp.c"
36 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c"
40 #define VECT_COMPARE_S "check_single_vect4_comp4_warp.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c"
// Helper macros applied by m00030s to the target digest words (a_rev..d_rev)
// before its candidate loop; arguments are (function, four state words,
// message word, constant, shift).
// NOTE(review): judging by the names these undo MD5 steps - confirm against
// the macro bodies on the continuation lines.
44 #define MD5_STEP_REV(f,a,b,c,d,x,t,s) \
53 #define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \
61 __device__ __constant__ u32x c_bfs[1024]; // constant-memory table read as c_bfs[il_pos] in m00030m / m00030s and OR-ed with w0l to form w0
/**
 * m00030m - device worker behind the m00030_m04 / m00030_m08 / m00030_m16 kernels.
 *
 * w        : 16-word message block filled by the calling kernel from
 *            pws[gid].i[]; modified in place (salt words OR-ed in, w[14]
 *            overwritten with the length field).
 * pw_len   : pws[gid].pw_len as read by the calling kernel.
 * salt_pos : index of the salt_bufs entry to use.
 * bfs_cnt  : candidate count; the loop visits ceil (bfs_cnt / VECT_DIV)
 *            entries of c_bfs.
 *
 * Steps, in order: load the salt words, pass them through
 * switch_buffer_by_offset () together with pw_len, OR them into w, store the
 * combined length, fold the MD5 constants into the fixed message words, then
 * for each c_bfs entry build w0 and run the 64 MD5 steps, followed by the
 * VECT_COMPARE_M snippet.
 *
 * NOTE(review): the other parameters are not referenced by the statements in
 * this function body; presumably the VECT_COMPARE_M snippet uses them - confirm.
 */
63 __device__ static void m00030m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
// global thread index and index within the block
69 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
70 const u32 lid = threadIdx.x;
// copy the salt words of the selected salt into local buffers
78 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
79 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
80 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
81 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
85 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
86 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
87 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
88 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
92 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
// NOTE(review): presumably moves the salt to byte offset pw_len so it lands
// right after the password - confirm against switch_buffer_by_offset ()
104 switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
// merge the salt words into the message block
106 w[ 0] |= salt_buf0[0];
107 w[ 1] |= salt_buf0[1];
108 w[ 2] |= salt_buf0[2];
109 w[ 3] |= salt_buf0[3];
110 w[ 4] |= salt_buf1[0];
111 w[ 5] |= salt_buf1[1];
112 w[ 6] |= salt_buf1[2];
113 w[ 7] |= salt_buf1[3];
114 w[ 8] |= salt_buf2[0];
115 w[ 9] |= salt_buf2[1];
116 w[10] |= salt_buf2[2];
117 w[11] |= salt_buf2[3];
118 w[12] |= salt_buf3[0];
119 w[13] |= salt_buf3[1];
120 w[14] |= salt_buf3[2];
121 w[15] |= salt_buf3[3];
// w[14] is overwritten with (pw_len + salt_len) * 8
// NOTE(review): presumably the message length in bits - confirm units of pw_len / salt_len
123 const u32 salt_len = salt_bufs[salt_pos].salt_len;
125 const u32 pw_salt_len = pw_len + salt_len;
127 w[14] = pw_salt_len * 8;
// Message word + constant for every step, computed once per thread.
// Slot 0 carries only the constant because w0 is rebuilt for every candidate.
// Operands of the MD5_Fo steps:
133 const u32 F_w0c00 = 0 + MD5C00;
134 const u32 F_w1c01 = w[ 1] + MD5C01;
135 const u32 F_w2c02 = w[ 2] + MD5C02;
136 const u32 F_w3c03 = w[ 3] + MD5C03;
137 const u32 F_w4c04 = w[ 4] + MD5C04;
138 const u32 F_w5c05 = w[ 5] + MD5C05;
139 const u32 F_w6c06 = w[ 6] + MD5C06;
140 const u32 F_w7c07 = w[ 7] + MD5C07;
141 const u32 F_w8c08 = w[ 8] + MD5C08;
142 const u32 F_w9c09 = w[ 9] + MD5C09;
143 const u32 F_wac0a = w[10] + MD5C0a;
144 const u32 F_wbc0b = w[11] + MD5C0b;
145 const u32 F_wcc0c = w[12] + MD5C0c;
146 const u32 F_wdc0d = w[13] + MD5C0d;
147 const u32 F_wec0e = w[14] + MD5C0e;
148 const u32 F_wfc0f = w[15] + MD5C0f;
// Operands of the MD5_Go steps:
150 const u32 G_w1c10 = w[ 1] + MD5C10;
151 const u32 G_w6c11 = w[ 6] + MD5C11;
152 const u32 G_wbc12 = w[11] + MD5C12;
153 const u32 G_w0c13 = 0 + MD5C13;
154 const u32 G_w5c14 = w[ 5] + MD5C14;
155 const u32 G_wac15 = w[10] + MD5C15;
156 const u32 G_wfc16 = w[15] + MD5C16;
157 const u32 G_w4c17 = w[ 4] + MD5C17;
158 const u32 G_w9c18 = w[ 9] + MD5C18;
159 const u32 G_wec19 = w[14] + MD5C19;
160 const u32 G_w3c1a = w[ 3] + MD5C1a;
161 const u32 G_w8c1b = w[ 8] + MD5C1b;
162 const u32 G_wdc1c = w[13] + MD5C1c;
163 const u32 G_w2c1d = w[ 2] + MD5C1d;
164 const u32 G_w7c1e = w[ 7] + MD5C1e;
165 const u32 G_wcc1f = w[12] + MD5C1f;
// Operands of the MD5_H steps:
167 const u32 H_w5c20 = w[ 5] + MD5C20;
168 const u32 H_w8c21 = w[ 8] + MD5C21;
169 const u32 H_wbc22 = w[11] + MD5C22;
170 const u32 H_wec23 = w[14] + MD5C23;
171 const u32 H_w1c24 = w[ 1] + MD5C24;
172 const u32 H_w4c25 = w[ 4] + MD5C25;
173 const u32 H_w7c26 = w[ 7] + MD5C26;
174 const u32 H_wac27 = w[10] + MD5C27;
175 const u32 H_wdc28 = w[13] + MD5C28;
176 const u32 H_w0c29 = 0 + MD5C29;
177 const u32 H_w3c2a = w[ 3] + MD5C2a;
178 const u32 H_w6c2b = w[ 6] + MD5C2b;
179 const u32 H_w9c2c = w[ 9] + MD5C2c;
180 const u32 H_wcc2d = w[12] + MD5C2d;
181 const u32 H_wfc2e = w[15] + MD5C2e;
182 const u32 H_w2c2f = w[ 2] + MD5C2f;
// Operands of the MD5_I steps:
184 const u32 I_w0c30 = 0 + MD5C30;
185 const u32 I_w7c31 = w[ 7] + MD5C31;
186 const u32 I_wec32 = w[14] + MD5C32;
187 const u32 I_w5c33 = w[ 5] + MD5C33;
188 const u32 I_wcc34 = w[12] + MD5C34;
189 const u32 I_w3c35 = w[ 3] + MD5C35;
190 const u32 I_wac36 = w[10] + MD5C36;
191 const u32 I_w1c37 = w[ 1] + MD5C37;
192 const u32 I_w8c38 = w[ 8] + MD5C38;
193 const u32 I_wfc39 = w[15] + MD5C39;
194 const u32 I_w6c3a = w[ 6] + MD5C3a;
195 const u32 I_wdc3b = w[13] + MD5C3b;
196 const u32 I_w4c3c = w[ 4] + MD5C3c;
197 const u32 I_wbc3d = w[11] + MD5C3d;
198 const u32 I_w2c3e = w[ 2] + MD5C3e;
199 const u32 I_w9c3f = w[ 9] + MD5C3f;
// Trip count = ceil (bfs_cnt / VECT_DIV), done in exact integer arithmetic
// (no u32 -> float -> u32 round-trip, so no precision loss for large counts).
205 const u32 bf_loops = (bfs_cnt / (VECT_DIV)) + (((bfs_cnt % (VECT_DIV)) != 0) ? 1 : 0);
209 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
// build the per-candidate first message word
211 const u32x w0r = c_bfs[il_pos];
213 const u32x w0 = w0l | w0r;
// 16 steps with MD5_Fo; only the first one takes the live w0
220 MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
221 MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
222 MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02);
223 MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03);
224 MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00);
225 MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01);
226 MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02);
227 MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03);
228 MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00);
229 MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01);
230 MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02);
231 MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03);
232 MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00);
233 MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01);
234 MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02);
235 MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03);
// 16 steps with MD5_Go; w0 enters at the fourth step
237 MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10);
238 MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11);
239 MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12);
240 MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13);
241 MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10);
242 MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11);
243 MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12);
244 MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13);
245 MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10);
246 MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11);
247 MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12);
248 MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13);
249 MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10);
250 MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11);
251 MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12);
252 MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13);
// 16 steps with MD5_H; w0 enters at the tenth step
254 MD5_STEP0(MD5_H , a, b, c, d, H_w5c20, MD5S20);
255 MD5_STEP0(MD5_H , d, a, b, c, H_w8c21, MD5S21);
256 MD5_STEP0(MD5_H , c, d, a, b, H_wbc22, MD5S22);
257 MD5_STEP0(MD5_H , b, c, d, a, H_wec23, MD5S23);
258 MD5_STEP0(MD5_H , a, b, c, d, H_w1c24, MD5S20);
259 MD5_STEP0(MD5_H , d, a, b, c, H_w4c25, MD5S21);
260 MD5_STEP0(MD5_H , c, d, a, b, H_w7c26, MD5S22);
261 MD5_STEP0(MD5_H , b, c, d, a, H_wac27, MD5S23);
262 MD5_STEP0(MD5_H , a, b, c, d, H_wdc28, MD5S20);
263 MD5_STEP (MD5_H , d, a, b, c, w0, H_w0c29, MD5S21);
264 MD5_STEP0(MD5_H , c, d, a, b, H_w3c2a, MD5S22);
265 MD5_STEP0(MD5_H , b, c, d, a, H_w6c2b, MD5S23);
266 MD5_STEP0(MD5_H , a, b, c, d, H_w9c2c, MD5S20);
267 MD5_STEP0(MD5_H , d, a, b, c, H_wcc2d, MD5S21);
268 MD5_STEP0(MD5_H , c, d, a, b, H_wfc2e, MD5S22);
269 MD5_STEP0(MD5_H , b, c, d, a, H_w2c2f, MD5S23);
// 16 steps with MD5_I; w0 enters at the first step
271 MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30);
272 MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31);
273 MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32);
274 MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33);
275 MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30);
276 MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31);
277 MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32);
278 MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33);
279 MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30);
280 MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31);
281 MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32);
282 MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33);
283 MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30);
284 MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31);
285 MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32);
286 MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33);
// comparison snippet for the multi-hash variant (file chosen by VECT_COMPARE_M)
294 #include VECT_COMPARE_M
/**
 * m00030s - device worker behind the m00030_s04 / m00030_s08 / m00030_s16 kernels.
 *
 * w              : 16-word message block filled by the calling kernel from
 *                  pws[gid].i[].
 * digests_offset : index of the digests_buf entry that is searched for.
 * bfs_cnt        : candidate count; the loop visits ceil (bfs_cnt / VECT_DIV)
 *                  entries of c_bfs.
 *
 * Steps, in order: fold the MD5 constants into the fixed message words, copy
 * the target digest into search[] and into a_rev..d_rev, apply the
 * MD5_STEP_REV / MD5_STEP_REV1 macros to a_rev..d_rev using the message words
 * that do not change per candidate, then for each c_bfs entry build w0, run
 * the forward steps, skip the candidate early when pre_c != c, and otherwise
 * finish the steps, followed by the VECT_COMPARE_S snippet.
 *
 * NOTE(review): the other parameters are not referenced by the statements in
 * this function body; presumably the VECT_COMPARE_S snippet uses them - confirm.
 */
298 __device__ static void m00030s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
// global thread index and index within the block
304 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
305 const u32 lid = threadIdx.x;
// Message word + constant for every step, computed once per thread.
// Slot 0 carries only the constant because w0 is rebuilt for every candidate.
// Operands of the MD5_Fo steps:
311 const u32 F_w0c00 = 0 + MD5C00;
312 const u32 F_w1c01 = w[ 1] + MD5C01;
313 const u32 F_w2c02 = w[ 2] + MD5C02;
314 const u32 F_w3c03 = w[ 3] + MD5C03;
315 const u32 F_w4c04 = w[ 4] + MD5C04;
316 const u32 F_w5c05 = w[ 5] + MD5C05;
317 const u32 F_w6c06 = w[ 6] + MD5C06;
318 const u32 F_w7c07 = w[ 7] + MD5C07;
319 const u32 F_w8c08 = w[ 8] + MD5C08;
320 const u32 F_w9c09 = w[ 9] + MD5C09;
321 const u32 F_wac0a = w[10] + MD5C0a;
322 const u32 F_wbc0b = w[11] + MD5C0b;
323 const u32 F_wcc0c = w[12] + MD5C0c;
324 const u32 F_wdc0d = w[13] + MD5C0d;
325 const u32 F_wec0e = w[14] + MD5C0e;
326 const u32 F_wfc0f = w[15] + MD5C0f;
// Operands of the MD5_Go steps:
328 const u32 G_w1c10 = w[ 1] + MD5C10;
329 const u32 G_w6c11 = w[ 6] + MD5C11;
330 const u32 G_wbc12 = w[11] + MD5C12;
331 const u32 G_w0c13 = 0 + MD5C13;
332 const u32 G_w5c14 = w[ 5] + MD5C14;
333 const u32 G_wac15 = w[10] + MD5C15;
334 const u32 G_wfc16 = w[15] + MD5C16;
335 const u32 G_w4c17 = w[ 4] + MD5C17;
336 const u32 G_w9c18 = w[ 9] + MD5C18;
337 const u32 G_wec19 = w[14] + MD5C19;
338 const u32 G_w3c1a = w[ 3] + MD5C1a;
339 const u32 G_w8c1b = w[ 8] + MD5C1b;
340 const u32 G_wdc1c = w[13] + MD5C1c;
341 const u32 G_w2c1d = w[ 2] + MD5C1d;
342 const u32 G_w7c1e = w[ 7] + MD5C1e;
343 const u32 G_wcc1f = w[12] + MD5C1f;
// Operands of the MD5_H steps:
345 const u32 H_w5c20 = w[ 5] + MD5C20;
346 const u32 H_w8c21 = w[ 8] + MD5C21;
347 const u32 H_wbc22 = w[11] + MD5C22;
348 const u32 H_wec23 = w[14] + MD5C23;
349 const u32 H_w1c24 = w[ 1] + MD5C24;
350 const u32 H_w4c25 = w[ 4] + MD5C25;
351 const u32 H_w7c26 = w[ 7] + MD5C26;
352 const u32 H_wac27 = w[10] + MD5C27;
353 const u32 H_wdc28 = w[13] + MD5C28;
354 const u32 H_w0c29 = 0 + MD5C29;
355 const u32 H_w3c2a = w[ 3] + MD5C2a;
356 const u32 H_w6c2b = w[ 6] + MD5C2b;
357 const u32 H_w9c2c = w[ 9] + MD5C2c;
358 const u32 H_wcc2d = w[12] + MD5C2d;
359 const u32 H_wfc2e = w[15] + MD5C2e;
360 const u32 H_w2c2f = w[ 2] + MD5C2f;
// Operands of the MD5_I steps:
362 const u32 I_w0c30 = 0 + MD5C30;
363 const u32 I_w7c31 = w[ 7] + MD5C31;
364 const u32 I_wec32 = w[14] + MD5C32;
365 const u32 I_w5c33 = w[ 5] + MD5C33;
366 const u32 I_wcc34 = w[12] + MD5C34;
367 const u32 I_w3c35 = w[ 3] + MD5C35;
368 const u32 I_wac36 = w[10] + MD5C36;
369 const u32 I_w1c37 = w[ 1] + MD5C37;
370 const u32 I_w8c38 = w[ 8] + MD5C38;
371 const u32 I_wfc39 = w[15] + MD5C39;
372 const u32 I_w6c3a = w[ 6] + MD5C3a;
373 const u32 I_wdc3b = w[13] + MD5C3b;
374 const u32 I_w4c3c = w[ 4] + MD5C3c;
375 const u32 I_wbc3d = w[11] + MD5C3d;
376 const u32 I_w2c3e = w[ 2] + MD5C3e;
377 const u32 I_w9c3f = w[ 9] + MD5C3f;
// target digest words in DGST_R0..DGST_R3 order
// NOTE(review): search[] is presumably read by the VECT_COMPARE_S snippet - confirm
383 const u32 search[4] =
385 digests_buf[digests_offset].digest_buf[DGST_R0],
386 digests_buf[digests_offset].digest_buf[DGST_R1],
387 digests_buf[digests_offset].digest_buf[DGST_R2],
388 digests_buf[digests_offset].digest_buf[DGST_R3]
// Start from the target digest words and apply the reverse-step macros with
// the message words that are fixed for this thread (slot 0 is passed as 0
// because w0 is only known inside the loop).
395 u32 a_rev = digests_buf[digests_offset].digest_buf[0];
396 u32 b_rev = digests_buf[digests_offset].digest_buf[1];
397 u32 c_rev = digests_buf[digests_offset].digest_buf[2];
398 u32 d_rev = digests_buf[digests_offset].digest_buf[3];
400 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
401 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
402 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
403 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
404 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
405 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
406 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
407 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
408 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
409 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
410 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
411 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
412 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
413 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
414 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
415 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
// c_rev ^ d_rev is captured here, before the two REV1 steps below modify
// b_rev and c_rev; it is reused for every candidate when deriving pre_b
417 const u32x pre_cd = c_rev ^ d_rev;
419 MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
420 MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
// Trip count = ceil (bfs_cnt / VECT_DIV), done in exact integer arithmetic
// (no u32 -> float -> u32 round-trip, so no precision loss for large counts).
426 const u32 bf_loops = (bfs_cnt / (VECT_DIV)) + (((bfs_cnt % (VECT_DIV)) != 0) ? 1 : 0);
430 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
// build the per-candidate first message word
432 const u32x w0r = c_bfs[il_pos];
434 const u32x w0 = w0l | w0r;
// per-candidate values derived from the reversed digest state and w0;
// pre_c is what the forward state word c is compared with further down
436 const u32x pre_d = d_rev;
437 const u32x pre_a = a_rev - w0;
438 const u32x pre_b = b_rev - (pre_a ^ pre_cd);
439 const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d);
// 16 steps with MD5_Fo; only the first one takes the live w0
446 MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
447 MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
448 MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02);
449 MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03);
450 MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00);
451 MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01);
452 MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02);
453 MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03);
454 MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00);
455 MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01);
456 MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02);
457 MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03);
458 MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00);
459 MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01);
460 MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02);
461 MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03);
// 16 steps with MD5_Go; w0 enters at the fourth step
463 MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10);
464 MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11);
465 MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12);
466 MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13);
467 MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10);
468 MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11);
469 MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12);
470 MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13);
471 MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10);
472 MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11);
473 MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12);
474 MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13);
475 MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10);
476 MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11);
477 MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12);
478 MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13);
// first 11 steps with MD5_H; w0 enters at the tenth step
480 MD5_STEP0(MD5_H , a, b, c, d, H_w5c20, MD5S20);
481 MD5_STEP0(MD5_H , d, a, b, c, H_w8c21, MD5S21);
482 MD5_STEP0(MD5_H , c, d, a, b, H_wbc22, MD5S22);
483 MD5_STEP0(MD5_H , b, c, d, a, H_wec23, MD5S23);
484 MD5_STEP0(MD5_H , a, b, c, d, H_w1c24, MD5S20);
485 MD5_STEP0(MD5_H , d, a, b, c, H_w4c25, MD5S21);
486 MD5_STEP0(MD5_H , c, d, a, b, H_w7c26, MD5S22);
487 MD5_STEP0(MD5_H , b, c, d, a, H_wac27, MD5S23);
488 MD5_STEP0(MD5_H , a, b, c, d, H_wdc28, MD5S20);
489 MD5_STEP (MD5_H , d, a, b, c, w0, H_w0c29, MD5S21);
490 MD5_STEP0(MD5_H , c, d, a, b, H_w3c2a, MD5S22);
// early reject: skip the remaining steps when the forward state word c
// does not equal the value derived from the reversed digest
492 bool q_cond = (pre_c != c);
494 if (q_cond) continue;
// remaining 5 steps with MD5_H
496 MD5_STEP0(MD5_H , b, c, d, a, H_w6c2b, MD5S23);
497 MD5_STEP0(MD5_H , a, b, c, d, H_w9c2c, MD5S20);
498 MD5_STEP0(MD5_H , d, a, b, c, H_wcc2d, MD5S21);
499 MD5_STEP0(MD5_H , c, d, a, b, H_wfc2e, MD5S22);
500 MD5_STEP0(MD5_H , b, c, d, a, H_w2c2f, MD5S23);
// 16 steps with MD5_I; w0 enters at the first step
502 MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30);
503 MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31);
504 MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32);
505 MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33);
506 MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30);
507 MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31);
508 MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32);
509 MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33);
510 MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30);
511 MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31);
512 MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32);
513 MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33);
514 MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30);
515 MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31);
516 MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32);
517 MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33);
// comparison snippet for the single-hash variant (file chosen by VECT_COMPARE_S)
525 #include VECT_COMPARE_S
/**
 * m00030_m04 - kernel entry point that prepares one candidate and calls m00030m.
 *
 * Each thread derives gid from blockIdx / blockDim / threadIdx and returns at
 * once when gid >= gid_max. Otherwise it loads words from pws[gid].i[] into w
 * (w[0..3] and w[14] here), reads pws[gid].pw_len and forwards every kernel
 * argument except combs_mode and gid_max to m00030m.
 * Declared with __launch_bounds__ (256, 1).
 */
529 extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// one thread per pws entry; threads past gid_max do nothing
535 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
537 if (gid >= gid_max) return;
// message words taken from the candidate record
541 w[ 0] = pws[gid].i[ 0];
542 w[ 1] = pws[gid].i[ 1];
543 w[ 2] = pws[gid].i[ 2];
544 w[ 3] = pws[gid].i[ 3];
555 w[14] = pws[gid].i[14];
558 const u32 pw_len = pws[gid].pw_len;
// hand over to the shared multi-hash worker
564 m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/**
 * m00030_m08 - kernel entry point that prepares one candidate and calls m00030m.
 *
 * Each thread derives gid from blockIdx / blockDim / threadIdx and returns at
 * once when gid >= gid_max. Otherwise it loads words from pws[gid].i[] into w
 * (w[0..7] and w[14] here), reads pws[gid].pw_len and forwards every kernel
 * argument except combs_mode and gid_max to m00030m.
 * Declared with __launch_bounds__ (256, 1).
 */
567 extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// one thread per pws entry; threads past gid_max do nothing
573 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
575 if (gid >= gid_max) return;
// message words taken from the candidate record
579 w[ 0] = pws[gid].i[ 0];
580 w[ 1] = pws[gid].i[ 1];
581 w[ 2] = pws[gid].i[ 2];
582 w[ 3] = pws[gid].i[ 3];
583 w[ 4] = pws[gid].i[ 4];
584 w[ 5] = pws[gid].i[ 5];
585 w[ 6] = pws[gid].i[ 6];
586 w[ 7] = pws[gid].i[ 7];
593 w[14] = pws[gid].i[14];
596 const u32 pw_len = pws[gid].pw_len;
// hand over to the shared multi-hash worker
602 m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/**
 * m00030_m16 - kernel entry point that prepares one candidate and calls m00030m.
 *
 * Each thread derives gid from blockIdx / blockDim / threadIdx and returns at
 * once when gid >= gid_max. Otherwise it loads all 16 words of pws[gid].i[]
 * into w, reads pws[gid].pw_len and forwards every kernel argument except
 * combs_mode and gid_max to m00030m.
 * Declared with __launch_bounds__ (256, 1).
 */
605 extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// one thread per pws entry; threads past gid_max do nothing
611 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
613 if (gid >= gid_max) return;
// full 16-word message block taken from the candidate record
617 w[ 0] = pws[gid].i[ 0];
618 w[ 1] = pws[gid].i[ 1];
619 w[ 2] = pws[gid].i[ 2];
620 w[ 3] = pws[gid].i[ 3];
621 w[ 4] = pws[gid].i[ 4];
622 w[ 5] = pws[gid].i[ 5];
623 w[ 6] = pws[gid].i[ 6];
624 w[ 7] = pws[gid].i[ 7];
625 w[ 8] = pws[gid].i[ 8];
626 w[ 9] = pws[gid].i[ 9];
627 w[10] = pws[gid].i[10];
628 w[11] = pws[gid].i[11];
629 w[12] = pws[gid].i[12];
630 w[13] = pws[gid].i[13];
631 w[14] = pws[gid].i[14];
632 w[15] = pws[gid].i[15];
634 const u32 pw_len = pws[gid].pw_len;
// hand over to the shared multi-hash worker
640 m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/**
 * m00030_s04 - kernel entry point that prepares one candidate and calls m00030s.
 *
 * Each thread derives gid from blockIdx / blockDim / threadIdx and returns at
 * once when gid >= gid_max. Otherwise it loads words from pws[gid].i[] into w
 * (w[0..3] and w[14] here), reads pws[gid].pw_len and forwards every kernel
 * argument except combs_mode and gid_max to m00030s.
 * Declared with __launch_bounds__ (256, 1).
 */
643 extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// one thread per pws entry; threads past gid_max do nothing
649 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
651 if (gid >= gid_max) return;
// message words taken from the candidate record
655 w[ 0] = pws[gid].i[ 0];
656 w[ 1] = pws[gid].i[ 1];
657 w[ 2] = pws[gid].i[ 2];
658 w[ 3] = pws[gid].i[ 3];
669 w[14] = pws[gid].i[14];
672 const u32 pw_len = pws[gid].pw_len;
// hand over to the shared single-hash worker
678 m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/**
 * m00030_s08 - kernel entry point that prepares one candidate and calls m00030s.
 *
 * Each thread derives gid from blockIdx / blockDim / threadIdx and returns at
 * once when gid >= gid_max. Otherwise it loads words from pws[gid].i[] into w
 * (w[0..7] and w[14] here), reads pws[gid].pw_len and forwards every kernel
 * argument except combs_mode and gid_max to m00030s.
 * Declared with __launch_bounds__ (256, 1).
 */
681 extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// one thread per pws entry; threads past gid_max do nothing
687 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
689 if (gid >= gid_max) return;
// message words taken from the candidate record
693 w[ 0] = pws[gid].i[ 0];
694 w[ 1] = pws[gid].i[ 1];
695 w[ 2] = pws[gid].i[ 2];
696 w[ 3] = pws[gid].i[ 3];
697 w[ 4] = pws[gid].i[ 4];
698 w[ 5] = pws[gid].i[ 5];
699 w[ 6] = pws[gid].i[ 6];
700 w[ 7] = pws[gid].i[ 7];
707 w[14] = pws[gid].i[14];
710 const u32 pw_len = pws[gid].pw_len;
// hand over to the shared single-hash worker
716 m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/**
 * m00030_s16 - kernel entry point that prepares one candidate and calls m00030s.
 *
 * Each thread derives gid from blockIdx / blockDim / threadIdx and returns at
 * once when gid >= gid_max. Otherwise it loads all 16 words of pws[gid].i[]
 * into w, reads pws[gid].pw_len and forwards every kernel argument except
 * combs_mode and gid_max to m00030s.
 * Declared with __launch_bounds__ (256, 1).
 */
719 extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// one thread per pws entry; threads past gid_max do nothing
725 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
727 if (gid >= gid_max) return;
// full 16-word message block taken from the candidate record
731 w[ 0] = pws[gid].i[ 0];
732 w[ 1] = pws[gid].i[ 1];
733 w[ 2] = pws[gid].i[ 2];
734 w[ 3] = pws[gid].i[ 3];
735 w[ 4] = pws[gid].i[ 4];
736 w[ 5] = pws[gid].i[ 5];
737 w[ 6] = pws[gid].i[ 6];
738 w[ 7] = pws[gid].i[ 7];
739 w[ 8] = pws[gid].i[ 8];
740 w[ 9] = pws[gid].i[ 9];
741 w[10] = pws[gid].i[10];
742 w[11] = pws[gid].i[11];
743 w[12] = pws[gid].i[12];
744 w[13] = pws[gid].i[13];
745 w[14] = pws[gid].i[14];
746 w[15] = pws[gid].i[15];
748 const u32 pw_len = pws[gid].pw_len;
// hand over to the shared single-hash worker
754 m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);