2 * Author......: Jens Steube <jens.steube@gmail.com>
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
25 #include "include/kernel_functions.c"
27 #include "common_nv.c"
// VECT_COMPARE_S / VECT_COMPARE_M name the files pulled in by the
// `#include VECT_COMPARE_S` / `#include VECT_COMPARE_M` lines further down.
// NOTE(review): three pairs (vect1 / vect2 / vect4) define the same two macros;
// the preprocessor conditionals that select one pair are not visible in this
// view - presumably keyed on the vector width; confirm.
30 #define VECT_COMPARE_S "check_single_vect1_comp4_warp.c"
31 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c"
35 #define VECT_COMPARE_S "check_single_vect2_comp4_warp.c"
36 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c"
40 #define VECT_COMPARE_S "check_single_vect4_comp4_warp.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c"
44 #define MD5_STEP_REV(f,a,b,c,d,x,t,s) \
53 #define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \
// Constant-memory table of 1024 u32x entries. m00010m and m00010s read
// c_bfs[il_pos] once per loop iteration and OR it into the first message word.
// NOTE(review): nothing in this file writes the table - presumably it is
// filled from the host before launch; confirm.
61 __device__ __constant__ u32x c_bfs[1024];
// m00010m: device helper called by the m00010_m04 / m00010_m08 / m00010_m16
// kernels.
//
// Visible flow:
//   1. load the salt words for salt_pos and pass them, together with pw_len,
//      to switch_buffer_by_offset (presumably shifting the salt so that it
//      starts right after the password bytes - helper is defined elsewhere;
//      confirm),
//   2. OR the salt words into w[0..15] and store (pw_len + salt_len) * 8 in
//      w[14],
//   3. precompute "message word + round constant" sums for all 64 steps,
//   4. for each c_bfs entry, build w0 and run the 64 MD5_STEP / MD5_STEP0
//      invocations, followed by the VECT_COMPARE_M include.
//
// w is modified in place. Most parameters are not referenced in the visible
// lines of this function.
// NOTE(review): those parameters are presumably consumed by the code pulled in
// through VECT_COMPARE_M; confirm.
63 __device__ static void m00010m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
// gid: flat thread index across the 1D grid; lid: thread index inside the block.
69 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
70 const u32 lid = threadIdx.x;
// Copy the salt words of the selected salt into local buffers of four words each.
78 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
79 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
80 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
81 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
85 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
86 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
87 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
88 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
92 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
// NOTE(review): presumably moves the salt bytes up by pw_len so they land
// behind the password inside the 64-byte block; confirm against the helper.
104 switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
// Merge the salt words into the message block with OR; the password words
// already present in w are kept.
106 w[ 0] |= salt_buf0[0];
107 w[ 1] |= salt_buf0[1];
108 w[ 2] |= salt_buf0[2];
109 w[ 3] |= salt_buf0[3];
110 w[ 4] |= salt_buf1[0];
111 w[ 5] |= salt_buf1[1];
112 w[ 6] |= salt_buf1[2];
113 w[ 7] |= salt_buf1[3];
114 w[ 8] |= salt_buf2[0];
115 w[ 9] |= salt_buf2[1];
116 w[10] |= salt_buf2[2];
117 w[11] |= salt_buf2[3];
118 w[12] |= salt_buf3[0];
119 w[13] |= salt_buf3[1];
120 w[14] |= salt_buf3[2];
121 w[15] |= salt_buf3[3];
123 const u32 salt_len = salt_bufs[salt_pos].salt_len;
125 const u32 pw_salt_len = pw_len + salt_len;
// Overwrites w[14] (including the salt bits OR-ed in above) with the combined
// length times 8 - presumably the MD5 message length in bits.
127 w[14] = pw_salt_len * 8;
// Loop-invariant sums "message word + step constant", one per MD5 step.
// Entries that belong to w[0] use 0 instead of the word: w0 changes per
// candidate and is passed separately to MD5_STEP inside the loop.
133 const u32 F_w0c00 = 0 + MD5C00;
134 const u32 F_w1c01 = w[ 1] + MD5C01;
135 const u32 F_w2c02 = w[ 2] + MD5C02;
136 const u32 F_w3c03 = w[ 3] + MD5C03;
137 const u32 F_w4c04 = w[ 4] + MD5C04;
138 const u32 F_w5c05 = w[ 5] + MD5C05;
139 const u32 F_w6c06 = w[ 6] + MD5C06;
140 const u32 F_w7c07 = w[ 7] + MD5C07;
141 const u32 F_w8c08 = w[ 8] + MD5C08;
142 const u32 F_w9c09 = w[ 9] + MD5C09;
143 const u32 F_wac0a = w[10] + MD5C0a;
144 const u32 F_wbc0b = w[11] + MD5C0b;
145 const u32 F_wcc0c = w[12] + MD5C0c;
146 const u32 F_wdc0d = w[13] + MD5C0d;
147 const u32 F_wec0e = w[14] + MD5C0e;
148 const u32 F_wfc0f = w[15] + MD5C0f;
150 const u32 G_w1c10 = w[ 1] + MD5C10;
151 const u32 G_w6c11 = w[ 6] + MD5C11;
152 const u32 G_wbc12 = w[11] + MD5C12;
153 const u32 G_w0c13 = 0 + MD5C13;
154 const u32 G_w5c14 = w[ 5] + MD5C14;
155 const u32 G_wac15 = w[10] + MD5C15;
156 const u32 G_wfc16 = w[15] + MD5C16;
157 const u32 G_w4c17 = w[ 4] + MD5C17;
158 const u32 G_w9c18 = w[ 9] + MD5C18;
159 const u32 G_wec19 = w[14] + MD5C19;
160 const u32 G_w3c1a = w[ 3] + MD5C1a;
161 const u32 G_w8c1b = w[ 8] + MD5C1b;
162 const u32 G_wdc1c = w[13] + MD5C1c;
163 const u32 G_w2c1d = w[ 2] + MD5C1d;
164 const u32 G_w7c1e = w[ 7] + MD5C1e;
165 const u32 G_wcc1f = w[12] + MD5C1f;
167 const u32 H_w5c20 = w[ 5] + MD5C20;
168 const u32 H_w8c21 = w[ 8] + MD5C21;
169 const u32 H_wbc22 = w[11] + MD5C22;
170 const u32 H_wec23 = w[14] + MD5C23;
171 const u32 H_w1c24 = w[ 1] + MD5C24;
172 const u32 H_w4c25 = w[ 4] + MD5C25;
173 const u32 H_w7c26 = w[ 7] + MD5C26;
174 const u32 H_wac27 = w[10] + MD5C27;
175 const u32 H_wdc28 = w[13] + MD5C28;
176 const u32 H_w0c29 = 0 + MD5C29;
177 const u32 H_w3c2a = w[ 3] + MD5C2a;
178 const u32 H_w6c2b = w[ 6] + MD5C2b;
179 const u32 H_w9c2c = w[ 9] + MD5C2c;
180 const u32 H_wcc2d = w[12] + MD5C2d;
181 const u32 H_wfc2e = w[15] + MD5C2e;
182 const u32 H_w2c2f = w[ 2] + MD5C2f;
184 const u32 I_w0c30 = 0 + MD5C30;
185 const u32 I_w7c31 = w[ 7] + MD5C31;
186 const u32 I_wec32 = w[14] + MD5C32;
187 const u32 I_w5c33 = w[ 5] + MD5C33;
188 const u32 I_wcc34 = w[12] + MD5C34;
189 const u32 I_w3c35 = w[ 3] + MD5C35;
190 const u32 I_wac36 = w[10] + MD5C36;
191 const u32 I_w1c37 = w[ 1] + MD5C37;
192 const u32 I_w8c38 = w[ 8] + MD5C38;
193 const u32 I_wfc39 = w[15] + MD5C39;
194 const u32 I_w6c3a = w[ 6] + MD5C3a;
195 const u32 I_wdc3b = w[13] + MD5C3b;
196 const u32 I_w4c3c = w[ 4] + MD5C3c;
197 const u32 I_wbc3d = w[11] + MD5C3d;
198 const u32 I_w2c3e = w[ 2] + MD5C3e;
199 const u32 I_w9c3f = w[ 9] + MD5C3f;
// Iteration count: bfs_cnt divided by VECT_DIV, rounded up.
// NOTE(review): the division goes through float; an integer ceil-div
// ((bfs_cnt + VECT_DIV - 1) / VECT_DIV) would avoid the conversion - consider.
205 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
209 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
// Candidate first word: loop-varying part from c_bfs OR-ed with w0l.
// NOTE(review): w0l is declared outside the visible lines - presumably w[0]; confirm.
211 const u32x w0r = c_bfs[il_pos];
213 const u32x w0 = w0l | w0r;
// First group of 16 steps (MD5_Fo). MD5_STEP receives w0 explicitly; the
// MD5_STEP0 invocations take only the precomputed sum.
222 MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
223 MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
224 MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02);
225 MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03);
226 MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00);
227 MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01);
228 MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02);
229 MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03);
230 MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00);
231 MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01);
232 MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02);
233 MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03);
234 MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00);
235 MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01);
236 MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02);
237 MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03);
// Second group of 16 steps (MD5_Go); w0 is consumed in the fourth step.
239 MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10);
240 MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11);
241 MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12);
242 MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13);
243 MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10);
244 MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11);
245 MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12);
246 MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13);
247 MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10);
248 MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11);
249 MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12);
250 MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13);
251 MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10);
252 MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11);
253 MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12);
254 MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13);
// Third group of 16 steps, alternating MD5_H1 and MD5_H2; w0 is consumed in
// the tenth step.
256 MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20);
257 MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21);
258 MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22);
259 MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23);
260 MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20);
261 MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21);
262 MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22);
263 MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23);
264 MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20);
265 MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21);
266 MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22);
267 MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23);
268 MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20);
269 MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21);
270 MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22);
271 MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23);
// Fourth group of 16 steps (MD5_I); w0 is consumed in the first step.
273 MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30);
274 MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31);
275 MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32);
276 MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33);
277 MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30);
278 MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31);
279 MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32);
280 MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33);
281 MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30);
282 MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31);
283 MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32);
284 MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33);
285 MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30);
286 MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31);
287 MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32);
288 MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33);
// Comparison code for the multi-digest case is textually included from the
// file selected by VECT_COMPARE_M.
296 #include VECT_COMPARE_M
// m00010s: device helper called by the m00010_s04 / m00010_s08 / m00010_s16
// kernels (single target digest at digests_buf[digests_offset]).
//
// Visible flow:
//   1. precompute "message word + round constant" sums for all 64 steps,
//   2. read the target digest and run MD5_STEP_REV / MD5_STEP_REV1 over it,
//      walking the step constants from MD5C3f downwards,
//   3. for each c_bfs entry, build w0, derive pre_a..pre_d from the reversed
//      state, run the forward steps and skip the candidate as soon as c
//      differs from pre_c; otherwise finish the steps, followed by the
//      VECT_COMPARE_S include.
//
// NOTE(review): unlike m00010m, the visible lines of this function never merge
// the salt into w or set w[14] from pw_len + salt_len. Confirm whether that
// happens elsewhere or is missing for the single-digest path.
300 __device__ static void m00010s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
// gid: flat thread index across the 1D grid; lid: thread index inside the block.
306 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
307 const u32 lid = threadIdx.x;
// Loop-invariant sums "message word + step constant", one per MD5 step.
// Entries that belong to w[0] use 0 instead of the word: w0 changes per
// candidate and is passed separately to MD5_STEP inside the loop.
313 const u32 F_w0c00 = 0 + MD5C00;
314 const u32 F_w1c01 = w[ 1] + MD5C01;
315 const u32 F_w2c02 = w[ 2] + MD5C02;
316 const u32 F_w3c03 = w[ 3] + MD5C03;
317 const u32 F_w4c04 = w[ 4] + MD5C04;
318 const u32 F_w5c05 = w[ 5] + MD5C05;
319 const u32 F_w6c06 = w[ 6] + MD5C06;
320 const u32 F_w7c07 = w[ 7] + MD5C07;
321 const u32 F_w8c08 = w[ 8] + MD5C08;
322 const u32 F_w9c09 = w[ 9] + MD5C09;
323 const u32 F_wac0a = w[10] + MD5C0a;
324 const u32 F_wbc0b = w[11] + MD5C0b;
325 const u32 F_wcc0c = w[12] + MD5C0c;
326 const u32 F_wdc0d = w[13] + MD5C0d;
327 const u32 F_wec0e = w[14] + MD5C0e;
328 const u32 F_wfc0f = w[15] + MD5C0f;
330 const u32 G_w1c10 = w[ 1] + MD5C10;
331 const u32 G_w6c11 = w[ 6] + MD5C11;
332 const u32 G_wbc12 = w[11] + MD5C12;
333 const u32 G_w0c13 = 0 + MD5C13;
334 const u32 G_w5c14 = w[ 5] + MD5C14;
335 const u32 G_wac15 = w[10] + MD5C15;
336 const u32 G_wfc16 = w[15] + MD5C16;
337 const u32 G_w4c17 = w[ 4] + MD5C17;
338 const u32 G_w9c18 = w[ 9] + MD5C18;
339 const u32 G_wec19 = w[14] + MD5C19;
340 const u32 G_w3c1a = w[ 3] + MD5C1a;
341 const u32 G_w8c1b = w[ 8] + MD5C1b;
342 const u32 G_wdc1c = w[13] + MD5C1c;
343 const u32 G_w2c1d = w[ 2] + MD5C1d;
344 const u32 G_w7c1e = w[ 7] + MD5C1e;
345 const u32 G_wcc1f = w[12] + MD5C1f;
347 const u32 H_w5c20 = w[ 5] + MD5C20;
348 const u32 H_w8c21 = w[ 8] + MD5C21;
349 const u32 H_wbc22 = w[11] + MD5C22;
350 const u32 H_wec23 = w[14] + MD5C23;
351 const u32 H_w1c24 = w[ 1] + MD5C24;
352 const u32 H_w4c25 = w[ 4] + MD5C25;
353 const u32 H_w7c26 = w[ 7] + MD5C26;
354 const u32 H_wac27 = w[10] + MD5C27;
355 const u32 H_wdc28 = w[13] + MD5C28;
356 const u32 H_w0c29 = 0 + MD5C29;
357 const u32 H_w3c2a = w[ 3] + MD5C2a;
358 const u32 H_w6c2b = w[ 6] + MD5C2b;
359 const u32 H_w9c2c = w[ 9] + MD5C2c;
360 const u32 H_wcc2d = w[12] + MD5C2d;
361 const u32 H_wfc2e = w[15] + MD5C2e;
362 const u32 H_w2c2f = w[ 2] + MD5C2f;
364 const u32 I_w0c30 = 0 + MD5C30;
365 const u32 I_w7c31 = w[ 7] + MD5C31;
366 const u32 I_wec32 = w[14] + MD5C32;
367 const u32 I_w5c33 = w[ 5] + MD5C33;
368 const u32 I_wcc34 = w[12] + MD5C34;
369 const u32 I_w3c35 = w[ 3] + MD5C35;
370 const u32 I_wac36 = w[10] + MD5C36;
371 const u32 I_w1c37 = w[ 1] + MD5C37;
372 const u32 I_w8c38 = w[ 8] + MD5C38;
373 const u32 I_wfc39 = w[15] + MD5C39;
374 const u32 I_w6c3a = w[ 6] + MD5C3a;
375 const u32 I_wdc3b = w[13] + MD5C3b;
376 const u32 I_w4c3c = w[ 4] + MD5C3c;
377 const u32 I_wbc3d = w[11] + MD5C3d;
378 const u32 I_w2c3e = w[ 2] + MD5C3e;
379 const u32 I_w9c3f = w[ 9] + MD5C3f;
// Target digest words, picked in DGST_R0..DGST_R3 order.
// NOTE(review): search is not referenced in the visible lines - presumably
// used by the VECT_COMPARE_S include; confirm.
385 const u32 search[4] =
387 digests_buf[digests_offset].digest_buf[DGST_R0],
388 digests_buf[digests_offset].digest_buf[DGST_R1],
389 digests_buf[digests_offset].digest_buf[DGST_R2],
390 digests_buf[digests_offset].digest_buf[DGST_R3]
// Start from the target digest words in natural order and undo steps from the
// end of the MD5 computation; these depend only on the fixed message words, so
// this happens once, outside the candidate loop.
397 u32 a_rev = digests_buf[digests_offset].digest_buf[0];
398 u32 b_rev = digests_buf[digests_offset].digest_buf[1];
399 u32 c_rev = digests_buf[digests_offset].digest_buf[2];
400 u32 d_rev = digests_buf[digests_offset].digest_buf[3];
// MD5_I steps with constants MD5C3f down to MD5C30, in reverse order.
402 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
403 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
404 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
405 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
406 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
407 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
408 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
409 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
410 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
411 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
412 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
413 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
414 MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
415 MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
416 MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
// The step that uses w[0] is reversed with 0 as the message word; the
// candidate's w0 is subtracted later, inside the loop (see pre_a).
417 MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
// Snapshot of c_rev ^ d_rev taken before the two MD5_STEP_REV1 steps; used
// when forming pre_b.
419 const u32x pre_cd = c_rev ^ d_rev;
421 MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
422 MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
// Iteration count: bfs_cnt divided by VECT_DIV, rounded up.
// NOTE(review): the division goes through float; an integer ceil-div
// ((bfs_cnt + VECT_DIV - 1) / VECT_DIV) would avoid the conversion - consider.
428 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
432 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
// Candidate first word: loop-varying part from c_bfs OR-ed with w0l.
// NOTE(review): w0l is declared outside the visible lines - presumably w[0]; confirm.
434 const u32x w0r = c_bfs[il_pos];
436 const u32x w0 = w0l | w0r;
// Per-candidate expected values derived from the reversed state and w0.
// Only pre_c is compared in the visible lines (early-exit test below).
438 const u32x pre_d = d_rev;
439 const u32x pre_a = a_rev - w0;
440 const u32x pre_b = b_rev - (pre_a ^ pre_cd);
441 const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d);
// First group of 16 steps (MD5_Fo). MD5_STEP receives w0 explicitly; the
// MD5_STEP0 invocations take only the precomputed sum.
450 MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
451 MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
452 MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02);
453 MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03);
454 MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00);
455 MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01);
456 MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02);
457 MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03);
458 MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00);
459 MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01);
460 MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02);
461 MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03);
462 MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00);
463 MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01);
464 MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02);
465 MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03);
// Second group of 16 steps (MD5_Go); w0 is consumed in the fourth step.
467 MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10);
468 MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11);
469 MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12);
470 MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13);
471 MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10);
472 MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11);
473 MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12);
474 MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13);
475 MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10);
476 MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11);
477 MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12);
478 MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13);
479 MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10);
480 MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11);
481 MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12);
482 MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13);
// Third group, alternating MD5_H1 and MD5_H2, up to the step using H_w3c2a.
484 MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20);
485 MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21);
486 MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22);
487 MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23);
488 MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20);
489 MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21);
490 MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22);
491 MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23);
492 MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20);
493 MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21);
494 MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22);
// Early exit: if the forward-computed c differs from the value derived by
// reversing the target digest, skip the remaining steps for this candidate.
496 bool q_cond = (pre_c != c);
498 if (q_cond) continue;
// Rest of the third group.
500 MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23);
501 MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20);
502 MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21);
503 MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22);
504 MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23);
// Fourth group of 16 steps (MD5_I); w0 is consumed in the first step.
506 MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30);
507 MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31);
508 MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32);
509 MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33);
510 MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30);
511 MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31);
512 MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32);
513 MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33);
514 MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30);
515 MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31);
516 MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32);
517 MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33);
518 MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30);
519 MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31);
520 MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32);
521 MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33);
// Comparison code for the single-digest case is textually included from the
// file selected by VECT_COMPARE_S.
529 #include VECT_COMPARE_S
// Kernel entry m00010_m04: one thread per pws entry, indexed by the flat 1D
// global id; compiled with __launch_bounds__ (256, 1).
// Threads with gid >= gid_max return immediately. The visible assignments load
// w[0..3] and w[14] from pws[gid].i before forwarding to m00010m.
// combs_mode is accepted but is not passed on to m00010m.
533 extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
539 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may contain more threads than there are candidates.
541 if (gid >= gid_max) return;
545 w[ 0] = pws[gid].i[ 0];
546 w[ 1] = pws[gid].i[ 1];
547 w[ 2] = pws[gid].i[ 2];
548 w[ 3] = pws[gid].i[ 3];
559 w[14] = pws[gid].i[14];
562 const u32 pw_len = pws[gid].pw_len;
568 m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
// Kernel entry m00010_m08: one thread per pws entry, indexed by the flat 1D
// global id; compiled with __launch_bounds__ (256, 1).
// Threads with gid >= gid_max return immediately. The visible assignments load
// w[0..7] and w[14] from pws[gid].i before forwarding to m00010m.
// combs_mode is accepted but is not passed on to m00010m.
571 extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
577 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may contain more threads than there are candidates.
579 if (gid >= gid_max) return;
583 w[ 0] = pws[gid].i[ 0];
584 w[ 1] = pws[gid].i[ 1];
585 w[ 2] = pws[gid].i[ 2];
586 w[ 3] = pws[gid].i[ 3];
587 w[ 4] = pws[gid].i[ 4];
588 w[ 5] = pws[gid].i[ 5];
589 w[ 6] = pws[gid].i[ 6];
590 w[ 7] = pws[gid].i[ 7];
597 w[14] = pws[gid].i[14];
600 const u32 pw_len = pws[gid].pw_len;
606 m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
// Kernel entry m00010_m16: one thread per pws entry, indexed by the flat 1D
// global id; compiled with __launch_bounds__ (256, 1).
// Threads with gid >= gid_max return immediately. All sixteen words w[0..15]
// are loaded from pws[gid].i before forwarding to m00010m.
// combs_mode is accepted but is not passed on to m00010m.
609 extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
615 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may contain more threads than there are candidates.
617 if (gid >= gid_max) return;
621 w[ 0] = pws[gid].i[ 0];
622 w[ 1] = pws[gid].i[ 1];
623 w[ 2] = pws[gid].i[ 2];
624 w[ 3] = pws[gid].i[ 3];
625 w[ 4] = pws[gid].i[ 4];
626 w[ 5] = pws[gid].i[ 5];
627 w[ 6] = pws[gid].i[ 6];
628 w[ 7] = pws[gid].i[ 7];
629 w[ 8] = pws[gid].i[ 8];
630 w[ 9] = pws[gid].i[ 9];
631 w[10] = pws[gid].i[10];
632 w[11] = pws[gid].i[11];
633 w[12] = pws[gid].i[12];
634 w[13] = pws[gid].i[13];
635 w[14] = pws[gid].i[14];
636 w[15] = pws[gid].i[15];
638 const u32 pw_len = pws[gid].pw_len;
644 m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
// Kernel entry m00010_s04: one thread per pws entry, indexed by the flat 1D
// global id; compiled with __launch_bounds__ (256, 1).
// Threads with gid >= gid_max return immediately. The visible assignments load
// w[0..3] and w[14] from pws[gid].i before forwarding to m00010s.
// combs_mode is accepted but is not passed on to m00010s.
647 extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
653 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may contain more threads than there are candidates.
655 if (gid >= gid_max) return;
659 w[ 0] = pws[gid].i[ 0];
660 w[ 1] = pws[gid].i[ 1];
661 w[ 2] = pws[gid].i[ 2];
662 w[ 3] = pws[gid].i[ 3];
673 w[14] = pws[gid].i[14];
676 const u32 pw_len = pws[gid].pw_len;
682 m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
// Kernel entry m00010_s08: one thread per pws entry, indexed by the flat 1D
// global id; compiled with __launch_bounds__ (256, 1).
// Threads with gid >= gid_max return immediately. The visible assignments load
// w[0..7] and w[14] from pws[gid].i before forwarding to m00010s.
// combs_mode is accepted but is not passed on to m00010s.
685 extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
691 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may contain more threads than there are candidates.
693 if (gid >= gid_max) return;
697 w[ 0] = pws[gid].i[ 0];
698 w[ 1] = pws[gid].i[ 1];
699 w[ 2] = pws[gid].i[ 2];
700 w[ 3] = pws[gid].i[ 3];
701 w[ 4] = pws[gid].i[ 4];
702 w[ 5] = pws[gid].i[ 5];
703 w[ 6] = pws[gid].i[ 6];
704 w[ 7] = pws[gid].i[ 7];
711 w[14] = pws[gid].i[14];
714 const u32 pw_len = pws[gid].pw_len;
720 m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
// Kernel entry m00010_s16: one thread per pws entry, indexed by the flat 1D
// global id; compiled with __launch_bounds__ (256, 1).
// Threads with gid >= gid_max return immediately. All sixteen words w[0..15]
// are loaded from pws[gid].i before forwarding to m00010s.
// combs_mode is accepted but is not passed on to m00010s.
723 extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
729 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may contain more threads than there are candidates.
731 if (gid >= gid_max) return;
735 w[ 0] = pws[gid].i[ 0];
736 w[ 1] = pws[gid].i[ 1];
737 w[ 2] = pws[gid].i[ 2];
738 w[ 3] = pws[gid].i[ 3];
739 w[ 4] = pws[gid].i[ 4];
740 w[ 5] = pws[gid].i[ 5];
741 w[ 6] = pws[gid].i[ 6];
742 w[ 7] = pws[gid].i[ 7];
743 w[ 8] = pws[gid].i[ 8];
744 w[ 9] = pws[gid].i[ 9];
745 w[10] = pws[gid].i[10];
746 w[11] = pws[gid].i[11];
747 w[12] = pws[gid].i[12];
748 w[13] = pws[gid].i[13];
749 w[14] = pws[gid].i[14];
750 w[15] = pws[gid].i[15];
752 const u32 pw_len = pws[gid].pw_len;
758 m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);