2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
42 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
45 __device__ __constant__ gpu_rule_t c_rules[1024];
47 extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
53 const u32 lid = threadIdx.x;
59 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
61 if (gid >= gid_max) return;
65 pw_buf0[0] = pws[gid].i[ 0];
66 pw_buf0[1] = pws[gid].i[ 1];
67 pw_buf0[2] = pws[gid].i[ 2];
68 pw_buf0[3] = pws[gid].i[ 3];
72 pw_buf1[0] = pws[gid].i[ 4];
73 pw_buf1[1] = pws[gid].i[ 5];
74 pw_buf1[2] = pws[gid].i[ 6];
75 pw_buf1[3] = pws[gid].i[ 7];
77 const u32 pw_len = pws[gid].pw_len;
83 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
113 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
115 append_0x80_2 (w0, w1, out_len);
122 make_unicode (w0, w0_t, w1_t);
123 make_unicode (w1, w2_t, w3_t);
125 w3_t[2] = out_len * 8 * 2;
134 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
135 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
136 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
137 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
138 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
139 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
140 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
141 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
142 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
143 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
144 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
145 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
146 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
147 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
148 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
149 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
151 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
152 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
153 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
154 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
155 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
156 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
157 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
158 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
159 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
160 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
161 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
162 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
163 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
164 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
165 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
166 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
168 MD4_STEP (MD4_H1, a, b, c, d, w0_t[0], MD4C02, MD4S20);
169 MD4_STEP (MD4_H2, d, a, b, c, w2_t[0], MD4C02, MD4S21);
170 MD4_STEP (MD4_H1, c, d, a, b, w1_t[0], MD4C02, MD4S22);
171 MD4_STEP (MD4_H2, b, c, d, a, w3_t[0], MD4C02, MD4S23);
172 MD4_STEP (MD4_H1, a, b, c, d, w0_t[2], MD4C02, MD4S20);
173 MD4_STEP (MD4_H2, d, a, b, c, w2_t[2], MD4C02, MD4S21);
174 MD4_STEP (MD4_H1, c, d, a, b, w1_t[2], MD4C02, MD4S22);
175 MD4_STEP (MD4_H2, b, c, d, a, w3_t[2], MD4C02, MD4S23);
176 MD4_STEP (MD4_H1, a, b, c, d, w0_t[1], MD4C02, MD4S20);
177 MD4_STEP (MD4_H2, d, a, b, c, w2_t[1], MD4C02, MD4S21);
178 MD4_STEP (MD4_H1, c, d, a, b, w1_t[1], MD4C02, MD4S22);
179 MD4_STEP (MD4_H2, b, c, d, a, w3_t[1], MD4C02, MD4S23);
180 MD4_STEP (MD4_H1, a, b, c, d, w0_t[3], MD4C02, MD4S20);
181 MD4_STEP (MD4_H2, d, a, b, c, w2_t[3], MD4C02, MD4S21);
182 MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22);
183 MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23);
190 #include VECT_COMPARE_M
194 extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
198 extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
202 extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
208 const u32 lid = threadIdx.x;
214 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
216 if (gid >= gid_max) return;
220 pw_buf0[0] = pws[gid].i[ 0];
221 pw_buf0[1] = pws[gid].i[ 1];
222 pw_buf0[2] = pws[gid].i[ 2];
223 pw_buf0[3] = pws[gid].i[ 3];
227 pw_buf1[0] = pws[gid].i[ 4];
228 pw_buf1[1] = pws[gid].i[ 5];
229 pw_buf1[2] = pws[gid].i[ 6];
230 pw_buf1[3] = pws[gid].i[ 7];
232 const u32 pw_len = pws[gid].pw_len;
238 const u32 search[4] =
240 digests_buf[digests_offset].digest_buf[DGST_R0],
241 digests_buf[digests_offset].digest_buf[DGST_R1],
242 digests_buf[digests_offset].digest_buf[DGST_R2],
243 digests_buf[digests_offset].digest_buf[DGST_R3]
250 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
280 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
282 append_0x80_2 (w0, w1, out_len);
289 make_unicode (w0, w0_t, w1_t);
290 make_unicode (w1, w2_t, w3_t);
292 w3_t[2] = out_len * 8 * 2;
301 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
302 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
303 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
304 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
305 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
306 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
307 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
308 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
309 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
310 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
311 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
312 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
313 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
314 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
315 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
316 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
318 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
319 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
320 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
321 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
322 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
323 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
324 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
325 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
326 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
327 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
328 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
329 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
330 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
331 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
332 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
333 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
335 MD4_STEP (MD4_H1, a, b, c, d, w0_t[0], MD4C02, MD4S20);
336 MD4_STEP (MD4_H2, d, a, b, c, w2_t[0], MD4C02, MD4S21);
337 MD4_STEP (MD4_H1, c, d, a, b, w1_t[0], MD4C02, MD4S22);
338 MD4_STEP (MD4_H2, b, c, d, a, w3_t[0], MD4C02, MD4S23);
339 MD4_STEP (MD4_H1, a, b, c, d, w0_t[2], MD4C02, MD4S20);
340 MD4_STEP (MD4_H2, d, a, b, c, w2_t[2], MD4C02, MD4S21);
341 MD4_STEP (MD4_H1, c, d, a, b, w1_t[2], MD4C02, MD4S22);
342 MD4_STEP (MD4_H2, b, c, d, a, w3_t[2], MD4C02, MD4S23);
343 MD4_STEP (MD4_H1, a, b, c, d, w0_t[1], MD4C02, MD4S20);
344 MD4_STEP (MD4_H2, d, a, b, c, w2_t[1], MD4C02, MD4S21);
345 MD4_STEP (MD4_H1, c, d, a, b, w1_t[1], MD4C02, MD4S22);
346 MD4_STEP (MD4_H2, b, c, d, a, w3_t[1], MD4C02, MD4S23);
347 MD4_STEP (MD4_H1, a, b, c, d, w0_t[3], MD4C02, MD4S20);
348 MD4_STEP (MD4_H2, d, a, b, c, w2_t[3], MD4C02, MD4S21);
349 MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22);
350 MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23);
357 #include VECT_COMPARE_S
361 extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
365 extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)