2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
// VECT_COMPARE_S / VECT_COMPARE_M name the source files that the kernels
// below pull in textually via `#include VECT_COMPARE_S` / `#include VECT_COMPARE_M`.
// Three alternative pairs are listed (vect1, vect2, vect4 variants).
// NOTE(review): the preprocessor guards that select exactly one pair are not
// visible here — presumably keyed on a vector-width setting; confirm.
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
42 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
// Table of 1024 gpu_rule_t entries declared in device constant memory.
// The kernels below read c_rules[il_pos].cmds for il_pos in [0, rules_cnt).
// NOTE(review): no visible code bounds rules_cnt against the 1024-entry size —
// assumes the host never passes rules_cnt > 1024; TODO confirm.
45 __device__ __constant__ gpu_rule_t c_rules[1024];
// Kernel entry point m00900_m04.
//
// Each thread loads one candidate from pws[gid], then for every rule index
// il_pos in [0, rules_cnt) applies c_rules[il_pos].cmds to it, appends the
// 0x80 marker via append_0x80_2, runs 48 MD4_STEP invocations over w0..w3 and
// finally includes the VECT_COMPARE_M source to evaluate the result.
//
// __launch_bounds__ (256, 1): at most 256 threads per block, at least 1
// resident block per multiprocessor.
//
// NOTE(review): the declarations of pw_buf0/pw_buf1, w0..w3 and a..d, the
// initial values of a..d, and whatever follows the last MD4_STEP before the
// include are not visible in this excerpt. Of the parameters, only pws,
// rules_cnt and gid_max are referenced by the lines shown; the rest are
// presumably consumed by the included compare file — confirm.
47 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block; not read by any line shown here.
53 const u32 lid = threadIdx.x;
// Global thread index across the 1-D grid; selects this thread's pws entry.
59 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Threads at or past gid_max exit before reading pws.
61 if (gid >= gid_max) return;
// Copy the first eight u32 words of the candidate: words 0-3 into pw_buf0 ...
65 pw_buf0[0] = pws[gid].i[ 0];
66 pw_buf0[1] = pws[gid].i[ 1];
67 pw_buf0[2] = pws[gid].i[ 2];
68 pw_buf0[3] = pws[gid].i[ 3];
// ... and words 4-7 into pw_buf1.
72 pw_buf1[0] = pws[gid].i[ 4];
73 pw_buf1[1] = pws[gid].i[ 5];
74 pw_buf1[2] = pws[gid].i[ 6];
75 pw_buf1[3] = pws[gid].i[ 7];
// Length recorded with the candidate; passed to apply_rules below.
77 const u32 pw_len = pws[gid].pw_len;
// One iteration per rule; il_pos indexes c_rules.
83 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
// apply_rules receives the rule's cmds, w0/w1 and pw_len; its return value is
// used as the length for the padding call that follows.
113 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
// Append the 0x80 marker to w0/w1 at position out_len (helper defined elsewhere).
115 append_0x80_2 (w0, w1, out_len);
// First group of 16 steps: MD4_Fo with constant MD4C00; words taken in the
// order w0[0..3], w1[0..3], w2[0..3], w3[0..3].
124 MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
125 MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
126 MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02);
127 MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03);
128 MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00);
129 MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01);
130 MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02);
131 MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03);
132 MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00);
133 MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01);
134 MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02);
135 MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03);
136 MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00);
137 MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01);
138 MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02);
139 MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03);
// Second group of 16 steps: MD4_Go with constant MD4C01; for each index
// 0..3 in turn, that element of w0, w1, w2, w3.
141 MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10);
142 MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11);
143 MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12);
144 MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13);
145 MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10);
146 MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11);
147 MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12);
148 MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13);
149 MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10);
150 MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11);
151 MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12);
152 MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13);
153 MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10);
154 MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11);
155 MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12);
156 MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13);
// Third group of 16 steps: MD4_H with constant MD4C02; buffers are visited in
// the order w0, w2, w1, w3 and the element index runs 0, 2, 1, 3.
158 MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20);
159 MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21);
160 MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22);
161 MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23);
162 MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20);
163 MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21);
164 MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22);
165 MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23);
166 MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20);
167 MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21);
168 MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22);
169 MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23);
170 MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20);
171 MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
172 MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
173 MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
// Textually include the compare code named by VECT_COMPARE_M.
// NOTE(review): which variables that file expects in scope is not visible here.
180 #include VECT_COMPARE_M
// Kernel entry point m00900_m08; takes the same parameter list as m00900_m04.
// NOTE(review): no body statements are visible for this kernel — presumably an
// empty stub kept so the symbol exists; confirm.
184 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel entry point m00900_m16; takes the same parameter list as m00900_m04.
// NOTE(review): no body statements are visible for this kernel — presumably an
// empty stub kept so the symbol exists; confirm.
188 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel entry point m00900_s04.
//
// Each thread loads one candidate from pws[gid] and reads a single four-word
// target from digests_buf[digests_offset] into search[]. Then for every rule
// index il_pos in [0, rules_cnt) it applies c_rules[il_pos].cmds, appends the
// 0x80 marker via append_0x80_2, runs 48 MD4_STEP invocations over w0..w3 and
// includes the VECT_COMPARE_S source to evaluate the result.
//
// __launch_bounds__ (256, 1): at most 256 threads per block, at least 1
// resident block per multiprocessor.
//
// NOTE(review): the declarations of pw_buf0/pw_buf1, w0..w3 and a..d, the
// initial values of a..d, and whatever follows the last MD4_STEP before the
// include are not visible in this excerpt. search[] is not read by any line
// shown; presumably the included compare file consumes it — confirm.
192 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block; not read by any line shown here.
198 const u32 lid = threadIdx.x;
// Global thread index across the 1-D grid; selects this thread's pws entry.
204 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Threads at or past gid_max exit before reading pws.
206 if (gid >= gid_max) return;
// Copy the first eight u32 words of the candidate: words 0-3 into pw_buf0 ...
210 pw_buf0[0] = pws[gid].i[ 0];
211 pw_buf0[1] = pws[gid].i[ 1];
212 pw_buf0[2] = pws[gid].i[ 2];
213 pw_buf0[3] = pws[gid].i[ 3];
// ... and words 4-7 into pw_buf1.
217 pw_buf1[0] = pws[gid].i[ 4];
218 pw_buf1[1] = pws[gid].i[ 5];
219 pw_buf1[2] = pws[gid].i[ 6];
220 pw_buf1[3] = pws[gid].i[ 7];
// Length recorded with the candidate; passed to apply_rules below.
222 const u32 pw_len = pws[gid].pw_len;
// Four words of the digest at digests_offset, picked by the DGST_R0..DGST_R3
// indices (defined elsewhere).
228 const u32 search[4] =
230 digests_buf[digests_offset].digest_buf[DGST_R0],
231 digests_buf[digests_offset].digest_buf[DGST_R1],
232 digests_buf[digests_offset].digest_buf[DGST_R2],
233 digests_buf[digests_offset].digest_buf[DGST_R3]
// One iteration per rule; il_pos indexes c_rules.
240 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
// apply_rules receives the rule's cmds, w0/w1 and pw_len; its return value is
// used as the length for the padding call that follows.
270 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
// Append the 0x80 marker to w0/w1 at position out_len (helper defined elsewhere).
272 append_0x80_2 (w0, w1, out_len);
// First group of 16 steps: MD4_Fo with constant MD4C00; words taken in the
// order w0[0..3], w1[0..3], w2[0..3], w3[0..3].
281 MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
282 MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
283 MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02);
284 MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03);
285 MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00);
286 MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01);
287 MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02);
288 MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03);
289 MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00);
290 MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01);
291 MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02);
292 MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03);
293 MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00);
294 MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01);
295 MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02);
296 MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03);
// Second group of 16 steps: MD4_Go with constant MD4C01; for each index
// 0..3 in turn, that element of w0, w1, w2, w3.
298 MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10);
299 MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11);
300 MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12);
301 MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13);
302 MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10);
303 MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11);
304 MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12);
305 MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13);
306 MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10);
307 MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11);
308 MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12);
309 MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13);
310 MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10);
311 MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11);
312 MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12);
313 MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13);
// Third group of 16 steps: MD4_H with constant MD4C02; buffers are visited in
// the order w0, w2, w1, w3 and the element index runs 0, 2, 1, 3.
315 MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20);
316 MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21);
317 MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22);
318 MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23);
319 MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20);
320 MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21);
321 MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22);
322 MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23);
323 MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20);
324 MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21);
325 MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22);
326 MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23);
327 MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20);
328 MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
329 MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
330 MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
// Textually include the compare code named by VECT_COMPARE_S.
// NOTE(review): which variables that file expects in scope is not visible here.
337 #include VECT_COMPARE_S
// Kernel entry point m00900_s08; takes the same parameter list as m00900_s04.
// NOTE(review): no body statements are visible for this kernel — presumably an
// empty stub kept so the symbol exists; confirm.
341 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
345 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)