2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
45 #define KECCAK_ROUNDS 24
48 #define Theta1(s) (st[0 + s] ^ st[5 + s] ^ st[10 + s] ^ st[15 + s] ^ st[20 + s])
61 u32 j = keccakf_piln[s]; \
62 u32 k = keccakf_rotc[s]; \
64 st[j] = rotl64 (t, k); \
75 st[0 + s] ^= ~bc1 & bc2; \
76 st[1 + s] ^= ~bc2 & bc3; \
77 st[2 + s] ^= ~bc3 & bc4; \
78 st[3 + s] ^= ~bc4 & bc0; \
79 st[4 + s] ^= ~bc0 & bc1; \
82 __device__ __constant__ gpu_rule_t c_rules[1024];
84 extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
90 const u32 lid = threadIdx.x;
96 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
98 if (gid >= gid_max) return;
104 const u64 keccakf_rndc[24] =
106 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
107 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
108 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
109 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
110 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
111 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
112 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
113 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
116 const u32 keccakf_rotc[24] =
118 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
119 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
122 const u32 keccakf_piln[24] =
124 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
125 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
130 pw_buf0[0] = pws[gid].i[ 0];
131 pw_buf0[1] = pws[gid].i[ 1];
132 pw_buf0[2] = pws[gid].i[ 2];
133 pw_buf0[3] = pws[gid].i[ 3];
137 pw_buf1[0] = pws[gid].i[ 4];
138 pw_buf1[1] = pws[gid].i[ 5];
139 pw_buf1[2] = pws[gid].i[ 6];
140 pw_buf1[3] = pws[gid].i[ 7];
142 const u32 pw_len = pws[gid].pw_len;
145 * 0x80 keccak, very special
148 const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen;
150 const u32 rsiz = 200 - (2 * mdlen);
152 const u32 add80w = (rsiz - 1) / 8;
158 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
188 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
190 append_0x01_2 (w0, w1, out_len);
194 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32;
195 st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32;
196 st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32;
197 st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32;
220 st[add80w] |= 0x8000000000000000;
224 for (round = 0; round < KECCAK_ROUNDS; round++)
228 u64x bc0 = Theta1 (0);
229 u64x bc1 = Theta1 (1);
230 u64x bc2 = Theta1 (2);
231 u64x bc3 = Theta1 (3);
232 u64x bc4 = Theta1 (4);
236 t = bc4 ^ rotl64 (bc1, 1); Theta2 (0);
237 t = bc0 ^ rotl64 (bc2, 1); Theta2 (1);
238 t = bc1 ^ rotl64 (bc3, 1); Theta2 (2);
239 t = bc2 ^ rotl64 (bc4, 1); Theta2 (3);
240 t = bc3 ^ rotl64 (bc0, 1); Theta2 (4);
281 st[0] ^= keccakf_rndc[round];
284 const u32x r0 = l32_from_64 (st[1]);
285 const u32x r1 = h32_from_64 (st[1]);
286 const u32x r2 = l32_from_64 (st[2]);
287 const u32x r3 = h32_from_64 (st[2]);
289 #include VECT_COMPARE_M
293 extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
297 extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
301 extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
307 const u32 lid = threadIdx.x;
313 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
315 if (gid >= gid_max) return;
321 const u32 search[4] =
323 digests_buf[digests_offset].digest_buf[DGST_R0],
324 digests_buf[digests_offset].digest_buf[DGST_R1],
325 digests_buf[digests_offset].digest_buf[DGST_R2],
326 digests_buf[digests_offset].digest_buf[DGST_R3]
333 const u64 keccakf_rndc[24] =
335 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
336 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
337 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
338 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
339 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
340 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
341 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
342 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
345 const u32 keccakf_rotc[24] =
347 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
348 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
351 const u32 keccakf_piln[24] =
353 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
354 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
359 pw_buf0[0] = pws[gid].i[ 0];
360 pw_buf0[1] = pws[gid].i[ 1];
361 pw_buf0[2] = pws[gid].i[ 2];
362 pw_buf0[3] = pws[gid].i[ 3];
366 pw_buf1[0] = pws[gid].i[ 4];
367 pw_buf1[1] = pws[gid].i[ 5];
368 pw_buf1[2] = pws[gid].i[ 6];
369 pw_buf1[3] = pws[gid].i[ 7];
371 const u32 pw_len = pws[gid].pw_len;
374 * 0x80 keccak, very special
377 const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen;
379 const u32 rsiz = 200 - (2 * mdlen);
381 const u32 add80w = (rsiz - 1) / 8;
387 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
417 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
419 append_0x01_2 (w0, w1, out_len);
423 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32;
424 st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32;
425 st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32;
426 st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32;
449 st[add80w] |= 0x8000000000000000;
453 for (round = 0; round < KECCAK_ROUNDS; round++)
457 u64x bc0 = Theta1 (0);
458 u64x bc1 = Theta1 (1);
459 u64x bc2 = Theta1 (2);
460 u64x bc3 = Theta1 (3);
461 u64x bc4 = Theta1 (4);
465 t = bc4 ^ rotl64 (bc1, 1); Theta2 (0);
466 t = bc0 ^ rotl64 (bc2, 1); Theta2 (1);
467 t = bc1 ^ rotl64 (bc3, 1); Theta2 (2);
468 t = bc2 ^ rotl64 (bc4, 1); Theta2 (3);
469 t = bc3 ^ rotl64 (bc0, 1); Theta2 (4);
510 st[0] ^= keccakf_rndc[round];
513 const u32x r0 = l32_from_64 (st[1]);
514 const u32x r1 = h32_from_64 (st[1]);
515 const u32x r2 = l32_from_64 (st[2]);
516 const u32x r3 = h32_from_64 (st[2]);
518 #include VECT_COMPARE_S
522 extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
526 extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)