2 * Author......: Jens Steube <jens.steube@gmail.com>
6 #include "include/kernel_vendor.h"
12 #include "types_amd.c"
/**
 * Build one candidate word into pw_buf from the index `val`, using chained
 * per-position character-set tables.
 *
 * pw_buf          16-word output buffer; bytes are OR-ed in, four per word.
 *                 NOTE(review): because only |= is used here, the buffer is
 *                 presumably zeroed before the loop - confirm.
 * root_css_buf    table used for the first generated position (indexed by pw_r_len).
 * markov_css_buf  follow-up tables, indexed by (position * CHARSIZ) + previous key.
 * pw_l_len        number of characters generated by the loop.
 * pw_r_len        starting byte position of the generated characters.
 * mask80          mask applied to the 0xff byte placed right after the last character.
 * bits14, bits15  when non-zero, (pw_l_len + pw_r_len) * 8 is stored in word 14 / 15.
 * val             index that is decomposed, one character set at a time, into keys.
 */
14 static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val)
// The first position takes its character set from the root table entry at pw_r_len.
33 __global cs_t *cs = &root_css_buf[pw_r_len];
// i counts generated characters; j is the byte position inside pw_buf.
38 for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++)
40 const u32 len = cs->cs_len;
// Mixed-radix split of val: pos selects the entry in the current set, next is
// the remaining quotient (presumably carried into the following iteration - confirm).
// NOTE(review): nothing visible here guards against len == 0.
42 const u64 next = val / len;
43 const u64 pos = val % len;
47 const u32 key = cs->cs_buf[pos];
// Word index and byte-within-word for position j.
49 const u32 jd4 = j / 4;
50 const u32 jm4 = j % 4;
// Place the key in byte jm4 of word jd4 (lowest byte first).
52 pw_buf[jd4] |= key << (jm4 * 8);
// The next character set depends on the current position and the key just chosen.
54 cs = &markov_css_buf[(j * CHARSIZ) + key];
// Same word/byte split, applied to the j value left by the loop
// (pw_r_len + pw_l_len when the loop runs to completion).
57 const u32 jd4 = j / 4;
58 const u32 jm4 = j % 4;
// OR in a byte at position j; mask80 decides which of its bits survive
// (a zero mask80 leaves the buffer unchanged).
// NOTE(review): 0xff is a signed int literal, so for jm4 == 3 the shift reaches
// the sign bit - consider 0xffu.
60 pw_buf[jd4] |= (0xff << (jm4 * 8)) & mask80;
// Optionally store eight times the combined length in word 14 and/or word 15.
62 if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8;
63 if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8;
/**
 * Kernel: each work-item generates one candidate with generate_pw () and
 * stores all 16 words plus the length into pws_buf_l[gid].
 *
 * pws_buf_l       output array, one pw_t per work-item.
 * root_css_buf    passed through to generate_pw ().
 * markov_css_buf  passed through to generate_pw ().
 * off             base index; the work-item's candidate index is off + gid.
 * pw_l_len        passed through to generate_pw ().
 * pw_r_len        passed through to generate_pw ().
 * mask80          passed through to generate_pw ().
 * bits14, bits15  passed through to generate_pw ().
 * gid_max         work-items with gid >= gid_max do nothing.
 *
 * The required work-group size is fixed at 64 x 1 x 1 by the attribute.
 */
66 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t *pws_buf_l, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max)
68 const u32 gid = get_global_id (0);
// Skip work-items beyond the requested count.
70 if (gid >= gid_max) return;
// Candidate index for this work-item is off + gid.
74 generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid);
// Copy the full 16-word buffer into this work-item's output slot.
76 pws_buf_l[gid].i[ 0] = pw_buf[ 0];
77 pws_buf_l[gid].i[ 1] = pw_buf[ 1];
78 pws_buf_l[gid].i[ 2] = pw_buf[ 2];
79 pws_buf_l[gid].i[ 3] = pw_buf[ 3];
80 pws_buf_l[gid].i[ 4] = pw_buf[ 4];
81 pws_buf_l[gid].i[ 5] = pw_buf[ 5];
82 pws_buf_l[gid].i[ 6] = pw_buf[ 6];
83 pws_buf_l[gid].i[ 7] = pw_buf[ 7];
84 pws_buf_l[gid].i[ 8] = pw_buf[ 8];
85 pws_buf_l[gid].i[ 9] = pw_buf[ 9];
86 pws_buf_l[gid].i[10] = pw_buf[10];
87 pws_buf_l[gid].i[11] = pw_buf[11];
88 pws_buf_l[gid].i[12] = pw_buf[12];
89 pws_buf_l[gid].i[13] = pw_buf[13];
90 pws_buf_l[gid].i[14] = pw_buf[14];
91 pws_buf_l[gid].i[15] = pw_buf[15];
// Recorded length is the sum of both length parameters.
93 pws_buf_l[gid].pw_len = pw_l_len + pw_r_len;
/**
 * Kernel: each work-item generates one candidate with generate_pw () and
 * stores only its first word into pws_buf_r[gid].i.
 *
 * pws_buf_r       output array, one bf_t per work-item.
 * root_css_buf    passed through to generate_pw ().
 * markov_css_buf  passed through to generate_pw ().
 * off             base index; the work-item's candidate index is off + gid.
 * pw_r_len        number of characters to generate (passed in generate_pw's
 *                 pw_l_len slot, with a start position of 0).
 * mask80, bits14, bits15
 *                 accepted but not forwarded in the call below, which passes 0
 *                 for all three.
 * gid_max         work-items with gid >= gid_max do nothing.
 *
 * The required work-group size is fixed at 64 x 1 x 1 by the attribute.
 */
96 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max)
98 const u32 gid = get_global_id (0);
// Skip work-items beyond the requested count.
100 if (gid >= gid_max) return;
// Start position 0, zero mask80 and zero bits14/bits15: no byte is added after
// the last character and words 14/15 are not written with a length.
104 generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid);
// Only the first 32-bit word of the generated buffer is kept.
106 pws_buf_r[gid].i = pw_buf[0];
/**
 * Kernel: each work-item generates one candidate with generate_pw () and
 * stores words 0..7 plus the length into pws_buf[gid].
 *
 * pws_buf         output array, one comb_t per work-item.
 * root_css_buf    passed through to generate_pw ().
 * markov_css_buf  passed through to generate_pw ().
 * off             base index; the work-item's candidate index is off + gid.
 * pw_len          number of characters to generate (start position 0).
 * mask80          passed through to generate_pw ().
 * bits14, bits15  passed through to generate_pw ().
 * gid_max         work-items with gid >= gid_max do nothing.
 *
 * The required work-group size is fixed at 64 x 1 x 1 by the attribute.
 */
109 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max)
111 const u32 gid = get_global_id (0);
// Skip work-items beyond the requested count.
113 if (gid >= gid_max) return;
// Candidate index for this work-item is off + gid; generation starts at byte 0.
117 generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid);
// Only the first eight words are copied out here.
// NOTE(review): words 14/15, which generate_pw may set via bits14/bits15, are
// not stored by these lines - confirm that is intended.
119 pws_buf[gid].i[ 0] = pw_buf[ 0];
120 pws_buf[gid].i[ 1] = pw_buf[ 1];
121 pws_buf[gid].i[ 2] = pw_buf[ 2];
122 pws_buf[gid].i[ 3] = pw_buf[ 3];
123 pws_buf[gid].i[ 4] = pw_buf[ 4];
124 pws_buf[gid].i[ 5] = pw_buf[ 5];
125 pws_buf[gid].i[ 6] = pw_buf[ 6];
126 pws_buf[gid].i[ 7] = pw_buf[ 7];
128 pws_buf[gid].pw_len = pw_len;