2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 __constant u32 c_tables[4][256] =
26 0x00072000, 0x00075000, 0x00074800, 0x00071000,
27 0x00076800, 0x00074000, 0x00070000, 0x00077000,
28 0x00073000, 0x00075800, 0x00070800, 0x00076000,
29 0x00073800, 0x00077800, 0x00072800, 0x00071800,
30 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
31 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
32 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
33 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
34 0x00022000, 0x00025000, 0x00024800, 0x00021000,
35 0x00026800, 0x00024000, 0x00020000, 0x00027000,
36 0x00023000, 0x00025800, 0x00020800, 0x00026000,
37 0x00023800, 0x00027800, 0x00022800, 0x00021800,
38 0x00062000, 0x00065000, 0x00064800, 0x00061000,
39 0x00066800, 0x00064000, 0x00060000, 0x00067000,
40 0x00063000, 0x00065800, 0x00060800, 0x00066000,
41 0x00063800, 0x00067800, 0x00062800, 0x00061800,
42 0x00032000, 0x00035000, 0x00034800, 0x00031000,
43 0x00036800, 0x00034000, 0x00030000, 0x00037000,
44 0x00033000, 0x00035800, 0x00030800, 0x00036000,
45 0x00033800, 0x00037800, 0x00032800, 0x00031800,
46 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
47 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
48 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
49 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
50 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
51 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
52 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
53 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
54 0x00052000, 0x00055000, 0x00054800, 0x00051000,
55 0x00056800, 0x00054000, 0x00050000, 0x00057000,
56 0x00053000, 0x00055800, 0x00050800, 0x00056000,
57 0x00053800, 0x00057800, 0x00052800, 0x00051800,
58 0x00012000, 0x00015000, 0x00014800, 0x00011000,
59 0x00016800, 0x00014000, 0x00010000, 0x00017000,
60 0x00013000, 0x00015800, 0x00010800, 0x00016000,
61 0x00013800, 0x00017800, 0x00012800, 0x00011800,
62 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
63 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
64 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
65 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
66 0x00042000, 0x00045000, 0x00044800, 0x00041000,
67 0x00046800, 0x00044000, 0x00040000, 0x00047000,
68 0x00043000, 0x00045800, 0x00040800, 0x00046000,
69 0x00043800, 0x00047800, 0x00042800, 0x00041800,
70 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
71 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
72 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
73 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
74 0x00002000, 0x00005000, 0x00004800, 0x00001000,
75 0x00006800, 0x00004000, 0x00000000, 0x00007000,
76 0x00003000, 0x00005800, 0x00000800, 0x00006000,
77 0x00003800, 0x00007800, 0x00002800, 0x00001800,
78 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
79 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
80 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
81 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
82 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
83 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
84 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
85 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
86 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
87 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
88 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
89 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
92 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
93 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
94 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
95 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
96 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
97 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
98 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
99 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
100 0x05280000, 0x05400000, 0x05080000, 0x05680000,
101 0x05500000, 0x05180000, 0x05200000, 0x05100000,
102 0x05700000, 0x05780000, 0x05600000, 0x05380000,
103 0x05300000, 0x05000000, 0x05480000, 0x05580000,
104 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
105 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
106 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
107 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
108 0x00280000, 0x00400000, 0x00080000, 0x00680000,
109 0x00500000, 0x00180000, 0x00200000, 0x00100000,
110 0x00700000, 0x00780000, 0x00600000, 0x00380000,
111 0x00300000, 0x00000000, 0x00480000, 0x00580000,
112 0x04280000, 0x04400000, 0x04080000, 0x04680000,
113 0x04500000, 0x04180000, 0x04200000, 0x04100000,
114 0x04700000, 0x04780000, 0x04600000, 0x04380000,
115 0x04300000, 0x04000000, 0x04480000, 0x04580000,
116 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
117 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
118 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
119 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
120 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
121 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
122 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
123 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
124 0x07280000, 0x07400000, 0x07080000, 0x07680000,
125 0x07500000, 0x07180000, 0x07200000, 0x07100000,
126 0x07700000, 0x07780000, 0x07600000, 0x07380000,
127 0x07300000, 0x07000000, 0x07480000, 0x07580000,
128 0x02280000, 0x02400000, 0x02080000, 0x02680000,
129 0x02500000, 0x02180000, 0x02200000, 0x02100000,
130 0x02700000, 0x02780000, 0x02600000, 0x02380000,
131 0x02300000, 0x02000000, 0x02480000, 0x02580000,
132 0x03280000, 0x03400000, 0x03080000, 0x03680000,
133 0x03500000, 0x03180000, 0x03200000, 0x03100000,
134 0x03700000, 0x03780000, 0x03600000, 0x03380000,
135 0x03300000, 0x03000000, 0x03480000, 0x03580000,
136 0x06280000, 0x06400000, 0x06080000, 0x06680000,
137 0x06500000, 0x06180000, 0x06200000, 0x06100000,
138 0x06700000, 0x06780000, 0x06600000, 0x06380000,
139 0x06300000, 0x06000000, 0x06480000, 0x06580000,
140 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
141 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
142 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
143 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
144 0x01280000, 0x01400000, 0x01080000, 0x01680000,
145 0x01500000, 0x01180000, 0x01200000, 0x01100000,
146 0x01700000, 0x01780000, 0x01600000, 0x01380000,
147 0x01300000, 0x01000000, 0x01480000, 0x01580000,
148 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
149 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
150 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
151 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
152 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
153 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
154 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
155 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
158 0x30000002, 0x60000002, 0x38000002, 0x08000002,
159 0x28000002, 0x78000002, 0x68000002, 0x40000002,
160 0x20000002, 0x50000002, 0x48000002, 0x70000002,
161 0x00000002, 0x18000002, 0x58000002, 0x10000002,
162 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
163 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
164 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
165 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
166 0x30000005, 0x60000005, 0x38000005, 0x08000005,
167 0x28000005, 0x78000005, 0x68000005, 0x40000005,
168 0x20000005, 0x50000005, 0x48000005, 0x70000005,
169 0x00000005, 0x18000005, 0x58000005, 0x10000005,
170 0x30000000, 0x60000000, 0x38000000, 0x08000000,
171 0x28000000, 0x78000000, 0x68000000, 0x40000000,
172 0x20000000, 0x50000000, 0x48000000, 0x70000000,
173 0x00000000, 0x18000000, 0x58000000, 0x10000000,
174 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
175 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
176 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
177 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
178 0x30000001, 0x60000001, 0x38000001, 0x08000001,
179 0x28000001, 0x78000001, 0x68000001, 0x40000001,
180 0x20000001, 0x50000001, 0x48000001, 0x70000001,
181 0x00000001, 0x18000001, 0x58000001, 0x10000001,
182 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
183 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
184 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
185 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
186 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
187 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
188 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
189 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
190 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
191 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
192 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
193 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
194 0x30000003, 0x60000003, 0x38000003, 0x08000003,
195 0x28000003, 0x78000003, 0x68000003, 0x40000003,
196 0x20000003, 0x50000003, 0x48000003, 0x70000003,
197 0x00000003, 0x18000003, 0x58000003, 0x10000003,
198 0x30000004, 0x60000004, 0x38000004, 0x08000004,
199 0x28000004, 0x78000004, 0x68000004, 0x40000004,
200 0x20000004, 0x50000004, 0x48000004, 0x70000004,
201 0x00000004, 0x18000004, 0x58000004, 0x10000004,
202 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
203 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
204 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
205 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
206 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
207 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
208 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
209 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
210 0x30000006, 0x60000006, 0x38000006, 0x08000006,
211 0x28000006, 0x78000006, 0x68000006, 0x40000006,
212 0x20000006, 0x50000006, 0x48000006, 0x70000006,
213 0x00000006, 0x18000006, 0x58000006, 0x10000006,
214 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
215 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
216 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
217 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
218 0x30000007, 0x60000007, 0x38000007, 0x08000007,
219 0x28000007, 0x78000007, 0x68000007, 0x40000007,
220 0x20000007, 0x50000007, 0x48000007, 0x70000007,
221 0x00000007, 0x18000007, 0x58000007, 0x10000007,
224 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
225 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
226 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
227 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
228 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
229 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
230 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
231 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
232 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
233 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
234 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
235 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
236 0x00000068, 0x00000058, 0x00000020, 0x00000008,
237 0x00000018, 0x00000078, 0x00000028, 0x00000048,
238 0x00000000, 0x00000050, 0x00000070, 0x00000038,
239 0x00000030, 0x00000040, 0x00000010, 0x00000060,
240 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
241 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
242 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
243 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
244 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
245 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
246 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
247 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
248 0x00000568, 0x00000558, 0x00000520, 0x00000508,
249 0x00000518, 0x00000578, 0x00000528, 0x00000548,
250 0x00000500, 0x00000550, 0x00000570, 0x00000538,
251 0x00000530, 0x00000540, 0x00000510, 0x00000560,
252 0x00000268, 0x00000258, 0x00000220, 0x00000208,
253 0x00000218, 0x00000278, 0x00000228, 0x00000248,
254 0x00000200, 0x00000250, 0x00000270, 0x00000238,
255 0x00000230, 0x00000240, 0x00000210, 0x00000260,
256 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
257 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
258 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
259 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
260 0x00000168, 0x00000158, 0x00000120, 0x00000108,
261 0x00000118, 0x00000178, 0x00000128, 0x00000148,
262 0x00000100, 0x00000150, 0x00000170, 0x00000138,
263 0x00000130, 0x00000140, 0x00000110, 0x00000160,
264 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
265 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
266 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
267 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
268 0x00000768, 0x00000758, 0x00000720, 0x00000708,
269 0x00000718, 0x00000778, 0x00000728, 0x00000748,
270 0x00000700, 0x00000750, 0x00000770, 0x00000738,
271 0x00000730, 0x00000740, 0x00000710, 0x00000760,
272 0x00000368, 0x00000358, 0x00000320, 0x00000308,
273 0x00000318, 0x00000378, 0x00000328, 0x00000348,
274 0x00000300, 0x00000350, 0x00000370, 0x00000338,
275 0x00000330, 0x00000340, 0x00000310, 0x00000360,
276 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
277 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
278 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
279 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
280 0x00000468, 0x00000458, 0x00000420, 0x00000408,
281 0x00000418, 0x00000478, 0x00000428, 0x00000448,
282 0x00000400, 0x00000450, 0x00000470, 0x00000438,
283 0x00000430, 0x00000440, 0x00000410, 0x00000460,
284 0x00000668, 0x00000658, 0x00000620, 0x00000608,
285 0x00000618, 0x00000678, 0x00000628, 0x00000648,
286 0x00000600, 0x00000650, 0x00000670, 0x00000638,
287 0x00000630, 0x00000640, 0x00000610, 0x00000660,
292 #define BOX(i,n,S) (u32x) ((S)[(n)][(i)])
296 #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
299 #define round(k1,k2,tbl) \
303 l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
304 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
305 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
306 BOX (amd_bfe (t, 24, 8), 3, tbl); \
308 r ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
309 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
310 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
311 BOX (amd_bfe (t, 24, 8), 3, tbl); \
314 #define R(k,h,s,i,t) \
320 round (k[0], k[1], t); \
321 round (k[2], k[3], t); \
322 round (k[4], k[5], t); \
323 round (k[6], k[7], t); \
324 round (k[0], k[1], t); \
325 round (k[2], k[3], t); \
326 round (k[4], k[5], t); \
327 round (k[6], k[7], t); \
328 round (k[0], k[1], t); \
329 round (k[2], k[3], t); \
330 round (k[4], k[5], t); \
331 round (k[6], k[7], t); \
332 round (k[7], k[6], t); \
333 round (k[5], k[4], t); \
334 round (k[3], k[2], t); \
335 round (k[1], k[0], t); \
341 w[0] = u[0] ^ v[0]; \
342 w[1] = u[1] ^ v[1]; \
343 w[2] = u[2] ^ v[2]; \
344 w[3] = u[3] ^ v[3]; \
345 w[4] = u[4] ^ v[4]; \
346 w[5] = u[5] ^ v[5]; \
347 w[6] = u[6] ^ v[6]; \
351 k[0] = ((w[0] & 0x000000ff) << 0) \
352 | ((w[2] & 0x000000ff) << 8) \
353 | ((w[4] & 0x000000ff) << 16) \
354 | ((w[6] & 0x000000ff) << 24); \
355 k[1] = ((w[0] & 0x0000ff00) >> 8) \
356 | ((w[2] & 0x0000ff00) >> 0) \
357 | ((w[4] & 0x0000ff00) << 8) \
358 | ((w[6] & 0x0000ff00) << 16); \
359 k[2] = ((w[0] & 0x00ff0000) >> 16) \
360 | ((w[2] & 0x00ff0000) >> 8) \
361 | ((w[4] & 0x00ff0000) << 0) \
362 | ((w[6] & 0x00ff0000) << 8); \
363 k[3] = ((w[0] & 0xff000000) >> 24) \
364 | ((w[2] & 0xff000000) >> 16) \
365 | ((w[4] & 0xff000000) >> 8) \
366 | ((w[6] & 0xff000000) >> 0); \
367 k[4] = ((w[1] & 0x000000ff) << 0) \
368 | ((w[3] & 0x000000ff) << 8) \
369 | ((w[5] & 0x000000ff) << 16) \
370 | ((w[7] & 0x000000ff) << 24); \
371 k[5] = ((w[1] & 0x0000ff00) >> 8) \
372 | ((w[3] & 0x0000ff00) >> 0) \
373 | ((w[5] & 0x0000ff00) << 8) \
374 | ((w[7] & 0x0000ff00) << 16); \
375 k[6] = ((w[1] & 0x00ff0000) >> 16) \
376 | ((w[3] & 0x00ff0000) >> 8) \
377 | ((w[5] & 0x00ff0000) << 0) \
378 | ((w[7] & 0x00ff0000) << 8); \
379 k[7] = ((w[1] & 0xff000000) >> 24) \
380 | ((w[3] & 0xff000000) >> 16) \
381 | ((w[5] & 0xff000000) >> 8) \
382 | ((w[7] & 0xff000000) >> 0);
419 x[0] ^= 0xff00ff00; \
420 x[1] ^= 0xff00ff00; \
421 x[2] ^= 0x00ff00ff; \
422 x[3] ^= 0x00ff00ff; \
423 x[4] ^= 0x00ffff00; \
424 x[5] ^= 0xff0000ff; \
425 x[6] ^= 0x000000ff; \
428 #define SHIFT12(u,m,s) \
429 u[0] = m[0] ^ s[6]; \
430 u[1] = m[1] ^ s[7]; \
431 u[2] = m[2] ^ (s[0] << 16) \
433 ^ (s[0] & 0x0000ffff) \
434 ^ (s[1] & 0x0000ffff) \
439 ^ (s[7] & 0xffff0000) \
441 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
443 ^ (s[1] & 0x0000ffff) \
452 ^ (s[7] & 0x0000ffff) \
455 u[4] = m[4] ^ (s[0] & 0xffff0000) \
458 ^ (s[1] & 0xffff0000) \
467 ^ (s[7] & 0x0000ffff) \
470 u[5] = m[5] ^ (s[0] << 16) \
472 ^ (s[0] & 0xffff0000) \
473 ^ (s[1] & 0x0000ffff) \
483 ^ (s[7] & 0xffff0000) \
499 u[7] = m[7] ^ (s[0] & 0xffff0000) \
501 ^ (s[1] & 0x0000ffff) \
510 ^ (s[7] & 0x0000ffff) \
514 #define SHIFT16(h,v,u) \
515 v[0] = h[0] ^ (u[1] << 16) \
517 v[1] = h[1] ^ (u[2] << 16) \
519 v[2] = h[2] ^ (u[3] << 16) \
521 v[3] = h[3] ^ (u[4] << 16) \
523 v[4] = h[4] ^ (u[5] << 16) \
525 v[5] = h[5] ^ (u[6] << 16) \
527 v[6] = h[6] ^ (u[7] << 16) \
529 v[7] = h[7] ^ (u[0] & 0xffff0000) \
532 ^ (u[1] & 0xffff0000) \
535 ^ (u[7] & 0xffff0000);
537 #define SHIFT61(h,v) \
538 h[0] = (v[0] & 0xffff0000) \
542 ^ (v[1] & 0xffff0000) \
551 ^ (v[7] & 0x0000ffff); \
552 h[1] = (v[0] << 16) \
554 ^ (v[0] & 0xffff0000) \
555 ^ (v[1] & 0x0000ffff) \
563 ^ (v[7] & 0xffff0000) \
565 h[2] = (v[0] & 0x0000ffff) \
569 ^ (v[1] & 0xffff0000) \
577 ^ (v[7] & 0x0000ffff) \
580 h[3] = (v[0] << 16) \
582 ^ (v[0] & 0xffff0000) \
583 ^ (v[1] & 0xffff0000) \
593 ^ (v[7] & 0x0000ffff) \
595 h[4] = (v[0] >> 16) \
609 h[5] = (v[0] << 16) \
610 ^ (v[0] & 0xffff0000) \
613 ^ (v[1] & 0xffff0000) \
627 ^ (v[7] & 0xffff0000); \
659 #define PASS0(h,s,u,v,t) \
670 #define PASS2(h,s,u,v,t) \
682 #define PASS4(h,s,u,v,t) \
693 #define PASS6(h,s,u,v,t) \
702 static void m06900m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_tables[4][256])
708 const u32 gid = get_global_id (0);
709 const u32 lid = get_local_id (0);
715 const u32 w14 = pw_len * 8;
723 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
725 const u32 w0r = bfs_buf[il_pos].i;
764 state_m[0] = state[0];
765 state_m[1] = state[1];
766 state_m[2] = state[2];
767 state_m[3] = state[3];
768 state_m[4] = state[4];
769 state_m[5] = state[5];
770 state_m[6] = state[6];
771 state_m[7] = state[7];
784 PASS0 (state, tmp, state_m, data_m, s_tables);
785 PASS2 (state, tmp, state_m, data_m, s_tables);
786 PASS4 (state, tmp, state_m, data_m, s_tables);
787 PASS6 (state, tmp, state_m, data_m, s_tables);
789 SHIFT12 (state_m, data, tmp);
790 SHIFT16 (state, data_m, state_m);
791 SHIFT61 (state, data_m);
804 state_m[0] = state[0];
805 state_m[1] = state[1];
806 state_m[2] = state[2];
807 state_m[3] = state[3];
808 state_m[4] = state[4];
809 state_m[5] = state[5];
810 state_m[6] = state[6];
811 state_m[7] = state[7];
822 PASS0 (state, tmp, state_m, data_m, s_tables);
823 PASS2 (state, tmp, state_m, data_m, s_tables);
824 PASS4 (state, tmp, state_m, data_m, s_tables);
825 PASS6 (state, tmp, state_m, data_m, s_tables);
827 SHIFT12 (state_m, data, tmp);
828 SHIFT16 (state, data_m, state_m);
829 SHIFT61 (state, data_m);
842 state_m[0] = state[0];
843 state_m[1] = state[1];
844 state_m[2] = state[2];
845 state_m[3] = state[3];
846 state_m[4] = state[4];
847 state_m[5] = state[5];
848 state_m[6] = state[6];
849 state_m[7] = state[7];
860 PASS0 (state, tmp, state_m, data_m, s_tables);
861 PASS2 (state, tmp, state_m, data_m, s_tables);
862 PASS4 (state, tmp, state_m, data_m, s_tables);
863 PASS6 (state, tmp, state_m, data_m, s_tables);
865 SHIFT12 (state_m, data, tmp);
866 SHIFT16 (state, data_m, state_m);
867 SHIFT61 (state, data_m);
871 const u32 r0 = state[0];
872 const u32 r1 = state[1];
873 const u32 r2 = state[2];
874 const u32 r3 = state[3];
879 static void m06900s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_tables[4][256])
885 const u32 gid = get_global_id (0);
886 const u32 lid = get_local_id (0);
892 const u32 w14 = pw_len * 8;
898 const u32 search[4] =
900 digests_buf[digests_offset].digest_buf[DGST_R0],
901 digests_buf[digests_offset].digest_buf[DGST_R1],
902 digests_buf[digests_offset].digest_buf[DGST_R2],
903 digests_buf[digests_offset].digest_buf[DGST_R3]
912 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
914 const u32 w0r = bfs_buf[il_pos].i;
953 state_m[0] = state[0];
954 state_m[1] = state[1];
955 state_m[2] = state[2];
956 state_m[3] = state[3];
957 state_m[4] = state[4];
958 state_m[5] = state[5];
959 state_m[6] = state[6];
960 state_m[7] = state[7];
973 PASS0 (state, tmp, state_m, data_m, s_tables);
974 PASS2 (state, tmp, state_m, data_m, s_tables);
975 PASS4 (state, tmp, state_m, data_m, s_tables);
976 PASS6 (state, tmp, state_m, data_m, s_tables);
978 SHIFT12 (state_m, data, tmp);
979 SHIFT16 (state, data_m, state_m);
980 SHIFT61 (state, data_m);
993 state_m[0] = state[0];
994 state_m[1] = state[1];
995 state_m[2] = state[2];
996 state_m[3] = state[3];
997 state_m[4] = state[4];
998 state_m[5] = state[5];
999 state_m[6] = state[6];
1000 state_m[7] = state[7];
1002 data_m[0] = data[0];
1003 data_m[1] = data[1];
1004 data_m[2] = data[2];
1005 data_m[3] = data[3];
1006 data_m[4] = data[4];
1007 data_m[5] = data[5];
1008 data_m[6] = data[6];
1009 data_m[7] = data[7];
1011 PASS0 (state, tmp, state_m, data_m, s_tables);
1012 PASS2 (state, tmp, state_m, data_m, s_tables);
1013 PASS4 (state, tmp, state_m, data_m, s_tables);
1014 PASS6 (state, tmp, state_m, data_m, s_tables);
1016 SHIFT12 (state_m, data, tmp);
1017 SHIFT16 (state, data_m, state_m);
1018 SHIFT61 (state, data_m);
1022 data[0] = state[ 8];
1023 data[1] = state[ 9];
1024 data[2] = state[10];
1025 data[3] = state[11];
1026 data[4] = state[12];
1027 data[5] = state[13];
1028 data[6] = state[14];
1029 data[7] = state[15];
1031 state_m[0] = state[0];
1032 state_m[1] = state[1];
1033 state_m[2] = state[2];
1034 state_m[3] = state[3];
1035 state_m[4] = state[4];
1036 state_m[5] = state[5];
1037 state_m[6] = state[6];
1038 state_m[7] = state[7];
1040 data_m[0] = data[0];
1041 data_m[1] = data[1];
1042 data_m[2] = data[2];
1043 data_m[3] = data[3];
1044 data_m[4] = data[4];
1045 data_m[5] = data[5];
1046 data_m[6] = data[6];
1047 data_m[7] = data[7];
1049 PASS0 (state, tmp, state_m, data_m, s_tables);
1050 PASS2 (state, tmp, state_m, data_m, s_tables);
1051 PASS4 (state, tmp, state_m, data_m, s_tables);
1052 PASS6 (state, tmp, state_m, data_m, s_tables);
1054 SHIFT12 (state_m, data, tmp);
1055 SHIFT16 (state, data_m, state_m);
1056 SHIFT61 (state, data_m);
1060 const u32 r0 = state[0];
1061 const u32 r1 = state[1];
1062 const u32 r2 = state[2];
1063 const u32 r3 = state[3];
1069 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1075 const u32 gid = get_global_id (0);
1081 const u32 lid = get_local_id (0);
1085 w0[0] = pws[gid].i[ 0];
1086 w0[1] = pws[gid].i[ 1];
1087 w0[2] = pws[gid].i[ 2];
1088 w0[3] = pws[gid].i[ 3];
1111 const u32 pw_len = pws[gid].pw_len;
1117 __local u32 s_tables[4][256];
1119 const u32 lid4 = lid * 4;
1121 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1122 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1123 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1124 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1126 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1127 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1128 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1129 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1131 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1132 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1133 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1134 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1136 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1137 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1138 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1139 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1141 barrier (CLK_LOCAL_MEM_FENCE);
1143 if (gid >= gid_max) return;
1149 m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_tables);
1152 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1158 const u32 gid = get_global_id (0);
1164 const u32 lid = get_local_id (0);
1168 w0[0] = pws[gid].i[ 0];
1169 w0[1] = pws[gid].i[ 1];
1170 w0[2] = pws[gid].i[ 2];
1171 w0[3] = pws[gid].i[ 3];
1175 w1[0] = pws[gid].i[ 4];
1176 w1[1] = pws[gid].i[ 5];
1177 w1[2] = pws[gid].i[ 6];
1178 w1[3] = pws[gid].i[ 7];
1194 const u32 pw_len = pws[gid].pw_len;
1200 __local u32 s_tables[4][256];
1202 const u32 lid4 = lid * 4;
1204 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1205 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1206 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1207 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1209 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1210 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1211 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1212 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1214 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1215 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1216 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1217 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1219 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1220 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1221 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1222 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1224 barrier (CLK_LOCAL_MEM_FENCE);
1226 if (gid >= gid_max) return;
1232 m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_tables);
1235 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1239 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1245 const u32 gid = get_global_id (0);
1251 const u32 lid = get_local_id (0);
1255 w0[0] = pws[gid].i[ 0];
1256 w0[1] = pws[gid].i[ 1];
1257 w0[2] = pws[gid].i[ 2];
1258 w0[3] = pws[gid].i[ 3];
1281 const u32 pw_len = pws[gid].pw_len;
1287 __local u32 s_tables[4][256];
1289 const u32 lid4 = lid * 4;
1291 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1292 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1293 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1294 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1296 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1297 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1298 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1299 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1301 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1302 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1303 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1304 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1306 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1307 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1308 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1309 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1311 barrier (CLK_LOCAL_MEM_FENCE);
1313 if (gid >= gid_max) return;
1319 m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_tables);
1322 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1328 const u32 gid = get_global_id (0);
1334 const u32 lid = get_local_id (0);
1338 w0[0] = pws[gid].i[ 0];
1339 w0[1] = pws[gid].i[ 1];
1340 w0[2] = pws[gid].i[ 2];
1341 w0[3] = pws[gid].i[ 3];
1345 w1[0] = pws[gid].i[ 4];
1346 w1[1] = pws[gid].i[ 5];
1347 w1[2] = pws[gid].i[ 6];
1348 w1[3] = pws[gid].i[ 7];
1364 const u32 pw_len = pws[gid].pw_len;
1370 __local u32 s_tables[4][256];
1372 const u32 lid4 = lid * 4;
1374 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1375 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1376 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1377 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1379 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1380 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1381 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1382 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1384 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1385 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1386 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1387 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1389 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1390 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1391 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1392 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1394 barrier (CLK_LOCAL_MEM_FENCE);
1396 if (gid >= gid_max) return;
1402 m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_tables);
1405 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)