2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
31 #include "include/rp_gpu.h"
35 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
36 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
40 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
44 __constant u32 c_tables[4][256] =
47 0x00072000, 0x00075000, 0x00074800, 0x00071000,
48 0x00076800, 0x00074000, 0x00070000, 0x00077000,
49 0x00073000, 0x00075800, 0x00070800, 0x00076000,
50 0x00073800, 0x00077800, 0x00072800, 0x00071800,
51 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
52 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
53 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
54 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
55 0x00022000, 0x00025000, 0x00024800, 0x00021000,
56 0x00026800, 0x00024000, 0x00020000, 0x00027000,
57 0x00023000, 0x00025800, 0x00020800, 0x00026000,
58 0x00023800, 0x00027800, 0x00022800, 0x00021800,
59 0x00062000, 0x00065000, 0x00064800, 0x00061000,
60 0x00066800, 0x00064000, 0x00060000, 0x00067000,
61 0x00063000, 0x00065800, 0x00060800, 0x00066000,
62 0x00063800, 0x00067800, 0x00062800, 0x00061800,
63 0x00032000, 0x00035000, 0x00034800, 0x00031000,
64 0x00036800, 0x00034000, 0x00030000, 0x00037000,
65 0x00033000, 0x00035800, 0x00030800, 0x00036000,
66 0x00033800, 0x00037800, 0x00032800, 0x00031800,
67 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
68 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
69 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
70 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
71 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
72 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
73 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
74 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
75 0x00052000, 0x00055000, 0x00054800, 0x00051000,
76 0x00056800, 0x00054000, 0x00050000, 0x00057000,
77 0x00053000, 0x00055800, 0x00050800, 0x00056000,
78 0x00053800, 0x00057800, 0x00052800, 0x00051800,
79 0x00012000, 0x00015000, 0x00014800, 0x00011000,
80 0x00016800, 0x00014000, 0x00010000, 0x00017000,
81 0x00013000, 0x00015800, 0x00010800, 0x00016000,
82 0x00013800, 0x00017800, 0x00012800, 0x00011800,
83 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
84 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
85 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
86 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
87 0x00042000, 0x00045000, 0x00044800, 0x00041000,
88 0x00046800, 0x00044000, 0x00040000, 0x00047000,
89 0x00043000, 0x00045800, 0x00040800, 0x00046000,
90 0x00043800, 0x00047800, 0x00042800, 0x00041800,
91 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
92 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
93 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
94 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
95 0x00002000, 0x00005000, 0x00004800, 0x00001000,
96 0x00006800, 0x00004000, 0x00000000, 0x00007000,
97 0x00003000, 0x00005800, 0x00000800, 0x00006000,
98 0x00003800, 0x00007800, 0x00002800, 0x00001800,
99 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
100 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
101 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
102 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
103 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
104 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
105 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
106 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
107 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
108 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
109 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
110 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
113 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
114 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
115 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
116 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
117 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
118 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
119 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
120 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
121 0x05280000, 0x05400000, 0x05080000, 0x05680000,
122 0x05500000, 0x05180000, 0x05200000, 0x05100000,
123 0x05700000, 0x05780000, 0x05600000, 0x05380000,
124 0x05300000, 0x05000000, 0x05480000, 0x05580000,
125 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
126 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
127 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
128 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
129 0x00280000, 0x00400000, 0x00080000, 0x00680000,
130 0x00500000, 0x00180000, 0x00200000, 0x00100000,
131 0x00700000, 0x00780000, 0x00600000, 0x00380000,
132 0x00300000, 0x00000000, 0x00480000, 0x00580000,
133 0x04280000, 0x04400000, 0x04080000, 0x04680000,
134 0x04500000, 0x04180000, 0x04200000, 0x04100000,
135 0x04700000, 0x04780000, 0x04600000, 0x04380000,
136 0x04300000, 0x04000000, 0x04480000, 0x04580000,
137 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
138 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
139 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
140 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
141 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
142 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
143 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
144 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
145 0x07280000, 0x07400000, 0x07080000, 0x07680000,
146 0x07500000, 0x07180000, 0x07200000, 0x07100000,
147 0x07700000, 0x07780000, 0x07600000, 0x07380000,
148 0x07300000, 0x07000000, 0x07480000, 0x07580000,
149 0x02280000, 0x02400000, 0x02080000, 0x02680000,
150 0x02500000, 0x02180000, 0x02200000, 0x02100000,
151 0x02700000, 0x02780000, 0x02600000, 0x02380000,
152 0x02300000, 0x02000000, 0x02480000, 0x02580000,
153 0x03280000, 0x03400000, 0x03080000, 0x03680000,
154 0x03500000, 0x03180000, 0x03200000, 0x03100000,
155 0x03700000, 0x03780000, 0x03600000, 0x03380000,
156 0x03300000, 0x03000000, 0x03480000, 0x03580000,
157 0x06280000, 0x06400000, 0x06080000, 0x06680000,
158 0x06500000, 0x06180000, 0x06200000, 0x06100000,
159 0x06700000, 0x06780000, 0x06600000, 0x06380000,
160 0x06300000, 0x06000000, 0x06480000, 0x06580000,
161 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
162 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
163 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
164 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
165 0x01280000, 0x01400000, 0x01080000, 0x01680000,
166 0x01500000, 0x01180000, 0x01200000, 0x01100000,
167 0x01700000, 0x01780000, 0x01600000, 0x01380000,
168 0x01300000, 0x01000000, 0x01480000, 0x01580000,
169 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
170 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
171 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
172 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
173 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
174 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
175 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
176 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
179 0x30000002, 0x60000002, 0x38000002, 0x08000002,
180 0x28000002, 0x78000002, 0x68000002, 0x40000002,
181 0x20000002, 0x50000002, 0x48000002, 0x70000002,
182 0x00000002, 0x18000002, 0x58000002, 0x10000002,
183 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
184 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
185 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
186 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
187 0x30000005, 0x60000005, 0x38000005, 0x08000005,
188 0x28000005, 0x78000005, 0x68000005, 0x40000005,
189 0x20000005, 0x50000005, 0x48000005, 0x70000005,
190 0x00000005, 0x18000005, 0x58000005, 0x10000005,
191 0x30000000, 0x60000000, 0x38000000, 0x08000000,
192 0x28000000, 0x78000000, 0x68000000, 0x40000000,
193 0x20000000, 0x50000000, 0x48000000, 0x70000000,
194 0x00000000, 0x18000000, 0x58000000, 0x10000000,
195 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
196 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
197 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
198 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
199 0x30000001, 0x60000001, 0x38000001, 0x08000001,
200 0x28000001, 0x78000001, 0x68000001, 0x40000001,
201 0x20000001, 0x50000001, 0x48000001, 0x70000001,
202 0x00000001, 0x18000001, 0x58000001, 0x10000001,
203 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
204 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
205 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
206 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
207 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
208 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
209 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
210 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
211 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
212 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
213 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
214 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
215 0x30000003, 0x60000003, 0x38000003, 0x08000003,
216 0x28000003, 0x78000003, 0x68000003, 0x40000003,
217 0x20000003, 0x50000003, 0x48000003, 0x70000003,
218 0x00000003, 0x18000003, 0x58000003, 0x10000003,
219 0x30000004, 0x60000004, 0x38000004, 0x08000004,
220 0x28000004, 0x78000004, 0x68000004, 0x40000004,
221 0x20000004, 0x50000004, 0x48000004, 0x70000004,
222 0x00000004, 0x18000004, 0x58000004, 0x10000004,
223 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
224 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
225 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
226 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
227 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
228 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
229 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
230 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
231 0x30000006, 0x60000006, 0x38000006, 0x08000006,
232 0x28000006, 0x78000006, 0x68000006, 0x40000006,
233 0x20000006, 0x50000006, 0x48000006, 0x70000006,
234 0x00000006, 0x18000006, 0x58000006, 0x10000006,
235 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
236 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
237 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
238 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
239 0x30000007, 0x60000007, 0x38000007, 0x08000007,
240 0x28000007, 0x78000007, 0x68000007, 0x40000007,
241 0x20000007, 0x50000007, 0x48000007, 0x70000007,
242 0x00000007, 0x18000007, 0x58000007, 0x10000007,
245 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
246 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
247 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
248 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
249 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
250 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
251 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
252 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
253 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
254 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
255 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
256 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
257 0x00000068, 0x00000058, 0x00000020, 0x00000008,
258 0x00000018, 0x00000078, 0x00000028, 0x00000048,
259 0x00000000, 0x00000050, 0x00000070, 0x00000038,
260 0x00000030, 0x00000040, 0x00000010, 0x00000060,
261 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
262 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
263 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
264 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
265 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
266 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
267 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
268 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
269 0x00000568, 0x00000558, 0x00000520, 0x00000508,
270 0x00000518, 0x00000578, 0x00000528, 0x00000548,
271 0x00000500, 0x00000550, 0x00000570, 0x00000538,
272 0x00000530, 0x00000540, 0x00000510, 0x00000560,
273 0x00000268, 0x00000258, 0x00000220, 0x00000208,
274 0x00000218, 0x00000278, 0x00000228, 0x00000248,
275 0x00000200, 0x00000250, 0x00000270, 0x00000238,
276 0x00000230, 0x00000240, 0x00000210, 0x00000260,
277 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
278 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
279 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
280 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
281 0x00000168, 0x00000158, 0x00000120, 0x00000108,
282 0x00000118, 0x00000178, 0x00000128, 0x00000148,
283 0x00000100, 0x00000150, 0x00000170, 0x00000138,
284 0x00000130, 0x00000140, 0x00000110, 0x00000160,
285 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
286 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
287 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
288 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
289 0x00000768, 0x00000758, 0x00000720, 0x00000708,
290 0x00000718, 0x00000778, 0x00000728, 0x00000748,
291 0x00000700, 0x00000750, 0x00000770, 0x00000738,
292 0x00000730, 0x00000740, 0x00000710, 0x00000760,
293 0x00000368, 0x00000358, 0x00000320, 0x00000308,
294 0x00000318, 0x00000378, 0x00000328, 0x00000348,
295 0x00000300, 0x00000350, 0x00000370, 0x00000338,
296 0x00000330, 0x00000340, 0x00000310, 0x00000360,
297 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
298 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
299 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
300 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
301 0x00000468, 0x00000458, 0x00000420, 0x00000408,
302 0x00000418, 0x00000478, 0x00000428, 0x00000448,
303 0x00000400, 0x00000450, 0x00000470, 0x00000438,
304 0x00000430, 0x00000440, 0x00000410, 0x00000460,
305 0x00000668, 0x00000658, 0x00000620, 0x00000608,
306 0x00000618, 0x00000678, 0x00000628, 0x00000648,
307 0x00000600, 0x00000650, 0x00000670, 0x00000638,
308 0x00000630, 0x00000640, 0x00000610, 0x00000660,
313 #define BOX(i,n,S) (u32x) ((S)[(n)][(i)])
317 #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
320 #define round(k1,k2,tbl) \
324 l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
325 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
326 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
327 BOX (amd_bfe (t, 24, 8), 3, tbl); \
329 r ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
330 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
331 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
332 BOX (amd_bfe (t, 24, 8), 3, tbl); \
335 #define R(k,h,s,i,t) \
341 round (k[0], k[1], t); \
342 round (k[2], k[3], t); \
343 round (k[4], k[5], t); \
344 round (k[6], k[7], t); \
345 round (k[0], k[1], t); \
346 round (k[2], k[3], t); \
347 round (k[4], k[5], t); \
348 round (k[6], k[7], t); \
349 round (k[0], k[1], t); \
350 round (k[2], k[3], t); \
351 round (k[4], k[5], t); \
352 round (k[6], k[7], t); \
353 round (k[7], k[6], t); \
354 round (k[5], k[4], t); \
355 round (k[3], k[2], t); \
356 round (k[1], k[0], t); \
362 w[0] = u[0] ^ v[0]; \
363 w[1] = u[1] ^ v[1]; \
364 w[2] = u[2] ^ v[2]; \
365 w[3] = u[3] ^ v[3]; \
366 w[4] = u[4] ^ v[4]; \
367 w[5] = u[5] ^ v[5]; \
368 w[6] = u[6] ^ v[6]; \
372 k[0] = ((w[0] & 0x000000ff) << 0) \
373 | ((w[2] & 0x000000ff) << 8) \
374 | ((w[4] & 0x000000ff) << 16) \
375 | ((w[6] & 0x000000ff) << 24); \
376 k[1] = ((w[0] & 0x0000ff00) >> 8) \
377 | ((w[2] & 0x0000ff00) >> 0) \
378 | ((w[4] & 0x0000ff00) << 8) \
379 | ((w[6] & 0x0000ff00) << 16); \
380 k[2] = ((w[0] & 0x00ff0000) >> 16) \
381 | ((w[2] & 0x00ff0000) >> 8) \
382 | ((w[4] & 0x00ff0000) << 0) \
383 | ((w[6] & 0x00ff0000) << 8); \
384 k[3] = ((w[0] & 0xff000000) >> 24) \
385 | ((w[2] & 0xff000000) >> 16) \
386 | ((w[4] & 0xff000000) >> 8) \
387 | ((w[6] & 0xff000000) >> 0); \
388 k[4] = ((w[1] & 0x000000ff) << 0) \
389 | ((w[3] & 0x000000ff) << 8) \
390 | ((w[5] & 0x000000ff) << 16) \
391 | ((w[7] & 0x000000ff) << 24); \
392 k[5] = ((w[1] & 0x0000ff00) >> 8) \
393 | ((w[3] & 0x0000ff00) >> 0) \
394 | ((w[5] & 0x0000ff00) << 8) \
395 | ((w[7] & 0x0000ff00) << 16); \
396 k[6] = ((w[1] & 0x00ff0000) >> 16) \
397 | ((w[3] & 0x00ff0000) >> 8) \
398 | ((w[5] & 0x00ff0000) << 0) \
399 | ((w[7] & 0x00ff0000) << 8); \
400 k[7] = ((w[1] & 0xff000000) >> 24) \
401 | ((w[3] & 0xff000000) >> 16) \
402 | ((w[5] & 0xff000000) >> 8) \
403 | ((w[7] & 0xff000000) >> 0);
440 x[0] ^= 0xff00ff00; \
441 x[1] ^= 0xff00ff00; \
442 x[2] ^= 0x00ff00ff; \
443 x[3] ^= 0x00ff00ff; \
444 x[4] ^= 0x00ffff00; \
445 x[5] ^= 0xff0000ff; \
446 x[6] ^= 0x000000ff; \
449 #define SHIFT12(u,m,s) \
450 u[0] = m[0] ^ s[6]; \
451 u[1] = m[1] ^ s[7]; \
452 u[2] = m[2] ^ (s[0] << 16) \
454 ^ (s[0] & 0x0000ffff) \
455 ^ (s[1] & 0x0000ffff) \
460 ^ (s[7] & 0xffff0000) \
462 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
464 ^ (s[1] & 0x0000ffff) \
473 ^ (s[7] & 0x0000ffff) \
476 u[4] = m[4] ^ (s[0] & 0xffff0000) \
479 ^ (s[1] & 0xffff0000) \
488 ^ (s[7] & 0x0000ffff) \
491 u[5] = m[5] ^ (s[0] << 16) \
493 ^ (s[0] & 0xffff0000) \
494 ^ (s[1] & 0x0000ffff) \
504 ^ (s[7] & 0xffff0000) \
520 u[7] = m[7] ^ (s[0] & 0xffff0000) \
522 ^ (s[1] & 0x0000ffff) \
531 ^ (s[7] & 0x0000ffff) \
535 #define SHIFT16(h,v,u) \
536 v[0] = h[0] ^ (u[1] << 16) \
538 v[1] = h[1] ^ (u[2] << 16) \
540 v[2] = h[2] ^ (u[3] << 16) \
542 v[3] = h[3] ^ (u[4] << 16) \
544 v[4] = h[4] ^ (u[5] << 16) \
546 v[5] = h[5] ^ (u[6] << 16) \
548 v[6] = h[6] ^ (u[7] << 16) \
550 v[7] = h[7] ^ (u[0] & 0xffff0000) \
553 ^ (u[1] & 0xffff0000) \
556 ^ (u[7] & 0xffff0000);
558 #define SHIFT61(h,v) \
559 h[0] = (v[0] & 0xffff0000) \
563 ^ (v[1] & 0xffff0000) \
572 ^ (v[7] & 0x0000ffff); \
573 h[1] = (v[0] << 16) \
575 ^ (v[0] & 0xffff0000) \
576 ^ (v[1] & 0x0000ffff) \
584 ^ (v[7] & 0xffff0000) \
586 h[2] = (v[0] & 0x0000ffff) \
590 ^ (v[1] & 0xffff0000) \
598 ^ (v[7] & 0x0000ffff) \
601 h[3] = (v[0] << 16) \
603 ^ (v[0] & 0xffff0000) \
604 ^ (v[1] & 0xffff0000) \
614 ^ (v[7] & 0x0000ffff) \
616 h[4] = (v[0] >> 16) \
630 h[5] = (v[0] << 16) \
631 ^ (v[0] & 0xffff0000) \
634 ^ (v[1] & 0xffff0000) \
648 ^ (v[7] & 0xffff0000); \
680 #define PASS0(h,s,u,v,t) \
691 #define PASS2(h,s,u,v,t) \
703 #define PASS4(h,s,u,v,t) \
714 #define PASS6(h,s,u,v,t) \
723 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
729 const u32 lid = get_local_id (0);
735 const u32 gid = get_global_id (0);
739 pw_buf0[0] = pws[gid].i[ 0];
740 pw_buf0[1] = pws[gid].i[ 1];
741 pw_buf0[2] = pws[gid].i[ 2];
742 pw_buf0[3] = pws[gid].i[ 3];
746 pw_buf1[0] = pws[gid].i[ 4];
747 pw_buf1[1] = pws[gid].i[ 5];
748 pw_buf1[2] = pws[gid].i[ 6];
749 pw_buf1[3] = pws[gid].i[ 7];
751 const u32 pw_len = pws[gid].pw_len;
757 __local u32 s_tables[4][256];
759 const u32 lid4 = lid * 4;
761 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
762 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
763 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
764 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
766 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
767 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
768 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
769 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
771 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
772 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
773 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
774 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
776 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
777 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
778 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
779 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
781 barrier (CLK_LOCAL_MEM_FENCE);
783 if (gid >= gid_max) return;
789 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
819 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
821 u32 w14 = out_len * 8;
858 state_m[0] = state[0];
859 state_m[1] = state[1];
860 state_m[2] = state[2];
861 state_m[3] = state[3];
862 state_m[4] = state[4];
863 state_m[5] = state[5];
864 state_m[6] = state[6];
865 state_m[7] = state[7];
878 PASS0 (state, tmp, state_m, data_m, s_tables);
879 PASS2 (state, tmp, state_m, data_m, s_tables);
880 PASS4 (state, tmp, state_m, data_m, s_tables);
881 PASS6 (state, tmp, state_m, data_m, s_tables);
883 SHIFT12 (state_m, data, tmp);
884 SHIFT16 (state, data_m, state_m);
885 SHIFT61 (state, data_m);
898 state_m[0] = state[0];
899 state_m[1] = state[1];
900 state_m[2] = state[2];
901 state_m[3] = state[3];
902 state_m[4] = state[4];
903 state_m[5] = state[5];
904 state_m[6] = state[6];
905 state_m[7] = state[7];
916 PASS0 (state, tmp, state_m, data_m, s_tables);
917 PASS2 (state, tmp, state_m, data_m, s_tables);
918 PASS4 (state, tmp, state_m, data_m, s_tables);
919 PASS6 (state, tmp, state_m, data_m, s_tables);
921 SHIFT12 (state_m, data, tmp);
922 SHIFT16 (state, data_m, state_m);
923 SHIFT61 (state, data_m);
936 state_m[0] = state[0];
937 state_m[1] = state[1];
938 state_m[2] = state[2];
939 state_m[3] = state[3];
940 state_m[4] = state[4];
941 state_m[5] = state[5];
942 state_m[6] = state[6];
943 state_m[7] = state[7];
954 PASS0 (state, tmp, state_m, data_m, s_tables);
955 PASS2 (state, tmp, state_m, data_m, s_tables);
956 PASS4 (state, tmp, state_m, data_m, s_tables);
957 PASS6 (state, tmp, state_m, data_m, s_tables);
959 SHIFT12 (state_m, data, tmp);
960 SHIFT16 (state, data_m, state_m);
961 SHIFT61 (state, data_m);
965 const u32x r0 = state[0];
966 const u32x r1 = state[1];
967 const u32x r2 = state[2];
968 const u32x r3 = state[3];
970 #include VECT_COMPARE_M
974 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
978 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
982 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
988 const u32 lid = get_local_id (0);
994 const u32 gid = get_global_id (0);
998 pw_buf0[0] = pws[gid].i[ 0];
999 pw_buf0[1] = pws[gid].i[ 1];
1000 pw_buf0[2] = pws[gid].i[ 2];
1001 pw_buf0[3] = pws[gid].i[ 3];
1005 pw_buf1[0] = pws[gid].i[ 4];
1006 pw_buf1[1] = pws[gid].i[ 5];
1007 pw_buf1[2] = pws[gid].i[ 6];
1008 pw_buf1[3] = pws[gid].i[ 7];
1010 const u32 pw_len = pws[gid].pw_len;
1016 __local u32 s_tables[4][256];
1018 const u32 lid4 = lid * 4;
1020 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1021 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1022 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1023 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1025 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1026 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1027 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1028 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1030 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1031 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1032 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1033 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1035 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1036 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1037 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1038 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1040 barrier (CLK_LOCAL_MEM_FENCE);
1042 if (gid >= gid_max) return;
1048 const u32 search[4] =
1050 digests_buf[digests_offset].digest_buf[DGST_R0],
1051 digests_buf[digests_offset].digest_buf[DGST_R1],
1052 digests_buf[digests_offset].digest_buf[DGST_R2],
1053 digests_buf[digests_offset].digest_buf[DGST_R3]
1060 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
1090 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
1092 u32 w14 = out_len * 8;
1115 state[ 8] = data[0];
1116 state[ 9] = data[1];
1117 state[10] = data[2];
1118 state[11] = data[3];
1119 state[12] = data[4];
1120 state[13] = data[5];
1121 state[14] = data[6];
1122 state[15] = data[7];
1129 state_m[0] = state[0];
1130 state_m[1] = state[1];
1131 state_m[2] = state[2];
1132 state_m[3] = state[3];
1133 state_m[4] = state[4];
1134 state_m[5] = state[5];
1135 state_m[6] = state[6];
1136 state_m[7] = state[7];
1138 data_m[0] = data[0];
1139 data_m[1] = data[1];
1140 data_m[2] = data[2];
1141 data_m[3] = data[3];
1142 data_m[4] = data[4];
1143 data_m[5] = data[5];
1144 data_m[6] = data[6];
1145 data_m[7] = data[7];
1149 PASS0 (state, tmp, state_m, data_m, s_tables);
1150 PASS2 (state, tmp, state_m, data_m, s_tables);
1151 PASS4 (state, tmp, state_m, data_m, s_tables);
1152 PASS6 (state, tmp, state_m, data_m, s_tables);
1154 SHIFT12 (state_m, data, tmp);
1155 SHIFT16 (state, data_m, state_m);
1156 SHIFT61 (state, data_m);
1169 state_m[0] = state[0];
1170 state_m[1] = state[1];
1171 state_m[2] = state[2];
1172 state_m[3] = state[3];
1173 state_m[4] = state[4];
1174 state_m[5] = state[5];
1175 state_m[6] = state[6];
1176 state_m[7] = state[7];
1178 data_m[0] = data[0];
1179 data_m[1] = data[1];
1180 data_m[2] = data[2];
1181 data_m[3] = data[3];
1182 data_m[4] = data[4];
1183 data_m[5] = data[5];
1184 data_m[6] = data[6];
1185 data_m[7] = data[7];
1187 PASS0 (state, tmp, state_m, data_m, s_tables);
1188 PASS2 (state, tmp, state_m, data_m, s_tables);
1189 PASS4 (state, tmp, state_m, data_m, s_tables);
1190 PASS6 (state, tmp, state_m, data_m, s_tables);
1192 SHIFT12 (state_m, data, tmp);
1193 SHIFT16 (state, data_m, state_m);
1194 SHIFT61 (state, data_m);
1198 data[0] = state[ 8];
1199 data[1] = state[ 9];
1200 data[2] = state[10];
1201 data[3] = state[11];
1202 data[4] = state[12];
1203 data[5] = state[13];
1204 data[6] = state[14];
1205 data[7] = state[15];
1207 state_m[0] = state[0];
1208 state_m[1] = state[1];
1209 state_m[2] = state[2];
1210 state_m[3] = state[3];
1211 state_m[4] = state[4];
1212 state_m[5] = state[5];
1213 state_m[6] = state[6];
1214 state_m[7] = state[7];
1216 data_m[0] = data[0];
1217 data_m[1] = data[1];
1218 data_m[2] = data[2];
1219 data_m[3] = data[3];
1220 data_m[4] = data[4];
1221 data_m[5] = data[5];
1222 data_m[6] = data[6];
1223 data_m[7] = data[7];
1225 PASS0 (state, tmp, state_m, data_m, s_tables);
1226 PASS2 (state, tmp, state_m, data_m, s_tables);
1227 PASS4 (state, tmp, state_m, data_m, s_tables);
1228 PASS6 (state, tmp, state_m, data_m, s_tables);
1230 SHIFT12 (state_m, data, tmp);
1231 SHIFT16 (state, data_m, state_m);
1232 SHIFT61 (state, data_m);
1236 const u32x r0 = state[0];
1237 const u32x r1 = state[1];
1238 const u32x r2 = state[2];
1239 const u32x r3 = state[3];
1241 #include VECT_COMPARE_S
1245 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1249 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)