2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
34 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
38 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
39 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
42 __constant u32 c_tables[4][256] =
45 0x00072000, 0x00075000, 0x00074800, 0x00071000,
46 0x00076800, 0x00074000, 0x00070000, 0x00077000,
47 0x00073000, 0x00075800, 0x00070800, 0x00076000,
48 0x00073800, 0x00077800, 0x00072800, 0x00071800,
49 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
50 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
51 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
52 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
53 0x00022000, 0x00025000, 0x00024800, 0x00021000,
54 0x00026800, 0x00024000, 0x00020000, 0x00027000,
55 0x00023000, 0x00025800, 0x00020800, 0x00026000,
56 0x00023800, 0x00027800, 0x00022800, 0x00021800,
57 0x00062000, 0x00065000, 0x00064800, 0x00061000,
58 0x00066800, 0x00064000, 0x00060000, 0x00067000,
59 0x00063000, 0x00065800, 0x00060800, 0x00066000,
60 0x00063800, 0x00067800, 0x00062800, 0x00061800,
61 0x00032000, 0x00035000, 0x00034800, 0x00031000,
62 0x00036800, 0x00034000, 0x00030000, 0x00037000,
63 0x00033000, 0x00035800, 0x00030800, 0x00036000,
64 0x00033800, 0x00037800, 0x00032800, 0x00031800,
65 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
66 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
67 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
68 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
69 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
70 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
71 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
72 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
73 0x00052000, 0x00055000, 0x00054800, 0x00051000,
74 0x00056800, 0x00054000, 0x00050000, 0x00057000,
75 0x00053000, 0x00055800, 0x00050800, 0x00056000,
76 0x00053800, 0x00057800, 0x00052800, 0x00051800,
77 0x00012000, 0x00015000, 0x00014800, 0x00011000,
78 0x00016800, 0x00014000, 0x00010000, 0x00017000,
79 0x00013000, 0x00015800, 0x00010800, 0x00016000,
80 0x00013800, 0x00017800, 0x00012800, 0x00011800,
81 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
82 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
83 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
84 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
85 0x00042000, 0x00045000, 0x00044800, 0x00041000,
86 0x00046800, 0x00044000, 0x00040000, 0x00047000,
87 0x00043000, 0x00045800, 0x00040800, 0x00046000,
88 0x00043800, 0x00047800, 0x00042800, 0x00041800,
89 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
90 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
91 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
92 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
93 0x00002000, 0x00005000, 0x00004800, 0x00001000,
94 0x00006800, 0x00004000, 0x00000000, 0x00007000,
95 0x00003000, 0x00005800, 0x00000800, 0x00006000,
96 0x00003800, 0x00007800, 0x00002800, 0x00001800,
97 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
98 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
99 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
100 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
101 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
102 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
103 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
104 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
105 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
106 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
107 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
108 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
111 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
112 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
113 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
114 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
115 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
116 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
117 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
118 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
119 0x05280000, 0x05400000, 0x05080000, 0x05680000,
120 0x05500000, 0x05180000, 0x05200000, 0x05100000,
121 0x05700000, 0x05780000, 0x05600000, 0x05380000,
122 0x05300000, 0x05000000, 0x05480000, 0x05580000,
123 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
124 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
125 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
126 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
127 0x00280000, 0x00400000, 0x00080000, 0x00680000,
128 0x00500000, 0x00180000, 0x00200000, 0x00100000,
129 0x00700000, 0x00780000, 0x00600000, 0x00380000,
130 0x00300000, 0x00000000, 0x00480000, 0x00580000,
131 0x04280000, 0x04400000, 0x04080000, 0x04680000,
132 0x04500000, 0x04180000, 0x04200000, 0x04100000,
133 0x04700000, 0x04780000, 0x04600000, 0x04380000,
134 0x04300000, 0x04000000, 0x04480000, 0x04580000,
135 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
136 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
137 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
138 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
139 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
140 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
141 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
142 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
143 0x07280000, 0x07400000, 0x07080000, 0x07680000,
144 0x07500000, 0x07180000, 0x07200000, 0x07100000,
145 0x07700000, 0x07780000, 0x07600000, 0x07380000,
146 0x07300000, 0x07000000, 0x07480000, 0x07580000,
147 0x02280000, 0x02400000, 0x02080000, 0x02680000,
148 0x02500000, 0x02180000, 0x02200000, 0x02100000,
149 0x02700000, 0x02780000, 0x02600000, 0x02380000,
150 0x02300000, 0x02000000, 0x02480000, 0x02580000,
151 0x03280000, 0x03400000, 0x03080000, 0x03680000,
152 0x03500000, 0x03180000, 0x03200000, 0x03100000,
153 0x03700000, 0x03780000, 0x03600000, 0x03380000,
154 0x03300000, 0x03000000, 0x03480000, 0x03580000,
155 0x06280000, 0x06400000, 0x06080000, 0x06680000,
156 0x06500000, 0x06180000, 0x06200000, 0x06100000,
157 0x06700000, 0x06780000, 0x06600000, 0x06380000,
158 0x06300000, 0x06000000, 0x06480000, 0x06580000,
159 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
160 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
161 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
162 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
163 0x01280000, 0x01400000, 0x01080000, 0x01680000,
164 0x01500000, 0x01180000, 0x01200000, 0x01100000,
165 0x01700000, 0x01780000, 0x01600000, 0x01380000,
166 0x01300000, 0x01000000, 0x01480000, 0x01580000,
167 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
168 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
169 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
170 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
171 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
172 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
173 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
174 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
177 0x30000002, 0x60000002, 0x38000002, 0x08000002,
178 0x28000002, 0x78000002, 0x68000002, 0x40000002,
179 0x20000002, 0x50000002, 0x48000002, 0x70000002,
180 0x00000002, 0x18000002, 0x58000002, 0x10000002,
181 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
182 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
183 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
184 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
185 0x30000005, 0x60000005, 0x38000005, 0x08000005,
186 0x28000005, 0x78000005, 0x68000005, 0x40000005,
187 0x20000005, 0x50000005, 0x48000005, 0x70000005,
188 0x00000005, 0x18000005, 0x58000005, 0x10000005,
189 0x30000000, 0x60000000, 0x38000000, 0x08000000,
190 0x28000000, 0x78000000, 0x68000000, 0x40000000,
191 0x20000000, 0x50000000, 0x48000000, 0x70000000,
192 0x00000000, 0x18000000, 0x58000000, 0x10000000,
193 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
194 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
195 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
196 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
197 0x30000001, 0x60000001, 0x38000001, 0x08000001,
198 0x28000001, 0x78000001, 0x68000001, 0x40000001,
199 0x20000001, 0x50000001, 0x48000001, 0x70000001,
200 0x00000001, 0x18000001, 0x58000001, 0x10000001,
201 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
202 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
203 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
204 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
205 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
206 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
207 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
208 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
209 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
210 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
211 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
212 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
213 0x30000003, 0x60000003, 0x38000003, 0x08000003,
214 0x28000003, 0x78000003, 0x68000003, 0x40000003,
215 0x20000003, 0x50000003, 0x48000003, 0x70000003,
216 0x00000003, 0x18000003, 0x58000003, 0x10000003,
217 0x30000004, 0x60000004, 0x38000004, 0x08000004,
218 0x28000004, 0x78000004, 0x68000004, 0x40000004,
219 0x20000004, 0x50000004, 0x48000004, 0x70000004,
220 0x00000004, 0x18000004, 0x58000004, 0x10000004,
221 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
222 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
223 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
224 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
225 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
226 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
227 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
228 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
229 0x30000006, 0x60000006, 0x38000006, 0x08000006,
230 0x28000006, 0x78000006, 0x68000006, 0x40000006,
231 0x20000006, 0x50000006, 0x48000006, 0x70000006,
232 0x00000006, 0x18000006, 0x58000006, 0x10000006,
233 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
234 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
235 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
236 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
237 0x30000007, 0x60000007, 0x38000007, 0x08000007,
238 0x28000007, 0x78000007, 0x68000007, 0x40000007,
239 0x20000007, 0x50000007, 0x48000007, 0x70000007,
240 0x00000007, 0x18000007, 0x58000007, 0x10000007,
243 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
244 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
245 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
246 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
247 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
248 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
249 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
250 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
251 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
252 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
253 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
254 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
255 0x00000068, 0x00000058, 0x00000020, 0x00000008,
256 0x00000018, 0x00000078, 0x00000028, 0x00000048,
257 0x00000000, 0x00000050, 0x00000070, 0x00000038,
258 0x00000030, 0x00000040, 0x00000010, 0x00000060,
259 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
260 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
261 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
262 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
263 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
264 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
265 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
266 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
267 0x00000568, 0x00000558, 0x00000520, 0x00000508,
268 0x00000518, 0x00000578, 0x00000528, 0x00000548,
269 0x00000500, 0x00000550, 0x00000570, 0x00000538,
270 0x00000530, 0x00000540, 0x00000510, 0x00000560,
271 0x00000268, 0x00000258, 0x00000220, 0x00000208,
272 0x00000218, 0x00000278, 0x00000228, 0x00000248,
273 0x00000200, 0x00000250, 0x00000270, 0x00000238,
274 0x00000230, 0x00000240, 0x00000210, 0x00000260,
275 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
276 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
277 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
278 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
279 0x00000168, 0x00000158, 0x00000120, 0x00000108,
280 0x00000118, 0x00000178, 0x00000128, 0x00000148,
281 0x00000100, 0x00000150, 0x00000170, 0x00000138,
282 0x00000130, 0x00000140, 0x00000110, 0x00000160,
283 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
284 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
285 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
286 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
287 0x00000768, 0x00000758, 0x00000720, 0x00000708,
288 0x00000718, 0x00000778, 0x00000728, 0x00000748,
289 0x00000700, 0x00000750, 0x00000770, 0x00000738,
290 0x00000730, 0x00000740, 0x00000710, 0x00000760,
291 0x00000368, 0x00000358, 0x00000320, 0x00000308,
292 0x00000318, 0x00000378, 0x00000328, 0x00000348,
293 0x00000300, 0x00000350, 0x00000370, 0x00000338,
294 0x00000330, 0x00000340, 0x00000310, 0x00000360,
295 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
296 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
297 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
298 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
299 0x00000468, 0x00000458, 0x00000420, 0x00000408,
300 0x00000418, 0x00000478, 0x00000428, 0x00000448,
301 0x00000400, 0x00000450, 0x00000470, 0x00000438,
302 0x00000430, 0x00000440, 0x00000410, 0x00000460,
303 0x00000668, 0x00000658, 0x00000620, 0x00000608,
304 0x00000618, 0x00000678, 0x00000628, 0x00000648,
305 0x00000600, 0x00000650, 0x00000670, 0x00000638,
306 0x00000630, 0x00000640, 0x00000610, 0x00000660,
311 #define BOX(i,n,S) (u32x) ((S)[(n)][(i)])
315 #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
318 #define round(k1,k2,tbl) \
322 l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
323 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
324 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
325 BOX (amd_bfe (t, 24, 8), 3, tbl); \
327 r ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
328 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
329 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
330 BOX (amd_bfe (t, 24, 8), 3, tbl); \
333 #define R(k,h,s,i,t) \
339 round (k[0], k[1], t); \
340 round (k[2], k[3], t); \
341 round (k[4], k[5], t); \
342 round (k[6], k[7], t); \
343 round (k[0], k[1], t); \
344 round (k[2], k[3], t); \
345 round (k[4], k[5], t); \
346 round (k[6], k[7], t); \
347 round (k[0], k[1], t); \
348 round (k[2], k[3], t); \
349 round (k[4], k[5], t); \
350 round (k[6], k[7], t); \
351 round (k[7], k[6], t); \
352 round (k[5], k[4], t); \
353 round (k[3], k[2], t); \
354 round (k[1], k[0], t); \
360 w[0] = u[0] ^ v[0]; \
361 w[1] = u[1] ^ v[1]; \
362 w[2] = u[2] ^ v[2]; \
363 w[3] = u[3] ^ v[3]; \
364 w[4] = u[4] ^ v[4]; \
365 w[5] = u[5] ^ v[5]; \
366 w[6] = u[6] ^ v[6]; \
370 k[0] = ((w[0] & 0x000000ff) << 0) \
371 | ((w[2] & 0x000000ff) << 8) \
372 | ((w[4] & 0x000000ff) << 16) \
373 | ((w[6] & 0x000000ff) << 24); \
374 k[1] = ((w[0] & 0x0000ff00) >> 8) \
375 | ((w[2] & 0x0000ff00) >> 0) \
376 | ((w[4] & 0x0000ff00) << 8) \
377 | ((w[6] & 0x0000ff00) << 16); \
378 k[2] = ((w[0] & 0x00ff0000) >> 16) \
379 | ((w[2] & 0x00ff0000) >> 8) \
380 | ((w[4] & 0x00ff0000) << 0) \
381 | ((w[6] & 0x00ff0000) << 8); \
382 k[3] = ((w[0] & 0xff000000) >> 24) \
383 | ((w[2] & 0xff000000) >> 16) \
384 | ((w[4] & 0xff000000) >> 8) \
385 | ((w[6] & 0xff000000) >> 0); \
386 k[4] = ((w[1] & 0x000000ff) << 0) \
387 | ((w[3] & 0x000000ff) << 8) \
388 | ((w[5] & 0x000000ff) << 16) \
389 | ((w[7] & 0x000000ff) << 24); \
390 k[5] = ((w[1] & 0x0000ff00) >> 8) \
391 | ((w[3] & 0x0000ff00) >> 0) \
392 | ((w[5] & 0x0000ff00) << 8) \
393 | ((w[7] & 0x0000ff00) << 16); \
394 k[6] = ((w[1] & 0x00ff0000) >> 16) \
395 | ((w[3] & 0x00ff0000) >> 8) \
396 | ((w[5] & 0x00ff0000) << 0) \
397 | ((w[7] & 0x00ff0000) << 8); \
398 k[7] = ((w[1] & 0xff000000) >> 24) \
399 | ((w[3] & 0xff000000) >> 16) \
400 | ((w[5] & 0xff000000) >> 8) \
401 | ((w[7] & 0xff000000) >> 0);
438 x[0] ^= 0xff00ff00; \
439 x[1] ^= 0xff00ff00; \
440 x[2] ^= 0x00ff00ff; \
441 x[3] ^= 0x00ff00ff; \
442 x[4] ^= 0x00ffff00; \
443 x[5] ^= 0xff0000ff; \
444 x[6] ^= 0x000000ff; \
447 #define SHIFT12(u,m,s) \
448 u[0] = m[0] ^ s[6]; \
449 u[1] = m[1] ^ s[7]; \
450 u[2] = m[2] ^ (s[0] << 16) \
452 ^ (s[0] & 0x0000ffff) \
453 ^ (s[1] & 0x0000ffff) \
458 ^ (s[7] & 0xffff0000) \
460 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
462 ^ (s[1] & 0x0000ffff) \
471 ^ (s[7] & 0x0000ffff) \
474 u[4] = m[4] ^ (s[0] & 0xffff0000) \
477 ^ (s[1] & 0xffff0000) \
486 ^ (s[7] & 0x0000ffff) \
489 u[5] = m[5] ^ (s[0] << 16) \
491 ^ (s[0] & 0xffff0000) \
492 ^ (s[1] & 0x0000ffff) \
502 ^ (s[7] & 0xffff0000) \
518 u[7] = m[7] ^ (s[0] & 0xffff0000) \
520 ^ (s[1] & 0x0000ffff) \
529 ^ (s[7] & 0x0000ffff) \
533 #define SHIFT16(h,v,u) \
534 v[0] = h[0] ^ (u[1] << 16) \
536 v[1] = h[1] ^ (u[2] << 16) \
538 v[2] = h[2] ^ (u[3] << 16) \
540 v[3] = h[3] ^ (u[4] << 16) \
542 v[4] = h[4] ^ (u[5] << 16) \
544 v[5] = h[5] ^ (u[6] << 16) \
546 v[6] = h[6] ^ (u[7] << 16) \
548 v[7] = h[7] ^ (u[0] & 0xffff0000) \
551 ^ (u[1] & 0xffff0000) \
554 ^ (u[7] & 0xffff0000);
556 #define SHIFT61(h,v) \
557 h[0] = (v[0] & 0xffff0000) \
561 ^ (v[1] & 0xffff0000) \
570 ^ (v[7] & 0x0000ffff); \
571 h[1] = (v[0] << 16) \
573 ^ (v[0] & 0xffff0000) \
574 ^ (v[1] & 0x0000ffff) \
582 ^ (v[7] & 0xffff0000) \
584 h[2] = (v[0] & 0x0000ffff) \
588 ^ (v[1] & 0xffff0000) \
596 ^ (v[7] & 0x0000ffff) \
599 h[3] = (v[0] << 16) \
601 ^ (v[0] & 0xffff0000) \
602 ^ (v[1] & 0xffff0000) \
612 ^ (v[7] & 0x0000ffff) \
614 h[4] = (v[0] >> 16) \
628 h[5] = (v[0] << 16) \
629 ^ (v[0] & 0xffff0000) \
632 ^ (v[1] & 0xffff0000) \
646 ^ (v[7] & 0xffff0000); \
678 #define PASS0(h,s,u,v,t) \
689 #define PASS2(h,s,u,v,t) \
701 #define PASS4(h,s,u,v,t) \
712 #define PASS6(h,s,u,v,t) \
721 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
727 const u32 lid = get_local_id (0);
733 const u32 gid = get_global_id (0);
737 wordl0[0] = pws[gid].i[ 0];
738 wordl0[1] = pws[gid].i[ 1];
739 wordl0[2] = pws[gid].i[ 2];
740 wordl0[3] = pws[gid].i[ 3];
744 wordl1[0] = pws[gid].i[ 4];
745 wordl1[1] = pws[gid].i[ 5];
746 wordl1[2] = pws[gid].i[ 6];
747 wordl1[3] = pws[gid].i[ 7];
763 const u32 pw_l_len = pws[gid].pw_len;
765 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
767 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
774 __local u32 s_tables[4][256];
776 const u32 lid4 = lid * 4;
778 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
779 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
780 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
781 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
783 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
784 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
785 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
786 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
788 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
789 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
790 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
791 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
793 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
794 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
795 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
796 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
798 barrier (CLK_LOCAL_MEM_FENCE);
800 if (gid >= gid_max) return;
806 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
808 const u32 pw_r_len = combs_buf[il_pos].pw_len;
810 const u32 pw_len = pw_l_len + pw_r_len;
814 wordr0[0] = combs_buf[il_pos].i[0];
815 wordr0[1] = combs_buf[il_pos].i[1];
816 wordr0[2] = combs_buf[il_pos].i[2];
817 wordr0[3] = combs_buf[il_pos].i[3];
821 wordr1[0] = combs_buf[il_pos].i[4];
822 wordr1[1] = combs_buf[il_pos].i[5];
823 wordr1[2] = combs_buf[il_pos].i[6];
824 wordr1[3] = combs_buf[il_pos].i[7];
840 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
842 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
847 w0[0] = wordl0[0] | wordr0[0];
848 w0[1] = wordl0[1] | wordr0[1];
849 w0[2] = wordl0[2] | wordr0[2];
850 w0[3] = wordl0[3] | wordr0[3];
854 w1[0] = wordl1[0] | wordr1[0];
855 w1[1] = wordl1[1] | wordr1[1];
856 w1[2] = wordl1[2] | wordr1[2];
857 w1[3] = wordl1[3] | wordr1[3];
861 w2[0] = wordl2[0] | wordr2[0];
862 w2[1] = wordl2[1] | wordr2[1];
863 w2[2] = wordl2[2] | wordr2[2];
864 w2[3] = wordl2[3] | wordr2[3];
868 w3[0] = wordl3[0] | wordr3[0];
869 w3[1] = wordl3[1] | wordr3[1];
873 const u32 w14 = pw_len * 8;
910 state_m[0] = state[0];
911 state_m[1] = state[1];
912 state_m[2] = state[2];
913 state_m[3] = state[3];
914 state_m[4] = state[4];
915 state_m[5] = state[5];
916 state_m[6] = state[6];
917 state_m[7] = state[7];
930 PASS0 (state, tmp, state_m, data_m, s_tables);
931 PASS2 (state, tmp, state_m, data_m, s_tables);
932 PASS4 (state, tmp, state_m, data_m, s_tables);
933 PASS6 (state, tmp, state_m, data_m, s_tables);
935 SHIFT12 (state_m, data, tmp);
936 SHIFT16 (state, data_m, state_m);
937 SHIFT61 (state, data_m);
950 state_m[0] = state[0];
951 state_m[1] = state[1];
952 state_m[2] = state[2];
953 state_m[3] = state[3];
954 state_m[4] = state[4];
955 state_m[5] = state[5];
956 state_m[6] = state[6];
957 state_m[7] = state[7];
968 PASS0 (state, tmp, state_m, data_m, s_tables);
969 PASS2 (state, tmp, state_m, data_m, s_tables);
970 PASS4 (state, tmp, state_m, data_m, s_tables);
971 PASS6 (state, tmp, state_m, data_m, s_tables);
973 SHIFT12 (state_m, data, tmp);
974 SHIFT16 (state, data_m, state_m);
975 SHIFT61 (state, data_m);
988 state_m[0] = state[0];
989 state_m[1] = state[1];
990 state_m[2] = state[2];
991 state_m[3] = state[3];
992 state_m[4] = state[4];
993 state_m[5] = state[5];
994 state_m[6] = state[6];
995 state_m[7] = state[7];
1000 data_m[3] = data[3];
1001 data_m[4] = data[4];
1002 data_m[5] = data[5];
1003 data_m[6] = data[6];
1004 data_m[7] = data[7];
1006 PASS0 (state, tmp, state_m, data_m, s_tables);
1007 PASS2 (state, tmp, state_m, data_m, s_tables);
1008 PASS4 (state, tmp, state_m, data_m, s_tables);
1009 PASS6 (state, tmp, state_m, data_m, s_tables);
1011 SHIFT12 (state_m, data, tmp);
1012 SHIFT16 (state, data_m, state_m);
1013 SHIFT61 (state, data_m);
1017 const u32x r0 = state[0];
1018 const u32x r1 = state[1];
1019 const u32x r2 = state[2];
1020 const u32x r3 = state[3];
1022 #include VECT_COMPARE_M
1026 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1030 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1034 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1040 const u32 lid = get_local_id (0);
1046 const u32 gid = get_global_id (0);
1050 wordl0[0] = pws[gid].i[ 0];
1051 wordl0[1] = pws[gid].i[ 1];
1052 wordl0[2] = pws[gid].i[ 2];
1053 wordl0[3] = pws[gid].i[ 3];
1057 wordl1[0] = pws[gid].i[ 4];
1058 wordl1[1] = pws[gid].i[ 5];
1059 wordl1[2] = pws[gid].i[ 6];
1060 wordl1[3] = pws[gid].i[ 7];
1076 const u32 pw_l_len = pws[gid].pw_len;
1078 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
1080 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
1087 __local u32 s_tables[4][256];
1089 const u32 lid4 = lid * 4;
1091 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1092 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1093 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1094 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1096 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1097 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1098 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1099 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1101 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1102 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1103 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1104 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1106 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1107 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1108 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1109 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1111 barrier (CLK_LOCAL_MEM_FENCE);
1113 if (gid >= gid_max) return;
1119 const u32 search[4] =
1121 digests_buf[digests_offset].digest_buf[DGST_R0],
1122 digests_buf[digests_offset].digest_buf[DGST_R1],
1123 digests_buf[digests_offset].digest_buf[DGST_R2],
1124 digests_buf[digests_offset].digest_buf[DGST_R3]
1131 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
1133 const u32 pw_r_len = combs_buf[il_pos].pw_len;
1135 const u32 pw_len = pw_l_len + pw_r_len;
1139 wordr0[0] = combs_buf[il_pos].i[0];
1140 wordr0[1] = combs_buf[il_pos].i[1];
1141 wordr0[2] = combs_buf[il_pos].i[2];
1142 wordr0[3] = combs_buf[il_pos].i[3];
1146 wordr1[0] = combs_buf[il_pos].i[4];
1147 wordr1[1] = combs_buf[il_pos].i[5];
1148 wordr1[2] = combs_buf[il_pos].i[6];
1149 wordr1[3] = combs_buf[il_pos].i[7];
1165 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
1167 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
1172 w0[0] = wordl0[0] | wordr0[0];
1173 w0[1] = wordl0[1] | wordr0[1];
1174 w0[2] = wordl0[2] | wordr0[2];
1175 w0[3] = wordl0[3] | wordr0[3];
1179 w1[0] = wordl1[0] | wordr1[0];
1180 w1[1] = wordl1[1] | wordr1[1];
1181 w1[2] = wordl1[2] | wordr1[2];
1182 w1[3] = wordl1[3] | wordr1[3];
1186 w2[0] = wordl2[0] | wordr2[0];
1187 w2[1] = wordl2[1] | wordr2[1];
1188 w2[2] = wordl2[2] | wordr2[2];
1189 w2[3] = wordl2[3] | wordr2[3];
1193 w3[0] = wordl3[0] | wordr3[0];
1194 w3[1] = wordl3[1] | wordr3[1];
1198 const u32 w14 = pw_len * 8;
1221 state[ 8] = data[0];
1222 state[ 9] = data[1];
1223 state[10] = data[2];
1224 state[11] = data[3];
1225 state[12] = data[4];
1226 state[13] = data[5];
1227 state[14] = data[6];
1228 state[15] = data[7];
1235 state_m[0] = state[0];
1236 state_m[1] = state[1];
1237 state_m[2] = state[2];
1238 state_m[3] = state[3];
1239 state_m[4] = state[4];
1240 state_m[5] = state[5];
1241 state_m[6] = state[6];
1242 state_m[7] = state[7];
1244 data_m[0] = data[0];
1245 data_m[1] = data[1];
1246 data_m[2] = data[2];
1247 data_m[3] = data[3];
1248 data_m[4] = data[4];
1249 data_m[5] = data[5];
1250 data_m[6] = data[6];
1251 data_m[7] = data[7];
1255 PASS0 (state, tmp, state_m, data_m, s_tables);
1256 PASS2 (state, tmp, state_m, data_m, s_tables);
1257 PASS4 (state, tmp, state_m, data_m, s_tables);
1258 PASS6 (state, tmp, state_m, data_m, s_tables);
1260 SHIFT12 (state_m, data, tmp);
1261 SHIFT16 (state, data_m, state_m);
1262 SHIFT61 (state, data_m);
1275 state_m[0] = state[0];
1276 state_m[1] = state[1];
1277 state_m[2] = state[2];
1278 state_m[3] = state[3];
1279 state_m[4] = state[4];
1280 state_m[5] = state[5];
1281 state_m[6] = state[6];
1282 state_m[7] = state[7];
1284 data_m[0] = data[0];
1285 data_m[1] = data[1];
1286 data_m[2] = data[2];
1287 data_m[3] = data[3];
1288 data_m[4] = data[4];
1289 data_m[5] = data[5];
1290 data_m[6] = data[6];
1291 data_m[7] = data[7];
1293 PASS0 (state, tmp, state_m, data_m, s_tables);
1294 PASS2 (state, tmp, state_m, data_m, s_tables);
1295 PASS4 (state, tmp, state_m, data_m, s_tables);
1296 PASS6 (state, tmp, state_m, data_m, s_tables);
1298 SHIFT12 (state_m, data, tmp);
1299 SHIFT16 (state, data_m, state_m);
1300 SHIFT61 (state, data_m);
1304 data[0] = state[ 8];
1305 data[1] = state[ 9];
1306 data[2] = state[10];
1307 data[3] = state[11];
1308 data[4] = state[12];
1309 data[5] = state[13];
1310 data[6] = state[14];
1311 data[7] = state[15];
1313 state_m[0] = state[0];
1314 state_m[1] = state[1];
1315 state_m[2] = state[2];
1316 state_m[3] = state[3];
1317 state_m[4] = state[4];
1318 state_m[5] = state[5];
1319 state_m[6] = state[6];
1320 state_m[7] = state[7];
1322 data_m[0] = data[0];
1323 data_m[1] = data[1];
1324 data_m[2] = data[2];
1325 data_m[3] = data[3];
1326 data_m[4] = data[4];
1327 data_m[5] = data[5];
1328 data_m[6] = data[6];
1329 data_m[7] = data[7];
1331 PASS0 (state, tmp, state_m, data_m, s_tables);
1332 PASS2 (state, tmp, state_m, data_m, s_tables);
1333 PASS4 (state, tmp, state_m, data_m, s_tables);
1334 PASS6 (state, tmp, state_m, data_m, s_tables);
1336 SHIFT12 (state_m, data, tmp);
1337 SHIFT16 (state, data_m, state_m);
1338 SHIFT61 (state, data_m);
1342 const u32x r0 = state[0];
1343 const u32x r1 = state[1];
1344 const u32x r2 = state[2];
1345 const u32x r3 = state[3];
1347 #include VECT_COMPARE_S
1351 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1355 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)