2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 __constant u32 c_tables[4][256] =
26 0x00072000, 0x00075000, 0x00074800, 0x00071000,
27 0x00076800, 0x00074000, 0x00070000, 0x00077000,
28 0x00073000, 0x00075800, 0x00070800, 0x00076000,
29 0x00073800, 0x00077800, 0x00072800, 0x00071800,
30 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
31 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
32 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
33 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
34 0x00022000, 0x00025000, 0x00024800, 0x00021000,
35 0x00026800, 0x00024000, 0x00020000, 0x00027000,
36 0x00023000, 0x00025800, 0x00020800, 0x00026000,
37 0x00023800, 0x00027800, 0x00022800, 0x00021800,
38 0x00062000, 0x00065000, 0x00064800, 0x00061000,
39 0x00066800, 0x00064000, 0x00060000, 0x00067000,
40 0x00063000, 0x00065800, 0x00060800, 0x00066000,
41 0x00063800, 0x00067800, 0x00062800, 0x00061800,
42 0x00032000, 0x00035000, 0x00034800, 0x00031000,
43 0x00036800, 0x00034000, 0x00030000, 0x00037000,
44 0x00033000, 0x00035800, 0x00030800, 0x00036000,
45 0x00033800, 0x00037800, 0x00032800, 0x00031800,
46 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
47 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
48 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
49 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
50 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
51 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
52 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
53 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
54 0x00052000, 0x00055000, 0x00054800, 0x00051000,
55 0x00056800, 0x00054000, 0x00050000, 0x00057000,
56 0x00053000, 0x00055800, 0x00050800, 0x00056000,
57 0x00053800, 0x00057800, 0x00052800, 0x00051800,
58 0x00012000, 0x00015000, 0x00014800, 0x00011000,
59 0x00016800, 0x00014000, 0x00010000, 0x00017000,
60 0x00013000, 0x00015800, 0x00010800, 0x00016000,
61 0x00013800, 0x00017800, 0x00012800, 0x00011800,
62 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
63 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
64 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
65 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
66 0x00042000, 0x00045000, 0x00044800, 0x00041000,
67 0x00046800, 0x00044000, 0x00040000, 0x00047000,
68 0x00043000, 0x00045800, 0x00040800, 0x00046000,
69 0x00043800, 0x00047800, 0x00042800, 0x00041800,
70 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
71 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
72 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
73 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
74 0x00002000, 0x00005000, 0x00004800, 0x00001000,
75 0x00006800, 0x00004000, 0x00000000, 0x00007000,
76 0x00003000, 0x00005800, 0x00000800, 0x00006000,
77 0x00003800, 0x00007800, 0x00002800, 0x00001800,
78 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
79 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
80 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
81 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
82 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
83 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
84 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
85 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
86 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
87 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
88 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
89 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
92 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
93 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
94 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
95 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
96 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
97 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
98 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
99 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
100 0x05280000, 0x05400000, 0x05080000, 0x05680000,
101 0x05500000, 0x05180000, 0x05200000, 0x05100000,
102 0x05700000, 0x05780000, 0x05600000, 0x05380000,
103 0x05300000, 0x05000000, 0x05480000, 0x05580000,
104 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
105 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
106 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
107 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
108 0x00280000, 0x00400000, 0x00080000, 0x00680000,
109 0x00500000, 0x00180000, 0x00200000, 0x00100000,
110 0x00700000, 0x00780000, 0x00600000, 0x00380000,
111 0x00300000, 0x00000000, 0x00480000, 0x00580000,
112 0x04280000, 0x04400000, 0x04080000, 0x04680000,
113 0x04500000, 0x04180000, 0x04200000, 0x04100000,
114 0x04700000, 0x04780000, 0x04600000, 0x04380000,
115 0x04300000, 0x04000000, 0x04480000, 0x04580000,
116 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
117 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
118 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
119 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
120 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
121 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
122 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
123 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
124 0x07280000, 0x07400000, 0x07080000, 0x07680000,
125 0x07500000, 0x07180000, 0x07200000, 0x07100000,
126 0x07700000, 0x07780000, 0x07600000, 0x07380000,
127 0x07300000, 0x07000000, 0x07480000, 0x07580000,
128 0x02280000, 0x02400000, 0x02080000, 0x02680000,
129 0x02500000, 0x02180000, 0x02200000, 0x02100000,
130 0x02700000, 0x02780000, 0x02600000, 0x02380000,
131 0x02300000, 0x02000000, 0x02480000, 0x02580000,
132 0x03280000, 0x03400000, 0x03080000, 0x03680000,
133 0x03500000, 0x03180000, 0x03200000, 0x03100000,
134 0x03700000, 0x03780000, 0x03600000, 0x03380000,
135 0x03300000, 0x03000000, 0x03480000, 0x03580000,
136 0x06280000, 0x06400000, 0x06080000, 0x06680000,
137 0x06500000, 0x06180000, 0x06200000, 0x06100000,
138 0x06700000, 0x06780000, 0x06600000, 0x06380000,
139 0x06300000, 0x06000000, 0x06480000, 0x06580000,
140 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
141 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
142 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
143 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
144 0x01280000, 0x01400000, 0x01080000, 0x01680000,
145 0x01500000, 0x01180000, 0x01200000, 0x01100000,
146 0x01700000, 0x01780000, 0x01600000, 0x01380000,
147 0x01300000, 0x01000000, 0x01480000, 0x01580000,
148 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
149 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
150 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
151 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
152 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
153 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
154 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
155 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
158 0x30000002, 0x60000002, 0x38000002, 0x08000002,
159 0x28000002, 0x78000002, 0x68000002, 0x40000002,
160 0x20000002, 0x50000002, 0x48000002, 0x70000002,
161 0x00000002, 0x18000002, 0x58000002, 0x10000002,
162 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
163 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
164 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
165 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
166 0x30000005, 0x60000005, 0x38000005, 0x08000005,
167 0x28000005, 0x78000005, 0x68000005, 0x40000005,
168 0x20000005, 0x50000005, 0x48000005, 0x70000005,
169 0x00000005, 0x18000005, 0x58000005, 0x10000005,
170 0x30000000, 0x60000000, 0x38000000, 0x08000000,
171 0x28000000, 0x78000000, 0x68000000, 0x40000000,
172 0x20000000, 0x50000000, 0x48000000, 0x70000000,
173 0x00000000, 0x18000000, 0x58000000, 0x10000000,
174 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
175 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
176 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
177 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
178 0x30000001, 0x60000001, 0x38000001, 0x08000001,
179 0x28000001, 0x78000001, 0x68000001, 0x40000001,
180 0x20000001, 0x50000001, 0x48000001, 0x70000001,
181 0x00000001, 0x18000001, 0x58000001, 0x10000001,
182 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
183 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
184 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
185 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
186 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
187 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
188 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
189 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
190 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
191 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
192 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
193 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
194 0x30000003, 0x60000003, 0x38000003, 0x08000003,
195 0x28000003, 0x78000003, 0x68000003, 0x40000003,
196 0x20000003, 0x50000003, 0x48000003, 0x70000003,
197 0x00000003, 0x18000003, 0x58000003, 0x10000003,
198 0x30000004, 0x60000004, 0x38000004, 0x08000004,
199 0x28000004, 0x78000004, 0x68000004, 0x40000004,
200 0x20000004, 0x50000004, 0x48000004, 0x70000004,
201 0x00000004, 0x18000004, 0x58000004, 0x10000004,
202 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
203 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
204 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
205 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
206 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
207 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
208 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
209 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
210 0x30000006, 0x60000006, 0x38000006, 0x08000006,
211 0x28000006, 0x78000006, 0x68000006, 0x40000006,
212 0x20000006, 0x50000006, 0x48000006, 0x70000006,
213 0x00000006, 0x18000006, 0x58000006, 0x10000006,
214 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
215 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
216 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
217 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
218 0x30000007, 0x60000007, 0x38000007, 0x08000007,
219 0x28000007, 0x78000007, 0x68000007, 0x40000007,
220 0x20000007, 0x50000007, 0x48000007, 0x70000007,
221 0x00000007, 0x18000007, 0x58000007, 0x10000007,
224 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
225 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
226 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
227 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
228 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
229 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
230 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
231 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
232 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
233 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
234 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
235 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
236 0x00000068, 0x00000058, 0x00000020, 0x00000008,
237 0x00000018, 0x00000078, 0x00000028, 0x00000048,
238 0x00000000, 0x00000050, 0x00000070, 0x00000038,
239 0x00000030, 0x00000040, 0x00000010, 0x00000060,
240 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
241 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
242 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
243 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
244 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
245 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
246 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
247 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
248 0x00000568, 0x00000558, 0x00000520, 0x00000508,
249 0x00000518, 0x00000578, 0x00000528, 0x00000548,
250 0x00000500, 0x00000550, 0x00000570, 0x00000538,
251 0x00000530, 0x00000540, 0x00000510, 0x00000560,
252 0x00000268, 0x00000258, 0x00000220, 0x00000208,
253 0x00000218, 0x00000278, 0x00000228, 0x00000248,
254 0x00000200, 0x00000250, 0x00000270, 0x00000238,
255 0x00000230, 0x00000240, 0x00000210, 0x00000260,
256 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
257 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
258 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
259 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
260 0x00000168, 0x00000158, 0x00000120, 0x00000108,
261 0x00000118, 0x00000178, 0x00000128, 0x00000148,
262 0x00000100, 0x00000150, 0x00000170, 0x00000138,
263 0x00000130, 0x00000140, 0x00000110, 0x00000160,
264 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
265 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
266 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
267 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
268 0x00000768, 0x00000758, 0x00000720, 0x00000708,
269 0x00000718, 0x00000778, 0x00000728, 0x00000748,
270 0x00000700, 0x00000750, 0x00000770, 0x00000738,
271 0x00000730, 0x00000740, 0x00000710, 0x00000760,
272 0x00000368, 0x00000358, 0x00000320, 0x00000308,
273 0x00000318, 0x00000378, 0x00000328, 0x00000348,
274 0x00000300, 0x00000350, 0x00000370, 0x00000338,
275 0x00000330, 0x00000340, 0x00000310, 0x00000360,
276 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
277 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
278 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
279 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
280 0x00000468, 0x00000458, 0x00000420, 0x00000408,
281 0x00000418, 0x00000478, 0x00000428, 0x00000448,
282 0x00000400, 0x00000450, 0x00000470, 0x00000438,
283 0x00000430, 0x00000440, 0x00000410, 0x00000460,
284 0x00000668, 0x00000658, 0x00000620, 0x00000608,
285 0x00000618, 0x00000678, 0x00000628, 0x00000648,
286 0x00000600, 0x00000650, 0x00000670, 0x00000638,
287 0x00000630, 0x00000640, 0x00000610, 0x00000660,
292 #define BOX(i,n,S) (u32x) ((S)[(n)][(i)])
296 #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
299 #define round(k1,k2,tbl) \
303 l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
304 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
305 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
306 BOX (amd_bfe (t, 24, 8), 3, tbl); \
308 r ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \
309 BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \
310 BOX (amd_bfe (t, 16, 8), 2, tbl) ^ \
311 BOX (amd_bfe (t, 24, 8), 3, tbl); \
314 #define R(k,h,s,i,t) \
320 round (k[0], k[1], t); \
321 round (k[2], k[3], t); \
322 round (k[4], k[5], t); \
323 round (k[6], k[7], t); \
324 round (k[0], k[1], t); \
325 round (k[2], k[3], t); \
326 round (k[4], k[5], t); \
327 round (k[6], k[7], t); \
328 round (k[0], k[1], t); \
329 round (k[2], k[3], t); \
330 round (k[4], k[5], t); \
331 round (k[6], k[7], t); \
332 round (k[7], k[6], t); \
333 round (k[5], k[4], t); \
334 round (k[3], k[2], t); \
335 round (k[1], k[0], t); \
341 w[0] = u[0] ^ v[0]; \
342 w[1] = u[1] ^ v[1]; \
343 w[2] = u[2] ^ v[2]; \
344 w[3] = u[3] ^ v[3]; \
345 w[4] = u[4] ^ v[4]; \
346 w[5] = u[5] ^ v[5]; \
347 w[6] = u[6] ^ v[6]; \
351 k[0] = ((w[0] & 0x000000ff) << 0) \
352 | ((w[2] & 0x000000ff) << 8) \
353 | ((w[4] & 0x000000ff) << 16) \
354 | ((w[6] & 0x000000ff) << 24); \
355 k[1] = ((w[0] & 0x0000ff00) >> 8) \
356 | ((w[2] & 0x0000ff00) >> 0) \
357 | ((w[4] & 0x0000ff00) << 8) \
358 | ((w[6] & 0x0000ff00) << 16); \
359 k[2] = ((w[0] & 0x00ff0000) >> 16) \
360 | ((w[2] & 0x00ff0000) >> 8) \
361 | ((w[4] & 0x00ff0000) << 0) \
362 | ((w[6] & 0x00ff0000) << 8); \
363 k[3] = ((w[0] & 0xff000000) >> 24) \
364 | ((w[2] & 0xff000000) >> 16) \
365 | ((w[4] & 0xff000000) >> 8) \
366 | ((w[6] & 0xff000000) >> 0); \
367 k[4] = ((w[1] & 0x000000ff) << 0) \
368 | ((w[3] & 0x000000ff) << 8) \
369 | ((w[5] & 0x000000ff) << 16) \
370 | ((w[7] & 0x000000ff) << 24); \
371 k[5] = ((w[1] & 0x0000ff00) >> 8) \
372 | ((w[3] & 0x0000ff00) >> 0) \
373 | ((w[5] & 0x0000ff00) << 8) \
374 | ((w[7] & 0x0000ff00) << 16); \
375 k[6] = ((w[1] & 0x00ff0000) >> 16) \
376 | ((w[3] & 0x00ff0000) >> 8) \
377 | ((w[5] & 0x00ff0000) << 0) \
378 | ((w[7] & 0x00ff0000) << 8); \
379 k[7] = ((w[1] & 0xff000000) >> 24) \
380 | ((w[3] & 0xff000000) >> 16) \
381 | ((w[5] & 0xff000000) >> 8) \
382 | ((w[7] & 0xff000000) >> 0);
419 x[0] ^= 0xff00ff00; \
420 x[1] ^= 0xff00ff00; \
421 x[2] ^= 0x00ff00ff; \
422 x[3] ^= 0x00ff00ff; \
423 x[4] ^= 0x00ffff00; \
424 x[5] ^= 0xff0000ff; \
425 x[6] ^= 0x000000ff; \
428 #define SHIFT12(u,m,s) \
429 u[0] = m[0] ^ s[6]; \
430 u[1] = m[1] ^ s[7]; \
431 u[2] = m[2] ^ (s[0] << 16) \
433 ^ (s[0] & 0x0000ffff) \
434 ^ (s[1] & 0x0000ffff) \
439 ^ (s[7] & 0xffff0000) \
441 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
443 ^ (s[1] & 0x0000ffff) \
452 ^ (s[7] & 0x0000ffff) \
455 u[4] = m[4] ^ (s[0] & 0xffff0000) \
458 ^ (s[1] & 0xffff0000) \
467 ^ (s[7] & 0x0000ffff) \
470 u[5] = m[5] ^ (s[0] << 16) \
472 ^ (s[0] & 0xffff0000) \
473 ^ (s[1] & 0x0000ffff) \
483 ^ (s[7] & 0xffff0000) \
499 u[7] = m[7] ^ (s[0] & 0xffff0000) \
501 ^ (s[1] & 0x0000ffff) \
510 ^ (s[7] & 0x0000ffff) \
514 #define SHIFT16(h,v,u) \
515 v[0] = h[0] ^ (u[1] << 16) \
517 v[1] = h[1] ^ (u[2] << 16) \
519 v[2] = h[2] ^ (u[3] << 16) \
521 v[3] = h[3] ^ (u[4] << 16) \
523 v[4] = h[4] ^ (u[5] << 16) \
525 v[5] = h[5] ^ (u[6] << 16) \
527 v[6] = h[6] ^ (u[7] << 16) \
529 v[7] = h[7] ^ (u[0] & 0xffff0000) \
532 ^ (u[1] & 0xffff0000) \
535 ^ (u[7] & 0xffff0000);
537 #define SHIFT61(h,v) \
538 h[0] = (v[0] & 0xffff0000) \
542 ^ (v[1] & 0xffff0000) \
551 ^ (v[7] & 0x0000ffff); \
552 h[1] = (v[0] << 16) \
554 ^ (v[0] & 0xffff0000) \
555 ^ (v[1] & 0x0000ffff) \
563 ^ (v[7] & 0xffff0000) \
565 h[2] = (v[0] & 0x0000ffff) \
569 ^ (v[1] & 0xffff0000) \
577 ^ (v[7] & 0x0000ffff) \
580 h[3] = (v[0] << 16) \
582 ^ (v[0] & 0xffff0000) \
583 ^ (v[1] & 0xffff0000) \
593 ^ (v[7] & 0x0000ffff) \
595 h[4] = (v[0] >> 16) \
609 h[5] = (v[0] << 16) \
610 ^ (v[0] & 0xffff0000) \
613 ^ (v[1] & 0xffff0000) \
627 ^ (v[7] & 0xffff0000); \
659 #define PASS0(h,s,u,v,t) \
670 #define PASS2(h,s,u,v,t) \
682 #define PASS4(h,s,u,v,t) \
693 #define PASS6(h,s,u,v,t) \
702 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
708 const u32 lid = get_local_id (0);
714 const u32 gid = get_global_id (0);
718 wordl0[0] = pws[gid].i[ 0];
719 wordl0[1] = pws[gid].i[ 1];
720 wordl0[2] = pws[gid].i[ 2];
721 wordl0[3] = pws[gid].i[ 3];
725 wordl1[0] = pws[gid].i[ 4];
726 wordl1[1] = pws[gid].i[ 5];
727 wordl1[2] = pws[gid].i[ 6];
728 wordl1[3] = pws[gid].i[ 7];
744 const u32 pw_l_len = pws[gid].pw_len;
746 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
748 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
755 __local u32 s_tables[4][256];
757 const u32 lid4 = lid * 4;
759 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
760 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
761 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
762 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
764 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
765 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
766 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
767 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
769 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
770 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
771 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
772 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
774 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
775 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
776 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
777 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
779 barrier (CLK_LOCAL_MEM_FENCE);
781 if (gid >= gid_max) return;
787 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
789 const u32 pw_r_len = combs_buf[il_pos].pw_len;
791 const u32 pw_len = pw_l_len + pw_r_len;
795 wordr0[0] = combs_buf[il_pos].i[0];
796 wordr0[1] = combs_buf[il_pos].i[1];
797 wordr0[2] = combs_buf[il_pos].i[2];
798 wordr0[3] = combs_buf[il_pos].i[3];
802 wordr1[0] = combs_buf[il_pos].i[4];
803 wordr1[1] = combs_buf[il_pos].i[5];
804 wordr1[2] = combs_buf[il_pos].i[6];
805 wordr1[3] = combs_buf[il_pos].i[7];
821 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
823 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
828 w0[0] = wordl0[0] | wordr0[0];
829 w0[1] = wordl0[1] | wordr0[1];
830 w0[2] = wordl0[2] | wordr0[2];
831 w0[3] = wordl0[3] | wordr0[3];
835 w1[0] = wordl1[0] | wordr1[0];
836 w1[1] = wordl1[1] | wordr1[1];
837 w1[2] = wordl1[2] | wordr1[2];
838 w1[3] = wordl1[3] | wordr1[3];
842 w2[0] = wordl2[0] | wordr2[0];
843 w2[1] = wordl2[1] | wordr2[1];
844 w2[2] = wordl2[2] | wordr2[2];
845 w2[3] = wordl2[3] | wordr2[3];
849 w3[0] = wordl3[0] | wordr3[0];
850 w3[1] = wordl3[1] | wordr3[1];
854 const u32 w14 = pw_len * 8;
891 state_m[0] = state[0];
892 state_m[1] = state[1];
893 state_m[2] = state[2];
894 state_m[3] = state[3];
895 state_m[4] = state[4];
896 state_m[5] = state[5];
897 state_m[6] = state[6];
898 state_m[7] = state[7];
911 PASS0 (state, tmp, state_m, data_m, s_tables);
912 PASS2 (state, tmp, state_m, data_m, s_tables);
913 PASS4 (state, tmp, state_m, data_m, s_tables);
914 PASS6 (state, tmp, state_m, data_m, s_tables);
916 SHIFT12 (state_m, data, tmp);
917 SHIFT16 (state, data_m, state_m);
918 SHIFT61 (state, data_m);
931 state_m[0] = state[0];
932 state_m[1] = state[1];
933 state_m[2] = state[2];
934 state_m[3] = state[3];
935 state_m[4] = state[4];
936 state_m[5] = state[5];
937 state_m[6] = state[6];
938 state_m[7] = state[7];
949 PASS0 (state, tmp, state_m, data_m, s_tables);
950 PASS2 (state, tmp, state_m, data_m, s_tables);
951 PASS4 (state, tmp, state_m, data_m, s_tables);
952 PASS6 (state, tmp, state_m, data_m, s_tables);
954 SHIFT12 (state_m, data, tmp);
955 SHIFT16 (state, data_m, state_m);
956 SHIFT61 (state, data_m);
969 state_m[0] = state[0];
970 state_m[1] = state[1];
971 state_m[2] = state[2];
972 state_m[3] = state[3];
973 state_m[4] = state[4];
974 state_m[5] = state[5];
975 state_m[6] = state[6];
976 state_m[7] = state[7];
987 PASS0 (state, tmp, state_m, data_m, s_tables);
988 PASS2 (state, tmp, state_m, data_m, s_tables);
989 PASS4 (state, tmp, state_m, data_m, s_tables);
990 PASS6 (state, tmp, state_m, data_m, s_tables);
992 SHIFT12 (state_m, data, tmp);
993 SHIFT16 (state, data_m, state_m);
994 SHIFT61 (state, data_m);
998 const u32 r0 = state[0];
999 const u32 r1 = state[1];
1000 const u32 r2 = state[2];
1001 const u32 r3 = state[3];
1007 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1011 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1015 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1021 const u32 lid = get_local_id (0);
1027 const u32 gid = get_global_id (0);
1031 wordl0[0] = pws[gid].i[ 0];
1032 wordl0[1] = pws[gid].i[ 1];
1033 wordl0[2] = pws[gid].i[ 2];
1034 wordl0[3] = pws[gid].i[ 3];
1038 wordl1[0] = pws[gid].i[ 4];
1039 wordl1[1] = pws[gid].i[ 5];
1040 wordl1[2] = pws[gid].i[ 6];
1041 wordl1[3] = pws[gid].i[ 7];
1057 const u32 pw_l_len = pws[gid].pw_len;
1059 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
1061 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
1068 __local u32 s_tables[4][256];
1070 const u32 lid4 = lid * 4;
1072 s_tables[0][lid4 + 0] = c_tables[0][lid4 + 0];
1073 s_tables[0][lid4 + 1] = c_tables[0][lid4 + 1];
1074 s_tables[0][lid4 + 2] = c_tables[0][lid4 + 2];
1075 s_tables[0][lid4 + 3] = c_tables[0][lid4 + 3];
1077 s_tables[1][lid4 + 0] = c_tables[1][lid4 + 0];
1078 s_tables[1][lid4 + 1] = c_tables[1][lid4 + 1];
1079 s_tables[1][lid4 + 2] = c_tables[1][lid4 + 2];
1080 s_tables[1][lid4 + 3] = c_tables[1][lid4 + 3];
1082 s_tables[2][lid4 + 0] = c_tables[2][lid4 + 0];
1083 s_tables[2][lid4 + 1] = c_tables[2][lid4 + 1];
1084 s_tables[2][lid4 + 2] = c_tables[2][lid4 + 2];
1085 s_tables[2][lid4 + 3] = c_tables[2][lid4 + 3];
1087 s_tables[3][lid4 + 0] = c_tables[3][lid4 + 0];
1088 s_tables[3][lid4 + 1] = c_tables[3][lid4 + 1];
1089 s_tables[3][lid4 + 2] = c_tables[3][lid4 + 2];
1090 s_tables[3][lid4 + 3] = c_tables[3][lid4 + 3];
1092 barrier (CLK_LOCAL_MEM_FENCE);
1094 if (gid >= gid_max) return;
1100 const u32 search[4] =
1102 digests_buf[digests_offset].digest_buf[DGST_R0],
1103 digests_buf[digests_offset].digest_buf[DGST_R1],
1104 digests_buf[digests_offset].digest_buf[DGST_R2],
1105 digests_buf[digests_offset].digest_buf[DGST_R3]
1112 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
1114 const u32 pw_r_len = combs_buf[il_pos].pw_len;
1116 const u32 pw_len = pw_l_len + pw_r_len;
1120 wordr0[0] = combs_buf[il_pos].i[0];
1121 wordr0[1] = combs_buf[il_pos].i[1];
1122 wordr0[2] = combs_buf[il_pos].i[2];
1123 wordr0[3] = combs_buf[il_pos].i[3];
1127 wordr1[0] = combs_buf[il_pos].i[4];
1128 wordr1[1] = combs_buf[il_pos].i[5];
1129 wordr1[2] = combs_buf[il_pos].i[6];
1130 wordr1[3] = combs_buf[il_pos].i[7];
1146 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
1148 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
1153 w0[0] = wordl0[0] | wordr0[0];
1154 w0[1] = wordl0[1] | wordr0[1];
1155 w0[2] = wordl0[2] | wordr0[2];
1156 w0[3] = wordl0[3] | wordr0[3];
1160 w1[0] = wordl1[0] | wordr1[0];
1161 w1[1] = wordl1[1] | wordr1[1];
1162 w1[2] = wordl1[2] | wordr1[2];
1163 w1[3] = wordl1[3] | wordr1[3];
1167 w2[0] = wordl2[0] | wordr2[0];
1168 w2[1] = wordl2[1] | wordr2[1];
1169 w2[2] = wordl2[2] | wordr2[2];
1170 w2[3] = wordl2[3] | wordr2[3];
1174 w3[0] = wordl3[0] | wordr3[0];
1175 w3[1] = wordl3[1] | wordr3[1];
1179 const u32 w14 = pw_len * 8;
1202 state[ 8] = data[0];
1203 state[ 9] = data[1];
1204 state[10] = data[2];
1205 state[11] = data[3];
1206 state[12] = data[4];
1207 state[13] = data[5];
1208 state[14] = data[6];
1209 state[15] = data[7];
1216 state_m[0] = state[0];
1217 state_m[1] = state[1];
1218 state_m[2] = state[2];
1219 state_m[3] = state[3];
1220 state_m[4] = state[4];
1221 state_m[5] = state[5];
1222 state_m[6] = state[6];
1223 state_m[7] = state[7];
1225 data_m[0] = data[0];
1226 data_m[1] = data[1];
1227 data_m[2] = data[2];
1228 data_m[3] = data[3];
1229 data_m[4] = data[4];
1230 data_m[5] = data[5];
1231 data_m[6] = data[6];
1232 data_m[7] = data[7];
1236 PASS0 (state, tmp, state_m, data_m, s_tables);
1237 PASS2 (state, tmp, state_m, data_m, s_tables);
1238 PASS4 (state, tmp, state_m, data_m, s_tables);
1239 PASS6 (state, tmp, state_m, data_m, s_tables);
1241 SHIFT12 (state_m, data, tmp);
1242 SHIFT16 (state, data_m, state_m);
1243 SHIFT61 (state, data_m);
1256 state_m[0] = state[0];
1257 state_m[1] = state[1];
1258 state_m[2] = state[2];
1259 state_m[3] = state[3];
1260 state_m[4] = state[4];
1261 state_m[5] = state[5];
1262 state_m[6] = state[6];
1263 state_m[7] = state[7];
1265 data_m[0] = data[0];
1266 data_m[1] = data[1];
1267 data_m[2] = data[2];
1268 data_m[3] = data[3];
1269 data_m[4] = data[4];
1270 data_m[5] = data[5];
1271 data_m[6] = data[6];
1272 data_m[7] = data[7];
1274 PASS0 (state, tmp, state_m, data_m, s_tables);
1275 PASS2 (state, tmp, state_m, data_m, s_tables);
1276 PASS4 (state, tmp, state_m, data_m, s_tables);
1277 PASS6 (state, tmp, state_m, data_m, s_tables);
1279 SHIFT12 (state_m, data, tmp);
1280 SHIFT16 (state, data_m, state_m);
1281 SHIFT61 (state, data_m);
1285 data[0] = state[ 8];
1286 data[1] = state[ 9];
1287 data[2] = state[10];
1288 data[3] = state[11];
1289 data[4] = state[12];
1290 data[5] = state[13];
1291 data[6] = state[14];
1292 data[7] = state[15];
1294 state_m[0] = state[0];
1295 state_m[1] = state[1];
1296 state_m[2] = state[2];
1297 state_m[3] = state[3];
1298 state_m[4] = state[4];
1299 state_m[5] = state[5];
1300 state_m[6] = state[6];
1301 state_m[7] = state[7];
1303 data_m[0] = data[0];
1304 data_m[1] = data[1];
1305 data_m[2] = data[2];
1306 data_m[3] = data[3];
1307 data_m[4] = data[4];
1308 data_m[5] = data[5];
1309 data_m[6] = data[6];
1310 data_m[7] = data[7];
1312 PASS0 (state, tmp, state_m, data_m, s_tables);
1313 PASS2 (state, tmp, state_m, data_m, s_tables);
1314 PASS4 (state, tmp, state_m, data_m, s_tables);
1315 PASS6 (state, tmp, state_m, data_m, s_tables);
1317 SHIFT12 (state_m, data, tmp);
1318 SHIFT16 (state, data_m, state_m);
1319 SHIFT61 (state, data_m);
1323 const u32 r0 = state[0];
1324 const u32 r1 = state[1];
1325 const u32 r2 = state[2];
1326 const u32 r3 = state[3];
1332 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1336 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)