2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "OpenCL/types_ocl.c"
18 #include "OpenCL/common.c"
20 #define COMPARE_S "OpenCL/check_single_comp4.c"
21 #define COMPARE_M "OpenCL/check_multi_comp4.c"
23 __constant u32 c_tables[4][256] =
26 0x00072000, 0x00075000, 0x00074800, 0x00071000,
27 0x00076800, 0x00074000, 0x00070000, 0x00077000,
28 0x00073000, 0x00075800, 0x00070800, 0x00076000,
29 0x00073800, 0x00077800, 0x00072800, 0x00071800,
30 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
31 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
32 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
33 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
34 0x00022000, 0x00025000, 0x00024800, 0x00021000,
35 0x00026800, 0x00024000, 0x00020000, 0x00027000,
36 0x00023000, 0x00025800, 0x00020800, 0x00026000,
37 0x00023800, 0x00027800, 0x00022800, 0x00021800,
38 0x00062000, 0x00065000, 0x00064800, 0x00061000,
39 0x00066800, 0x00064000, 0x00060000, 0x00067000,
40 0x00063000, 0x00065800, 0x00060800, 0x00066000,
41 0x00063800, 0x00067800, 0x00062800, 0x00061800,
42 0x00032000, 0x00035000, 0x00034800, 0x00031000,
43 0x00036800, 0x00034000, 0x00030000, 0x00037000,
44 0x00033000, 0x00035800, 0x00030800, 0x00036000,
45 0x00033800, 0x00037800, 0x00032800, 0x00031800,
46 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
47 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
48 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
49 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
50 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
51 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
52 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
53 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
54 0x00052000, 0x00055000, 0x00054800, 0x00051000,
55 0x00056800, 0x00054000, 0x00050000, 0x00057000,
56 0x00053000, 0x00055800, 0x00050800, 0x00056000,
57 0x00053800, 0x00057800, 0x00052800, 0x00051800,
58 0x00012000, 0x00015000, 0x00014800, 0x00011000,
59 0x00016800, 0x00014000, 0x00010000, 0x00017000,
60 0x00013000, 0x00015800, 0x00010800, 0x00016000,
61 0x00013800, 0x00017800, 0x00012800, 0x00011800,
62 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
63 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
64 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
65 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
66 0x00042000, 0x00045000, 0x00044800, 0x00041000,
67 0x00046800, 0x00044000, 0x00040000, 0x00047000,
68 0x00043000, 0x00045800, 0x00040800, 0x00046000,
69 0x00043800, 0x00047800, 0x00042800, 0x00041800,
70 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
71 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
72 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
73 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
74 0x00002000, 0x00005000, 0x00004800, 0x00001000,
75 0x00006800, 0x00004000, 0x00000000, 0x00007000,
76 0x00003000, 0x00005800, 0x00000800, 0x00006000,
77 0x00003800, 0x00007800, 0x00002800, 0x00001800,
78 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
79 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
80 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
81 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
82 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
83 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
84 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
85 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
86 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
87 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
88 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
89 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
92 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
93 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
94 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
95 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
96 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
97 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
98 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
99 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
100 0x05280000, 0x05400000, 0x05080000, 0x05680000,
101 0x05500000, 0x05180000, 0x05200000, 0x05100000,
102 0x05700000, 0x05780000, 0x05600000, 0x05380000,
103 0x05300000, 0x05000000, 0x05480000, 0x05580000,
104 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
105 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
106 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
107 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
108 0x00280000, 0x00400000, 0x00080000, 0x00680000,
109 0x00500000, 0x00180000, 0x00200000, 0x00100000,
110 0x00700000, 0x00780000, 0x00600000, 0x00380000,
111 0x00300000, 0x00000000, 0x00480000, 0x00580000,
112 0x04280000, 0x04400000, 0x04080000, 0x04680000,
113 0x04500000, 0x04180000, 0x04200000, 0x04100000,
114 0x04700000, 0x04780000, 0x04600000, 0x04380000,
115 0x04300000, 0x04000000, 0x04480000, 0x04580000,
116 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
117 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
118 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
119 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
120 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
121 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
122 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
123 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
124 0x07280000, 0x07400000, 0x07080000, 0x07680000,
125 0x07500000, 0x07180000, 0x07200000, 0x07100000,
126 0x07700000, 0x07780000, 0x07600000, 0x07380000,
127 0x07300000, 0x07000000, 0x07480000, 0x07580000,
128 0x02280000, 0x02400000, 0x02080000, 0x02680000,
129 0x02500000, 0x02180000, 0x02200000, 0x02100000,
130 0x02700000, 0x02780000, 0x02600000, 0x02380000,
131 0x02300000, 0x02000000, 0x02480000, 0x02580000,
132 0x03280000, 0x03400000, 0x03080000, 0x03680000,
133 0x03500000, 0x03180000, 0x03200000, 0x03100000,
134 0x03700000, 0x03780000, 0x03600000, 0x03380000,
135 0x03300000, 0x03000000, 0x03480000, 0x03580000,
136 0x06280000, 0x06400000, 0x06080000, 0x06680000,
137 0x06500000, 0x06180000, 0x06200000, 0x06100000,
138 0x06700000, 0x06780000, 0x06600000, 0x06380000,
139 0x06300000, 0x06000000, 0x06480000, 0x06580000,
140 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
141 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
142 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
143 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
144 0x01280000, 0x01400000, 0x01080000, 0x01680000,
145 0x01500000, 0x01180000, 0x01200000, 0x01100000,
146 0x01700000, 0x01780000, 0x01600000, 0x01380000,
147 0x01300000, 0x01000000, 0x01480000, 0x01580000,
148 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
149 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
150 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
151 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
152 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
153 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
154 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
155 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
158 0x30000002, 0x60000002, 0x38000002, 0x08000002,
159 0x28000002, 0x78000002, 0x68000002, 0x40000002,
160 0x20000002, 0x50000002, 0x48000002, 0x70000002,
161 0x00000002, 0x18000002, 0x58000002, 0x10000002,
162 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
163 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
164 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
165 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
166 0x30000005, 0x60000005, 0x38000005, 0x08000005,
167 0x28000005, 0x78000005, 0x68000005, 0x40000005,
168 0x20000005, 0x50000005, 0x48000005, 0x70000005,
169 0x00000005, 0x18000005, 0x58000005, 0x10000005,
170 0x30000000, 0x60000000, 0x38000000, 0x08000000,
171 0x28000000, 0x78000000, 0x68000000, 0x40000000,
172 0x20000000, 0x50000000, 0x48000000, 0x70000000,
173 0x00000000, 0x18000000, 0x58000000, 0x10000000,
174 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
175 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
176 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
177 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
178 0x30000001, 0x60000001, 0x38000001, 0x08000001,
179 0x28000001, 0x78000001, 0x68000001, 0x40000001,
180 0x20000001, 0x50000001, 0x48000001, 0x70000001,
181 0x00000001, 0x18000001, 0x58000001, 0x10000001,
182 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
183 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
184 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
185 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
186 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
187 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
188 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
189 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
190 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
191 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
192 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
193 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
194 0x30000003, 0x60000003, 0x38000003, 0x08000003,
195 0x28000003, 0x78000003, 0x68000003, 0x40000003,
196 0x20000003, 0x50000003, 0x48000003, 0x70000003,
197 0x00000003, 0x18000003, 0x58000003, 0x10000003,
198 0x30000004, 0x60000004, 0x38000004, 0x08000004,
199 0x28000004, 0x78000004, 0x68000004, 0x40000004,
200 0x20000004, 0x50000004, 0x48000004, 0x70000004,
201 0x00000004, 0x18000004, 0x58000004, 0x10000004,
202 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
203 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
204 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
205 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
206 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
207 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
208 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
209 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
210 0x30000006, 0x60000006, 0x38000006, 0x08000006,
211 0x28000006, 0x78000006, 0x68000006, 0x40000006,
212 0x20000006, 0x50000006, 0x48000006, 0x70000006,
213 0x00000006, 0x18000006, 0x58000006, 0x10000006,
214 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
215 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
216 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
217 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
218 0x30000007, 0x60000007, 0x38000007, 0x08000007,
219 0x28000007, 0x78000007, 0x68000007, 0x40000007,
220 0x20000007, 0x50000007, 0x48000007, 0x70000007,
221 0x00000007, 0x18000007, 0x58000007, 0x10000007,
224 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
225 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
226 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
227 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
228 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
229 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
230 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
231 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
232 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
233 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
234 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
235 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
236 0x00000068, 0x00000058, 0x00000020, 0x00000008,
237 0x00000018, 0x00000078, 0x00000028, 0x00000048,
238 0x00000000, 0x00000050, 0x00000070, 0x00000038,
239 0x00000030, 0x00000040, 0x00000010, 0x00000060,
240 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
241 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
242 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
243 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
244 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
245 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
246 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
247 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
248 0x00000568, 0x00000558, 0x00000520, 0x00000508,
249 0x00000518, 0x00000578, 0x00000528, 0x00000548,
250 0x00000500, 0x00000550, 0x00000570, 0x00000538,
251 0x00000530, 0x00000540, 0x00000510, 0x00000560,
252 0x00000268, 0x00000258, 0x00000220, 0x00000208,
253 0x00000218, 0x00000278, 0x00000228, 0x00000248,
254 0x00000200, 0x00000250, 0x00000270, 0x00000238,
255 0x00000230, 0x00000240, 0x00000210, 0x00000260,
256 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
257 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
258 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
259 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
260 0x00000168, 0x00000158, 0x00000120, 0x00000108,
261 0x00000118, 0x00000178, 0x00000128, 0x00000148,
262 0x00000100, 0x00000150, 0x00000170, 0x00000138,
263 0x00000130, 0x00000140, 0x00000110, 0x00000160,
264 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
265 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
266 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
267 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
268 0x00000768, 0x00000758, 0x00000720, 0x00000708,
269 0x00000718, 0x00000778, 0x00000728, 0x00000748,
270 0x00000700, 0x00000750, 0x00000770, 0x00000738,
271 0x00000730, 0x00000740, 0x00000710, 0x00000760,
272 0x00000368, 0x00000358, 0x00000320, 0x00000308,
273 0x00000318, 0x00000378, 0x00000328, 0x00000348,
274 0x00000300, 0x00000350, 0x00000370, 0x00000338,
275 0x00000330, 0x00000340, 0x00000310, 0x00000360,
276 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
277 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
278 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
279 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
280 0x00000468, 0x00000458, 0x00000420, 0x00000408,
281 0x00000418, 0x00000478, 0x00000428, 0x00000448,
282 0x00000400, 0x00000450, 0x00000470, 0x00000438,
283 0x00000430, 0x00000440, 0x00000410, 0x00000460,
284 0x00000668, 0x00000658, 0x00000620, 0x00000608,
285 0x00000618, 0x00000678, 0x00000628, 0x00000648,
286 0x00000600, 0x00000650, 0x00000670, 0x00000638,
287 0x00000630, 0x00000640, 0x00000610, 0x00000660,
291 #define BOX(i,n,S) (S)[(n)][(i)]
293 #define round(k1,k2,tbl) \
297 l ^= BOX (((t >> 0) & 0xff), 0, tbl) ^ \
298 BOX (((t >> 8) & 0xff), 1, tbl) ^ \
299 BOX (((t >> 16) & 0xff), 2, tbl) ^ \
300 BOX (((t >> 24) & 0xff), 3, tbl); \
302 r ^= BOX (((t >> 0) & 0xff), 0, tbl) ^ \
303 BOX (((t >> 8) & 0xff), 1, tbl) ^ \
304 BOX (((t >> 16) & 0xff), 2, tbl) ^ \
305 BOX (((t >> 24) & 0xff), 3, tbl); \
308 #define R(k,h,s,i,t) \
314 round (k[0], k[1], t); \
315 round (k[2], k[3], t); \
316 round (k[4], k[5], t); \
317 round (k[6], k[7], t); \
318 round (k[0], k[1], t); \
319 round (k[2], k[3], t); \
320 round (k[4], k[5], t); \
321 round (k[6], k[7], t); \
322 round (k[0], k[1], t); \
323 round (k[2], k[3], t); \
324 round (k[4], k[5], t); \
325 round (k[6], k[7], t); \
326 round (k[7], k[6], t); \
327 round (k[5], k[4], t); \
328 round (k[3], k[2], t); \
329 round (k[1], k[0], t); \
335 w[0] = u[0] ^ v[0]; \
336 w[1] = u[1] ^ v[1]; \
337 w[2] = u[2] ^ v[2]; \
338 w[3] = u[3] ^ v[3]; \
339 w[4] = u[4] ^ v[4]; \
340 w[5] = u[5] ^ v[5]; \
341 w[6] = u[6] ^ v[6]; \
345 k[0] = ((w[0] & 0x000000ff) << 0) \
346 | ((w[2] & 0x000000ff) << 8) \
347 | ((w[4] & 0x000000ff) << 16) \
348 | ((w[6] & 0x000000ff) << 24); \
349 k[1] = ((w[0] & 0x0000ff00) >> 8) \
350 | ((w[2] & 0x0000ff00) >> 0) \
351 | ((w[4] & 0x0000ff00) << 8) \
352 | ((w[6] & 0x0000ff00) << 16); \
353 k[2] = ((w[0] & 0x00ff0000) >> 16) \
354 | ((w[2] & 0x00ff0000) >> 8) \
355 | ((w[4] & 0x00ff0000) << 0) \
356 | ((w[6] & 0x00ff0000) << 8); \
357 k[3] = ((w[0] & 0xff000000) >> 24) \
358 | ((w[2] & 0xff000000) >> 16) \
359 | ((w[4] & 0xff000000) >> 8) \
360 | ((w[6] & 0xff000000) >> 0); \
361 k[4] = ((w[1] & 0x000000ff) << 0) \
362 | ((w[3] & 0x000000ff) << 8) \
363 | ((w[5] & 0x000000ff) << 16) \
364 | ((w[7] & 0x000000ff) << 24); \
365 k[5] = ((w[1] & 0x0000ff00) >> 8) \
366 | ((w[3] & 0x0000ff00) >> 0) \
367 | ((w[5] & 0x0000ff00) << 8) \
368 | ((w[7] & 0x0000ff00) << 16); \
369 k[6] = ((w[1] & 0x00ff0000) >> 16) \
370 | ((w[3] & 0x00ff0000) >> 8) \
371 | ((w[5] & 0x00ff0000) << 0) \
372 | ((w[7] & 0x00ff0000) << 8); \
373 k[7] = ((w[1] & 0xff000000) >> 24) \
374 | ((w[3] & 0xff000000) >> 16) \
375 | ((w[5] & 0xff000000) >> 8) \
376 | ((w[7] & 0xff000000) >> 0);
413 x[0] ^= 0xff00ff00; \
414 x[1] ^= 0xff00ff00; \
415 x[2] ^= 0x00ff00ff; \
416 x[3] ^= 0x00ff00ff; \
417 x[4] ^= 0x00ffff00; \
418 x[5] ^= 0xff0000ff; \
419 x[6] ^= 0x000000ff; \
422 #define SHIFT12(u,m,s) \
423 u[0] = m[0] ^ s[6]; \
424 u[1] = m[1] ^ s[7]; \
425 u[2] = m[2] ^ (s[0] << 16) \
427 ^ (s[0] & 0x0000ffff) \
428 ^ (s[1] & 0x0000ffff) \
433 ^ (s[7] & 0xffff0000) \
435 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
437 ^ (s[1] & 0x0000ffff) \
446 ^ (s[7] & 0x0000ffff) \
449 u[4] = m[4] ^ (s[0] & 0xffff0000) \
452 ^ (s[1] & 0xffff0000) \
461 ^ (s[7] & 0x0000ffff) \
464 u[5] = m[5] ^ (s[0] << 16) \
466 ^ (s[0] & 0xffff0000) \
467 ^ (s[1] & 0x0000ffff) \
477 ^ (s[7] & 0xffff0000) \
493 u[7] = m[7] ^ (s[0] & 0xffff0000) \
495 ^ (s[1] & 0x0000ffff) \
504 ^ (s[7] & 0x0000ffff) \
508 #define SHIFT16(h,v,u) \
509 v[0] = h[0] ^ (u[1] << 16) \
511 v[1] = h[1] ^ (u[2] << 16) \
513 v[2] = h[2] ^ (u[3] << 16) \
515 v[3] = h[3] ^ (u[4] << 16) \
517 v[4] = h[4] ^ (u[5] << 16) \
519 v[5] = h[5] ^ (u[6] << 16) \
521 v[6] = h[6] ^ (u[7] << 16) \
523 v[7] = h[7] ^ (u[0] & 0xffff0000) \
526 ^ (u[1] & 0xffff0000) \
529 ^ (u[7] & 0xffff0000);
531 #define SHIFT61(h,v) \
532 h[0] = (v[0] & 0xffff0000) \
536 ^ (v[1] & 0xffff0000) \
545 ^ (v[7] & 0x0000ffff); \
546 h[1] = (v[0] << 16) \
548 ^ (v[0] & 0xffff0000) \
549 ^ (v[1] & 0x0000ffff) \
557 ^ (v[7] & 0xffff0000) \
559 h[2] = (v[0] & 0x0000ffff) \
563 ^ (v[1] & 0xffff0000) \
571 ^ (v[7] & 0x0000ffff) \
574 h[3] = (v[0] << 16) \
576 ^ (v[0] & 0xffff0000) \
577 ^ (v[1] & 0xffff0000) \
587 ^ (v[7] & 0x0000ffff) \
589 h[4] = (v[0] >> 16) \
603 h[5] = (v[0] << 16) \
604 ^ (v[0] & 0xffff0000) \
607 ^ (v[1] & 0xffff0000) \
621 ^ (v[7] & 0xffff0000); \
653 #define PASS0(h,s,u,v,t) \
664 #define PASS2(h,s,u,v,t) \
676 #define PASS4(h,s,u,v,t) \
687 #define PASS6(h,s,u,v,t) \
696 __kernel void m06900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
702 const u32 gid = get_global_id (0);
703 const u32 lid = get_local_id (0);
704 const u32 lsz = get_local_size (0);
710 __local u32 s_tables[4][256];
712 for (u32 i = lid; i < 256; i += lsz)
714 s_tables[0][i] = c_tables[0][i];
715 s_tables[1][i] = c_tables[1][i];
716 s_tables[2][i] = c_tables[2][i];
717 s_tables[3][i] = c_tables[3][i];
720 barrier (CLK_LOCAL_MEM_FENCE);
722 if (gid >= gid_max) return;
730 wordl0[0] = pws[gid].i[ 0];
731 wordl0[1] = pws[gid].i[ 1];
732 wordl0[2] = pws[gid].i[ 2];
733 wordl0[3] = pws[gid].i[ 3];
737 wordl1[0] = pws[gid].i[ 4];
738 wordl1[1] = pws[gid].i[ 5];
739 wordl1[2] = pws[gid].i[ 6];
740 wordl1[3] = pws[gid].i[ 7];
756 const u32 pw_l_len = pws[gid].pw_len;
758 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
760 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
767 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
769 const u32 pw_r_len = combs_buf[il_pos].pw_len;
771 const u32 pw_len = pw_l_len + pw_r_len;
775 wordr0[0] = combs_buf[il_pos].i[0];
776 wordr0[1] = combs_buf[il_pos].i[1];
777 wordr0[2] = combs_buf[il_pos].i[2];
778 wordr0[3] = combs_buf[il_pos].i[3];
782 wordr1[0] = combs_buf[il_pos].i[4];
783 wordr1[1] = combs_buf[il_pos].i[5];
784 wordr1[2] = combs_buf[il_pos].i[6];
785 wordr1[3] = combs_buf[il_pos].i[7];
801 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
803 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
808 w0[0] = wordl0[0] | wordr0[0];
809 w0[1] = wordl0[1] | wordr0[1];
810 w0[2] = wordl0[2] | wordr0[2];
811 w0[3] = wordl0[3] | wordr0[3];
815 w1[0] = wordl1[0] | wordr1[0];
816 w1[1] = wordl1[1] | wordr1[1];
817 w1[2] = wordl1[2] | wordr1[2];
818 w1[3] = wordl1[3] | wordr1[3];
822 w2[0] = wordl2[0] | wordr2[0];
823 w2[1] = wordl2[1] | wordr2[1];
824 w2[2] = wordl2[2] | wordr2[2];
825 w2[3] = wordl2[3] | wordr2[3];
829 w3[0] = wordl3[0] | wordr3[0];
830 w3[1] = wordl3[1] | wordr3[1];
834 const u32 w14 = pw_len * 8;
871 state_m[0] = state[0];
872 state_m[1] = state[1];
873 state_m[2] = state[2];
874 state_m[3] = state[3];
875 state_m[4] = state[4];
876 state_m[5] = state[5];
877 state_m[6] = state[6];
878 state_m[7] = state[7];
893 PASS0 (state, tmp, state_m, data_m, s_tables);
894 PASS2 (state, tmp, state_m, data_m, s_tables);
895 PASS4 (state, tmp, state_m, data_m, s_tables);
896 PASS6 (state, tmp, state_m, data_m, s_tables);
898 SHIFT12 (state_m, data, tmp);
899 SHIFT16 (state, data_m, state_m);
900 SHIFT61 (state, data_m);
914 state_m[0] = state[0];
915 state_m[1] = state[1];
916 state_m[2] = state[2];
917 state_m[3] = state[3];
918 state_m[4] = state[4];
919 state_m[5] = state[5];
920 state_m[6] = state[6];
921 state_m[7] = state[7];
932 PASS0 (state, tmp, state_m, data_m, s_tables);
933 PASS2 (state, tmp, state_m, data_m, s_tables);
934 PASS4 (state, tmp, state_m, data_m, s_tables);
935 PASS6 (state, tmp, state_m, data_m, s_tables);
937 SHIFT12 (state_m, data, tmp);
938 SHIFT16 (state, data_m, state_m);
939 SHIFT61 (state, data_m);
952 state_m[0] = state[0];
953 state_m[1] = state[1];
954 state_m[2] = state[2];
955 state_m[3] = state[3];
956 state_m[4] = state[4];
957 state_m[5] = state[5];
958 state_m[6] = state[6];
959 state_m[7] = state[7];
970 PASS0 (state, tmp, state_m, data_m, s_tables);
971 PASS2 (state, tmp, state_m, data_m, s_tables);
972 PASS4 (state, tmp, state_m, data_m, s_tables);
973 PASS6 (state, tmp, state_m, data_m, s_tables);
975 SHIFT12 (state_m, data, tmp);
976 SHIFT16 (state, data_m, state_m);
977 SHIFT61 (state, data_m);
981 const u32 r0 = state[0];
982 const u32 r1 = state[1];
983 const u32 r2 = state[2];
984 const u32 r3 = state[3];
990 __kernel void m06900_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
994 __kernel void m06900_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
998 __kernel void m06900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1004 const u32 gid = get_global_id (0);
1005 const u32 lid = get_local_id (0);
1006 const u32 lsz = get_local_size (0);
1012 __local u32 s_tables[4][256];
1014 for (u32 i = lid; i < 256; i += lsz)
1016 s_tables[0][i] = c_tables[0][i];
1017 s_tables[1][i] = c_tables[1][i];
1018 s_tables[2][i] = c_tables[2][i];
1019 s_tables[3][i] = c_tables[3][i];
1022 barrier (CLK_LOCAL_MEM_FENCE);
1024 if (gid >= gid_max) return;
1032 wordl0[0] = pws[gid].i[ 0];
1033 wordl0[1] = pws[gid].i[ 1];
1034 wordl0[2] = pws[gid].i[ 2];
1035 wordl0[3] = pws[gid].i[ 3];
1039 wordl1[0] = pws[gid].i[ 4];
1040 wordl1[1] = pws[gid].i[ 5];
1041 wordl1[2] = pws[gid].i[ 6];
1042 wordl1[3] = pws[gid].i[ 7];
1058 const u32 pw_l_len = pws[gid].pw_len;
1060 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
1062 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
1069 const u32 search[4] =
1071 digests_buf[digests_offset].digest_buf[DGST_R0],
1072 digests_buf[digests_offset].digest_buf[DGST_R1],
1073 digests_buf[digests_offset].digest_buf[DGST_R2],
1074 digests_buf[digests_offset].digest_buf[DGST_R3]
1081 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
1083 const u32 pw_r_len = combs_buf[il_pos].pw_len;
1085 const u32 pw_len = pw_l_len + pw_r_len;
1089 wordr0[0] = combs_buf[il_pos].i[0];
1090 wordr0[1] = combs_buf[il_pos].i[1];
1091 wordr0[2] = combs_buf[il_pos].i[2];
1092 wordr0[3] = combs_buf[il_pos].i[3];
1096 wordr1[0] = combs_buf[il_pos].i[4];
1097 wordr1[1] = combs_buf[il_pos].i[5];
1098 wordr1[2] = combs_buf[il_pos].i[6];
1099 wordr1[3] = combs_buf[il_pos].i[7];
1115 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
1117 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
1122 w0[0] = wordl0[0] | wordr0[0];
1123 w0[1] = wordl0[1] | wordr0[1];
1124 w0[2] = wordl0[2] | wordr0[2];
1125 w0[3] = wordl0[3] | wordr0[3];
1129 w1[0] = wordl1[0] | wordr1[0];
1130 w1[1] = wordl1[1] | wordr1[1];
1131 w1[2] = wordl1[2] | wordr1[2];
1132 w1[3] = wordl1[3] | wordr1[3];
1136 w2[0] = wordl2[0] | wordr2[0];
1137 w2[1] = wordl2[1] | wordr2[1];
1138 w2[2] = wordl2[2] | wordr2[2];
1139 w2[3] = wordl2[3] | wordr2[3];
1143 w3[0] = wordl3[0] | wordr3[0];
1144 w3[1] = wordl3[1] | wordr3[1];
1148 const u32 w14 = pw_len * 8;
1171 state[ 8] = data[0];
1172 state[ 9] = data[1];
1173 state[10] = data[2];
1174 state[11] = data[3];
1175 state[12] = data[4];
1176 state[13] = data[5];
1177 state[14] = data[6];
1178 state[15] = data[7];
1185 state_m[0] = state[0];
1186 state_m[1] = state[1];
1187 state_m[2] = state[2];
1188 state_m[3] = state[3];
1189 state_m[4] = state[4];
1190 state_m[5] = state[5];
1191 state_m[6] = state[6];
1192 state_m[7] = state[7];
1194 data_m[0] = data[0];
1195 data_m[1] = data[1];
1196 data_m[2] = data[2];
1197 data_m[3] = data[3];
1198 data_m[4] = data[4];
1199 data_m[5] = data[5];
1200 data_m[6] = data[6];
1201 data_m[7] = data[7];
1207 PASS0 (state, tmp, state_m, data_m, s_tables);
1208 PASS2 (state, tmp, state_m, data_m, s_tables);
1209 PASS4 (state, tmp, state_m, data_m, s_tables);
1210 PASS6 (state, tmp, state_m, data_m, s_tables);
1212 SHIFT12 (state_m, data, tmp);
1213 SHIFT16 (state, data_m, state_m);
1214 SHIFT61 (state, data_m);
1228 state_m[0] = state[0];
1229 state_m[1] = state[1];
1230 state_m[2] = state[2];
1231 state_m[3] = state[3];
1232 state_m[4] = state[4];
1233 state_m[5] = state[5];
1234 state_m[6] = state[6];
1235 state_m[7] = state[7];
1237 data_m[0] = data[0];
1238 data_m[1] = data[1];
1239 data_m[2] = data[2];
1240 data_m[3] = data[3];
1241 data_m[4] = data[4];
1242 data_m[5] = data[5];
1243 data_m[6] = data[6];
1244 data_m[7] = data[7];
1246 PASS0 (state, tmp, state_m, data_m, s_tables);
1247 PASS2 (state, tmp, state_m, data_m, s_tables);
1248 PASS4 (state, tmp, state_m, data_m, s_tables);
1249 PASS6 (state, tmp, state_m, data_m, s_tables);
1251 SHIFT12 (state_m, data, tmp);
1252 SHIFT16 (state, data_m, state_m);
1253 SHIFT61 (state, data_m);
1257 data[0] = state[ 8];
1258 data[1] = state[ 9];
1259 data[2] = state[10];
1260 data[3] = state[11];
1261 data[4] = state[12];
1262 data[5] = state[13];
1263 data[6] = state[14];
1264 data[7] = state[15];
1266 state_m[0] = state[0];
1267 state_m[1] = state[1];
1268 state_m[2] = state[2];
1269 state_m[3] = state[3];
1270 state_m[4] = state[4];
1271 state_m[5] = state[5];
1272 state_m[6] = state[6];
1273 state_m[7] = state[7];
1275 data_m[0] = data[0];
1276 data_m[1] = data[1];
1277 data_m[2] = data[2];
1278 data_m[3] = data[3];
1279 data_m[4] = data[4];
1280 data_m[5] = data[5];
1281 data_m[6] = data[6];
1282 data_m[7] = data[7];
1284 PASS0 (state, tmp, state_m, data_m, s_tables);
1285 PASS2 (state, tmp, state_m, data_m, s_tables);
1286 PASS4 (state, tmp, state_m, data_m, s_tables);
1287 PASS6 (state, tmp, state_m, data_m, s_tables);
1289 SHIFT12 (state_m, data, tmp);
1290 SHIFT16 (state, data_m, state_m);
1291 SHIFT61 (state, data_m);
1295 const u32 r0 = state[0];
1296 const u32 r1 = state[1];
1297 const u32 r2 = state[2];
1298 const u32 r3 = state[3];
1304 __kernel void m06900_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1308 __kernel void m06900_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)