2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
38 __device__ __constant__ u32 c_tables[4][256] =
41 0x00072000, 0x00075000, 0x00074800, 0x00071000,
42 0x00076800, 0x00074000, 0x00070000, 0x00077000,
43 0x00073000, 0x00075800, 0x00070800, 0x00076000,
44 0x00073800, 0x00077800, 0x00072800, 0x00071800,
45 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
46 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
47 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
48 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
49 0x00022000, 0x00025000, 0x00024800, 0x00021000,
50 0x00026800, 0x00024000, 0x00020000, 0x00027000,
51 0x00023000, 0x00025800, 0x00020800, 0x00026000,
52 0x00023800, 0x00027800, 0x00022800, 0x00021800,
53 0x00062000, 0x00065000, 0x00064800, 0x00061000,
54 0x00066800, 0x00064000, 0x00060000, 0x00067000,
55 0x00063000, 0x00065800, 0x00060800, 0x00066000,
56 0x00063800, 0x00067800, 0x00062800, 0x00061800,
57 0x00032000, 0x00035000, 0x00034800, 0x00031000,
58 0x00036800, 0x00034000, 0x00030000, 0x00037000,
59 0x00033000, 0x00035800, 0x00030800, 0x00036000,
60 0x00033800, 0x00037800, 0x00032800, 0x00031800,
61 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
62 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
63 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
64 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
65 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
66 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
67 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
68 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
69 0x00052000, 0x00055000, 0x00054800, 0x00051000,
70 0x00056800, 0x00054000, 0x00050000, 0x00057000,
71 0x00053000, 0x00055800, 0x00050800, 0x00056000,
72 0x00053800, 0x00057800, 0x00052800, 0x00051800,
73 0x00012000, 0x00015000, 0x00014800, 0x00011000,
74 0x00016800, 0x00014000, 0x00010000, 0x00017000,
75 0x00013000, 0x00015800, 0x00010800, 0x00016000,
76 0x00013800, 0x00017800, 0x00012800, 0x00011800,
77 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
78 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
79 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
80 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
81 0x00042000, 0x00045000, 0x00044800, 0x00041000,
82 0x00046800, 0x00044000, 0x00040000, 0x00047000,
83 0x00043000, 0x00045800, 0x00040800, 0x00046000,
84 0x00043800, 0x00047800, 0x00042800, 0x00041800,
85 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
86 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
87 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
88 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
89 0x00002000, 0x00005000, 0x00004800, 0x00001000,
90 0x00006800, 0x00004000, 0x00000000, 0x00007000,
91 0x00003000, 0x00005800, 0x00000800, 0x00006000,
92 0x00003800, 0x00007800, 0x00002800, 0x00001800,
93 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
94 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
95 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
96 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
97 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
98 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
99 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
100 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
101 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
102 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
103 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
104 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
107 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
108 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
109 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
110 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
111 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
112 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
113 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
114 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
115 0x05280000, 0x05400000, 0x05080000, 0x05680000,
116 0x05500000, 0x05180000, 0x05200000, 0x05100000,
117 0x05700000, 0x05780000, 0x05600000, 0x05380000,
118 0x05300000, 0x05000000, 0x05480000, 0x05580000,
119 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
120 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
121 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
122 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
123 0x00280000, 0x00400000, 0x00080000, 0x00680000,
124 0x00500000, 0x00180000, 0x00200000, 0x00100000,
125 0x00700000, 0x00780000, 0x00600000, 0x00380000,
126 0x00300000, 0x00000000, 0x00480000, 0x00580000,
127 0x04280000, 0x04400000, 0x04080000, 0x04680000,
128 0x04500000, 0x04180000, 0x04200000, 0x04100000,
129 0x04700000, 0x04780000, 0x04600000, 0x04380000,
130 0x04300000, 0x04000000, 0x04480000, 0x04580000,
131 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
132 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
133 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
134 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
135 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
136 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
137 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
138 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
139 0x07280000, 0x07400000, 0x07080000, 0x07680000,
140 0x07500000, 0x07180000, 0x07200000, 0x07100000,
141 0x07700000, 0x07780000, 0x07600000, 0x07380000,
142 0x07300000, 0x07000000, 0x07480000, 0x07580000,
143 0x02280000, 0x02400000, 0x02080000, 0x02680000,
144 0x02500000, 0x02180000, 0x02200000, 0x02100000,
145 0x02700000, 0x02780000, 0x02600000, 0x02380000,
146 0x02300000, 0x02000000, 0x02480000, 0x02580000,
147 0x03280000, 0x03400000, 0x03080000, 0x03680000,
148 0x03500000, 0x03180000, 0x03200000, 0x03100000,
149 0x03700000, 0x03780000, 0x03600000, 0x03380000,
150 0x03300000, 0x03000000, 0x03480000, 0x03580000,
151 0x06280000, 0x06400000, 0x06080000, 0x06680000,
152 0x06500000, 0x06180000, 0x06200000, 0x06100000,
153 0x06700000, 0x06780000, 0x06600000, 0x06380000,
154 0x06300000, 0x06000000, 0x06480000, 0x06580000,
155 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
156 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
157 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
158 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
159 0x01280000, 0x01400000, 0x01080000, 0x01680000,
160 0x01500000, 0x01180000, 0x01200000, 0x01100000,
161 0x01700000, 0x01780000, 0x01600000, 0x01380000,
162 0x01300000, 0x01000000, 0x01480000, 0x01580000,
163 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
164 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
165 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
166 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
167 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
168 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
169 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
170 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
173 0x30000002, 0x60000002, 0x38000002, 0x08000002,
174 0x28000002, 0x78000002, 0x68000002, 0x40000002,
175 0x20000002, 0x50000002, 0x48000002, 0x70000002,
176 0x00000002, 0x18000002, 0x58000002, 0x10000002,
177 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
178 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
179 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
180 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
181 0x30000005, 0x60000005, 0x38000005, 0x08000005,
182 0x28000005, 0x78000005, 0x68000005, 0x40000005,
183 0x20000005, 0x50000005, 0x48000005, 0x70000005,
184 0x00000005, 0x18000005, 0x58000005, 0x10000005,
185 0x30000000, 0x60000000, 0x38000000, 0x08000000,
186 0x28000000, 0x78000000, 0x68000000, 0x40000000,
187 0x20000000, 0x50000000, 0x48000000, 0x70000000,
188 0x00000000, 0x18000000, 0x58000000, 0x10000000,
189 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
190 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
191 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
192 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
193 0x30000001, 0x60000001, 0x38000001, 0x08000001,
194 0x28000001, 0x78000001, 0x68000001, 0x40000001,
195 0x20000001, 0x50000001, 0x48000001, 0x70000001,
196 0x00000001, 0x18000001, 0x58000001, 0x10000001,
197 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
198 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
199 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
200 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
201 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
202 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
203 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
204 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
205 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
206 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
207 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
208 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
209 0x30000003, 0x60000003, 0x38000003, 0x08000003,
210 0x28000003, 0x78000003, 0x68000003, 0x40000003,
211 0x20000003, 0x50000003, 0x48000003, 0x70000003,
212 0x00000003, 0x18000003, 0x58000003, 0x10000003,
213 0x30000004, 0x60000004, 0x38000004, 0x08000004,
214 0x28000004, 0x78000004, 0x68000004, 0x40000004,
215 0x20000004, 0x50000004, 0x48000004, 0x70000004,
216 0x00000004, 0x18000004, 0x58000004, 0x10000004,
217 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
218 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
219 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
220 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
221 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
222 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
223 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
224 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
225 0x30000006, 0x60000006, 0x38000006, 0x08000006,
226 0x28000006, 0x78000006, 0x68000006, 0x40000006,
227 0x20000006, 0x50000006, 0x48000006, 0x70000006,
228 0x00000006, 0x18000006, 0x58000006, 0x10000006,
229 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
230 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
231 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
232 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
233 0x30000007, 0x60000007, 0x38000007, 0x08000007,
234 0x28000007, 0x78000007, 0x68000007, 0x40000007,
235 0x20000007, 0x50000007, 0x48000007, 0x70000007,
236 0x00000007, 0x18000007, 0x58000007, 0x10000007,
239 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
240 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
241 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
242 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
243 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
244 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
245 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
246 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
247 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
248 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
249 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
250 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
251 0x00000068, 0x00000058, 0x00000020, 0x00000008,
252 0x00000018, 0x00000078, 0x00000028, 0x00000048,
253 0x00000000, 0x00000050, 0x00000070, 0x00000038,
254 0x00000030, 0x00000040, 0x00000010, 0x00000060,
255 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
256 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
257 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
258 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
259 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
260 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
261 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
262 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
263 0x00000568, 0x00000558, 0x00000520, 0x00000508,
264 0x00000518, 0x00000578, 0x00000528, 0x00000548,
265 0x00000500, 0x00000550, 0x00000570, 0x00000538,
266 0x00000530, 0x00000540, 0x00000510, 0x00000560,
267 0x00000268, 0x00000258, 0x00000220, 0x00000208,
268 0x00000218, 0x00000278, 0x00000228, 0x00000248,
269 0x00000200, 0x00000250, 0x00000270, 0x00000238,
270 0x00000230, 0x00000240, 0x00000210, 0x00000260,
271 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
272 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
273 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
274 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
275 0x00000168, 0x00000158, 0x00000120, 0x00000108,
276 0x00000118, 0x00000178, 0x00000128, 0x00000148,
277 0x00000100, 0x00000150, 0x00000170, 0x00000138,
278 0x00000130, 0x00000140, 0x00000110, 0x00000160,
279 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
280 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
281 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
282 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
283 0x00000768, 0x00000758, 0x00000720, 0x00000708,
284 0x00000718, 0x00000778, 0x00000728, 0x00000748,
285 0x00000700, 0x00000750, 0x00000770, 0x00000738,
286 0x00000730, 0x00000740, 0x00000710, 0x00000760,
287 0x00000368, 0x00000358, 0x00000320, 0x00000308,
288 0x00000318, 0x00000378, 0x00000328, 0x00000348,
289 0x00000300, 0x00000350, 0x00000370, 0x00000338,
290 0x00000330, 0x00000340, 0x00000310, 0x00000360,
291 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
292 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
293 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
294 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
295 0x00000468, 0x00000458, 0x00000420, 0x00000408,
296 0x00000418, 0x00000478, 0x00000428, 0x00000448,
297 0x00000400, 0x00000450, 0x00000470, 0x00000438,
298 0x00000430, 0x00000440, 0x00000410, 0x00000460,
299 0x00000668, 0x00000658, 0x00000620, 0x00000608,
300 0x00000618, 0x00000678, 0x00000628, 0x00000648,
301 0x00000600, 0x00000650, 0x00000670, 0x00000638,
302 0x00000630, 0x00000640, 0x00000610, 0x00000660,
307 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
311 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
314 #define round(k1,k2,tbl) \
318 l ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \
319 BOX ((t >> 8) & 0xff, 1, tbl) ^ \
320 BOX ((t >> 16) & 0xff, 2, tbl) ^ \
321 BOX ((t >> 24) & 0xff, 3, tbl); \
323 r ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \
324 BOX ((t >> 8) & 0xff, 1, tbl) ^ \
325 BOX ((t >> 16) & 0xff, 2, tbl) ^ \
326 BOX ((t >> 24) & 0xff, 3, tbl); \
329 #define R(k,h,s,i,t) \
335 round (k[0], k[1], t); \
336 round (k[2], k[3], t); \
337 round (k[4], k[5], t); \
338 round (k[6], k[7], t); \
339 round (k[0], k[1], t); \
340 round (k[2], k[3], t); \
341 round (k[4], k[5], t); \
342 round (k[6], k[7], t); \
343 round (k[0], k[1], t); \
344 round (k[2], k[3], t); \
345 round (k[4], k[5], t); \
346 round (k[6], k[7], t); \
347 round (k[7], k[6], t); \
348 round (k[5], k[4], t); \
349 round (k[3], k[2], t); \
350 round (k[1], k[0], t); \
356 w[0] = u[0] ^ v[0]; \
357 w[1] = u[1] ^ v[1]; \
358 w[2] = u[2] ^ v[2]; \
359 w[3] = u[3] ^ v[3]; \
360 w[4] = u[4] ^ v[4]; \
361 w[5] = u[5] ^ v[5]; \
362 w[6] = u[6] ^ v[6]; \
366 k[0] = ((w[0] & 0x000000ff) << 0) \
367 | ((w[2] & 0x000000ff) << 8) \
368 | ((w[4] & 0x000000ff) << 16) \
369 | ((w[6] & 0x000000ff) << 24); \
370 k[1] = ((w[0] & 0x0000ff00) >> 8) \
371 | ((w[2] & 0x0000ff00) >> 0) \
372 | ((w[4] & 0x0000ff00) << 8) \
373 | ((w[6] & 0x0000ff00) << 16); \
374 k[2] = ((w[0] & 0x00ff0000) >> 16) \
375 | ((w[2] & 0x00ff0000) >> 8) \
376 | ((w[4] & 0x00ff0000) << 0) \
377 | ((w[6] & 0x00ff0000) << 8); \
378 k[3] = ((w[0] & 0xff000000) >> 24) \
379 | ((w[2] & 0xff000000) >> 16) \
380 | ((w[4] & 0xff000000) >> 8) \
381 | ((w[6] & 0xff000000) >> 0); \
382 k[4] = ((w[1] & 0x000000ff) << 0) \
383 | ((w[3] & 0x000000ff) << 8) \
384 | ((w[5] & 0x000000ff) << 16) \
385 | ((w[7] & 0x000000ff) << 24); \
386 k[5] = ((w[1] & 0x0000ff00) >> 8) \
387 | ((w[3] & 0x0000ff00) >> 0) \
388 | ((w[5] & 0x0000ff00) << 8) \
389 | ((w[7] & 0x0000ff00) << 16); \
390 k[6] = ((w[1] & 0x00ff0000) >> 16) \
391 | ((w[3] & 0x00ff0000) >> 8) \
392 | ((w[5] & 0x00ff0000) << 0) \
393 | ((w[7] & 0x00ff0000) << 8); \
394 k[7] = ((w[1] & 0xff000000) >> 24) \
395 | ((w[3] & 0xff000000) >> 16) \
396 | ((w[5] & 0xff000000) >> 8) \
397 | ((w[7] & 0xff000000) >> 0);
434 x[0] ^= 0xff00ff00; \
435 x[1] ^= 0xff00ff00; \
436 x[2] ^= 0x00ff00ff; \
437 x[3] ^= 0x00ff00ff; \
438 x[4] ^= 0x00ffff00; \
439 x[5] ^= 0xff0000ff; \
440 x[6] ^= 0x000000ff; \
443 #define SHIFT12(u,m,s) \
444 u[0] = m[0] ^ s[6]; \
445 u[1] = m[1] ^ s[7]; \
446 u[2] = m[2] ^ (s[0] << 16) \
448 ^ (s[0] & 0x0000ffff) \
449 ^ (s[1] & 0x0000ffff) \
454 ^ (s[7] & 0xffff0000) \
456 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
458 ^ (s[1] & 0x0000ffff) \
467 ^ (s[7] & 0x0000ffff) \
470 u[4] = m[4] ^ (s[0] & 0xffff0000) \
473 ^ (s[1] & 0xffff0000) \
482 ^ (s[7] & 0x0000ffff) \
485 u[5] = m[5] ^ (s[0] << 16) \
487 ^ (s[0] & 0xffff0000) \
488 ^ (s[1] & 0x0000ffff) \
498 ^ (s[7] & 0xffff0000) \
514 u[7] = m[7] ^ (s[0] & 0xffff0000) \
516 ^ (s[1] & 0x0000ffff) \
525 ^ (s[7] & 0x0000ffff) \
529 #define SHIFT16(h,v,u) \
530 v[0] = h[0] ^ (u[1] << 16) \
532 v[1] = h[1] ^ (u[2] << 16) \
534 v[2] = h[2] ^ (u[3] << 16) \
536 v[3] = h[3] ^ (u[4] << 16) \
538 v[4] = h[4] ^ (u[5] << 16) \
540 v[5] = h[5] ^ (u[6] << 16) \
542 v[6] = h[6] ^ (u[7] << 16) \
544 v[7] = h[7] ^ (u[0] & 0xffff0000) \
547 ^ (u[1] & 0xffff0000) \
550 ^ (u[7] & 0xffff0000);
552 #define SHIFT61(h,v) \
553 h[0] = (v[0] & 0xffff0000) \
557 ^ (v[1] & 0xffff0000) \
566 ^ (v[7] & 0x0000ffff); \
567 h[1] = (v[0] << 16) \
569 ^ (v[0] & 0xffff0000) \
570 ^ (v[1] & 0x0000ffff) \
578 ^ (v[7] & 0xffff0000) \
580 h[2] = (v[0] & 0x0000ffff) \
584 ^ (v[1] & 0xffff0000) \
592 ^ (v[7] & 0x0000ffff) \
595 h[3] = (v[0] << 16) \
597 ^ (v[0] & 0xffff0000) \
598 ^ (v[1] & 0xffff0000) \
608 ^ (v[7] & 0x0000ffff) \
610 h[4] = (v[0] >> 16) \
624 h[5] = (v[0] << 16) \
625 ^ (v[0] & 0xffff0000) \
628 ^ (v[1] & 0xffff0000) \
642 ^ (v[7] & 0xffff0000); \
674 #define PASS0(h,s,u,v,t) \
685 #define PASS2(h,s,u,v,t) \
697 #define PASS4(h,s,u,v,t) \
708 #define PASS6(h,s,u,v,t) \
717 __device__ __shared__ u32 s_tables[4][256];
719 __device__ __constant__ bf_t c_bfs[1024];
721 __device__ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
727 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
728 const u32 lid = threadIdx.x;
734 const u32 w14 = pw_len * 8;
742 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
744 const u32 w0r = c_bfs[il_pos].i;
783 state_m[0] = state[0];
784 state_m[1] = state[1];
785 state_m[2] = state[2];
786 state_m[3] = state[3];
787 state_m[4] = state[4];
788 state_m[5] = state[5];
789 state_m[6] = state[6];
790 state_m[7] = state[7];
803 PASS0 (state, tmp, state_m, data_m, s_tables);
804 PASS2 (state, tmp, state_m, data_m, s_tables);
805 PASS4 (state, tmp, state_m, data_m, s_tables);
806 PASS6 (state, tmp, state_m, data_m, s_tables);
808 SHIFT12 (state_m, data, tmp);
809 SHIFT16 (state, data_m, state_m);
810 SHIFT61 (state, data_m);
823 state_m[0] = state[0];
824 state_m[1] = state[1];
825 state_m[2] = state[2];
826 state_m[3] = state[3];
827 state_m[4] = state[4];
828 state_m[5] = state[5];
829 state_m[6] = state[6];
830 state_m[7] = state[7];
841 PASS0 (state, tmp, state_m, data_m, s_tables);
842 PASS2 (state, tmp, state_m, data_m, s_tables);
843 PASS4 (state, tmp, state_m, data_m, s_tables);
844 PASS6 (state, tmp, state_m, data_m, s_tables);
846 SHIFT12 (state_m, data, tmp);
847 SHIFT16 (state, data_m, state_m);
848 SHIFT61 (state, data_m);
861 state_m[0] = state[0];
862 state_m[1] = state[1];
863 state_m[2] = state[2];
864 state_m[3] = state[3];
865 state_m[4] = state[4];
866 state_m[5] = state[5];
867 state_m[6] = state[6];
868 state_m[7] = state[7];
879 PASS0 (state, tmp, state_m, data_m, s_tables);
880 PASS2 (state, tmp, state_m, data_m, s_tables);
881 PASS4 (state, tmp, state_m, data_m, s_tables);
882 PASS6 (state, tmp, state_m, data_m, s_tables);
884 SHIFT12 (state_m, data, tmp);
885 SHIFT16 (state, data_m, state_m);
886 SHIFT61 (state, data_m);
890 const u32x r0 = state[0];
891 const u32x r1 = state[1];
892 const u32x r2 = state[2];
893 const u32x r3 = state[3];
895 #include VECT_COMPARE_M
899 __device__ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
905 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
906 const u32 lid = threadIdx.x;
912 const u32 w14 = pw_len * 8;
918 const u32 search[4] =
920 digests_buf[digests_offset].digest_buf[DGST_R0],
921 digests_buf[digests_offset].digest_buf[DGST_R1],
922 digests_buf[digests_offset].digest_buf[DGST_R2],
923 digests_buf[digests_offset].digest_buf[DGST_R3]
932 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
934 const u32 w0r = c_bfs[il_pos].i;
973 state_m[0] = state[0];
974 state_m[1] = state[1];
975 state_m[2] = state[2];
976 state_m[3] = state[3];
977 state_m[4] = state[4];
978 state_m[5] = state[5];
979 state_m[6] = state[6];
980 state_m[7] = state[7];
993 PASS0 (state, tmp, state_m, data_m, s_tables);
994 PASS2 (state, tmp, state_m, data_m, s_tables);
995 PASS4 (state, tmp, state_m, data_m, s_tables);
996 PASS6 (state, tmp, state_m, data_m, s_tables);
998 SHIFT12 (state_m, data, tmp);
999 SHIFT16 (state, data_m, state_m);
1000 SHIFT61 (state, data_m);
1013 state_m[0] = state[0];
1014 state_m[1] = state[1];
1015 state_m[2] = state[2];
1016 state_m[3] = state[3];
1017 state_m[4] = state[4];
1018 state_m[5] = state[5];
1019 state_m[6] = state[6];
1020 state_m[7] = state[7];
1022 data_m[0] = data[0];
1023 data_m[1] = data[1];
1024 data_m[2] = data[2];
1025 data_m[3] = data[3];
1026 data_m[4] = data[4];
1027 data_m[5] = data[5];
1028 data_m[6] = data[6];
1029 data_m[7] = data[7];
1031 PASS0 (state, tmp, state_m, data_m, s_tables);
1032 PASS2 (state, tmp, state_m, data_m, s_tables);
1033 PASS4 (state, tmp, state_m, data_m, s_tables);
1034 PASS6 (state, tmp, state_m, data_m, s_tables);
1036 SHIFT12 (state_m, data, tmp);
1037 SHIFT16 (state, data_m, state_m);
1038 SHIFT61 (state, data_m);
1042 data[0] = state[ 8];
1043 data[1] = state[ 9];
1044 data[2] = state[10];
1045 data[3] = state[11];
1046 data[4] = state[12];
1047 data[5] = state[13];
1048 data[6] = state[14];
1049 data[7] = state[15];
1051 state_m[0] = state[0];
1052 state_m[1] = state[1];
1053 state_m[2] = state[2];
1054 state_m[3] = state[3];
1055 state_m[4] = state[4];
1056 state_m[5] = state[5];
1057 state_m[6] = state[6];
1058 state_m[7] = state[7];
1060 data_m[0] = data[0];
1061 data_m[1] = data[1];
1062 data_m[2] = data[2];
1063 data_m[3] = data[3];
1064 data_m[4] = data[4];
1065 data_m[5] = data[5];
1066 data_m[6] = data[6];
1067 data_m[7] = data[7];
1069 PASS0 (state, tmp, state_m, data_m, s_tables);
1070 PASS2 (state, tmp, state_m, data_m, s_tables);
1071 PASS4 (state, tmp, state_m, data_m, s_tables);
1072 PASS6 (state, tmp, state_m, data_m, s_tables);
1074 SHIFT12 (state_m, data, tmp);
1075 SHIFT16 (state, data_m, state_m);
1076 SHIFT61 (state, data_m);
1080 const u32x r0 = state[0];
1081 const u32x r1 = state[1];
1082 const u32x r2 = state[2];
1083 const u32x r3 = state[3];
1085 #include VECT_COMPARE_S
1089 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1095 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1096 const u32 lid = threadIdx.x;
1100 w0[0] = pws[gid].i[ 0];
1101 w0[1] = pws[gid].i[ 1];
1102 w0[2] = pws[gid].i[ 2];
1103 w0[3] = pws[gid].i[ 3];
1126 const u32 pw_len = pws[gid].pw_len;
1132 s_tables[0][lid] = c_tables[0][lid];
1133 s_tables[1][lid] = c_tables[1][lid];
1134 s_tables[2][lid] = c_tables[2][lid];
1135 s_tables[3][lid] = c_tables[3][lid];
1139 if (gid >= gid_max) return;
1145 m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1148 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1154 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1155 const u32 lid = threadIdx.x;
1159 w0[0] = pws[gid].i[ 0];
1160 w0[1] = pws[gid].i[ 1];
1161 w0[2] = pws[gid].i[ 2];
1162 w0[3] = pws[gid].i[ 3];
1166 w1[0] = pws[gid].i[ 4];
1167 w1[1] = pws[gid].i[ 5];
1168 w1[2] = pws[gid].i[ 6];
1169 w1[3] = pws[gid].i[ 7];
1185 const u32 pw_len = pws[gid].pw_len;
1191 s_tables[0][lid] = c_tables[0][lid];
1192 s_tables[1][lid] = c_tables[1][lid];
1193 s_tables[2][lid] = c_tables[2][lid];
1194 s_tables[3][lid] = c_tables[3][lid];
1198 if (gid >= gid_max) return;
1204 m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1207 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1211 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1217 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1218 const u32 lid = threadIdx.x;
1222 w0[0] = pws[gid].i[ 0];
1223 w0[1] = pws[gid].i[ 1];
1224 w0[2] = pws[gid].i[ 2];
1225 w0[3] = pws[gid].i[ 3];
1248 const u32 pw_len = pws[gid].pw_len;
1254 s_tables[0][lid] = c_tables[0][lid];
1255 s_tables[1][lid] = c_tables[1][lid];
1256 s_tables[2][lid] = c_tables[2][lid];
1257 s_tables[3][lid] = c_tables[3][lid];
1261 if (gid >= gid_max) return;
1267 m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1270 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1276 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1277 const u32 lid = threadIdx.x;
1281 w0[0] = pws[gid].i[ 0];
1282 w0[1] = pws[gid].i[ 1];
1283 w0[2] = pws[gid].i[ 2];
1284 w0[3] = pws[gid].i[ 3];
1288 w1[0] = pws[gid].i[ 4];
1289 w1[1] = pws[gid].i[ 5];
1290 w1[2] = pws[gid].i[ 6];
1291 w1[3] = pws[gid].i[ 7];
1307 const u32 pw_len = pws[gid].pw_len;
1313 s_tables[0][lid] = c_tables[0][lid];
1314 s_tables[1][lid] = c_tables[1][lid];
1315 s_tables[2][lid] = c_tables[2][lid];
1316 s_tables[3][lid] = c_tables[3][lid];
1320 if (gid >= gid_max) return;
1326 m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1329 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)