// Merge pull request #230 from magnumripper/master
// [hashcat.git] / OpenCL / simd.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
8 // vliw1
9
10 #if VECT_SIZE == 1
11
/* Lane-wise equality probes.  In scalar mode (VECT_SIZE == 1) both the
 * vector-vs-vector and the vector-vs-scalar form collapse to a single
 * plain compare: x is the candidate word, y the reference word. */
#define MATCHES_ONE_VV(x,y) ((x) == (y))
#define MATCHES_ONE_VS(x,y) ((x) == (y))
14
/**
 * Single-hash compare, scalar (VECT_SIZE == 1) variant: compares the
 * candidate digest words h0..h3 against the one target digest held in
 * search[0..3].  On a full match the digest is claimed via an atomic
 * increment of hashes_shown[] so that only the first finder reports it;
 * that finder records the crack position through mark_hash() and raises
 * this work-item's flag in d_return_buf[].
 *
 * NOTE(review): expands in kernel scope - expects search[],
 * digests_offset, hashes_shown[], plains_buf, d_return_buf, gid, lid
 * and il_pos to be visible at the expansion site; confirm in callers.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0) == search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}
29
/**
 * Multi-hash compare, scalar (VECT_SIZE == 1) variant: the candidate
 * digest is first run through check() against the two-level bitmap
 * tables, which gates the more expensive find_hash() lookup in
 * digests_buf[]; a located digest is then claimed atomically exactly
 * as in COMPARE_S_SIMD.
 *
 * NOTE(review): expands in kernel scope - expects the bitmaps_buf_*,
 * bitmap_* and digests_* symbols plus hashes_shown[], plains_buf,
 * d_return_buf, gid, lid and il_pos to be visible at the expansion site.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0, h1, h2, h3 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}
56
57 #endif
58
59 // vliw2
60
61 #if VECT_SIZE == 2
62
/* Lane-wise equality probes for 2-wide vectors: true when ANY lane of
 * candidate x equals the corresponding lane of y (VV) or the scalar y
 * (VS).  x and y are evaluated per-lane; avoid side-effecting args. */
#define MATCHES_ONE_VV(x,y) (((x).s0 == (y).s0) || ((x).s1 == (y).s1))
#define MATCHES_ONE_VS(x,y) (((x).s0 == (y)) || ((x).s1 == (y)))
65
/**
 * Single-hash compare for VECT_SIZE == 2: checks both vector lanes
 * (.s0, .s1) of the candidate digest h0..h3 against the single target
 * digest in search[0..3].  A matching lane is claimed atomically via
 * hashes_shown[] (only the first finder reports); mark_hash() then
 * records the hit at candidate position il_pos + lane and the
 * per-work-item return flag is raised.
 *
 * NOTE(review): expands in kernel scope - expects search[],
 * digests_offset, hashes_shown[], plains_buf, d_return_buf, gid, lid
 * and il_pos to be visible at the expansion site.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  /* lane 0 */ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  /* lane 1 */ \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}
92
/**
 * Multi-hash compare for VECT_SIZE == 2: each vector lane is extracted
 * into a scalar digest digest_tpN[4], pre-filtered through check()
 * against the two-level bitmap tables (gating the more expensive
 * find_hash() lookup in digests_buf[]), and a found digest is claimed
 * atomically like in COMPARE_S_SIMD, reporting position il_pos + lane.
 *
 * NOTE(review): expands in kernel scope - expects the bitmaps_buf_*,
 * bitmap_* and digests_* symbols plus hashes_shown[], plains_buf,
 * d_return_buf, gid, lid and il_pos to be visible at the expansion site.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}
142
143 #endif
144
145 // vliw4
146
147 #if VECT_SIZE == 4
148
/* Lane-wise equality probes for 4-wide vectors: true when ANY of the
 * four lanes of candidate x equals the corresponding lane of y (VV) or
 * the scalar y (VS). */
#define MATCHES_ONE_VV(x,y) (((x).s0 == (y).s0) || ((x).s1 == (y).s1) || ((x).s2 == (y).s2) || ((x).s3 == (y).s3))
#define MATCHES_ONE_VS(x,y) (((x).s0 == (y)) || ((x).s1 == (y)) || ((x).s2 == (y)) || ((x).s3 == (y)))
151
/**
 * Single-hash compare for VECT_SIZE == 4: checks each of the four
 * vector lanes (.s0 .. .s3) of the candidate digest h0..h3 against the
 * single target digest in search[0..3].  A matching lane is claimed
 * atomically via hashes_shown[] (only the first finder reports);
 * mark_hash() records the hit at candidate position il_pos + lane and
 * the per-work-item return flag is raised.
 *
 * NOTE(review): expands in kernel scope - expects search[],
 * digests_offset, hashes_shown[], plains_buf, d_return_buf, gid, lid
 * and il_pos to be visible at the expansion site.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}
202
/**
 * Multi-hash compare for VECT_SIZE == 4: each vector lane is extracted
 * into a scalar digest digest_tpN[4], pre-filtered through check()
 * against the two-level bitmap tables (gating the more expensive
 * find_hash() lookup in digests_buf[]), and a found digest is claimed
 * atomically like in COMPARE_S_SIMD, reporting position il_pos + lane.
 *
 * NOTE(review): expands in kernel scope - expects the bitmaps_buf_*,
 * bitmap_* and digests_* symbols plus hashes_shown[], plains_buf,
 * d_return_buf, gid, lid and il_pos to be visible at the expansion site.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp2, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp3, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}
298
299 #endif
300
301 // vliw8
302
303 #if VECT_SIZE == 8
304
/* Lane-wise equality probes for 8-wide vectors: true when ANY of the
 * eight lanes of candidate x equals the corresponding lane of y (VV)
 * or the scalar y (VS). */
#define MATCHES_ONE_VV(x,y) (((x).s0 == (y).s0) || ((x).s1 == (y).s1) || ((x).s2 == (y).s2) || ((x).s3 == (y).s3) || ((x).s4 == (y).s4) || ((x).s5 == (y).s5) || ((x).s6 == (y).s6) || ((x).s7 == (y).s7))
#define MATCHES_ONE_VS(x,y) (((x).s0 == (y)) || ((x).s1 == (y)) || ((x).s2 == (y)) || ((x).s3 == (y)) || ((x).s4 == (y)) || ((x).s5 == (y)) || ((x).s6 == (y)) || ((x).s7 == (y)))
307
/**
 * Single-hash compare for VECT_SIZE == 8: checks each of the eight
 * vector lanes (.s0 .. .s7) of the candidate digest h0..h3 against the
 * single target digest in search[0..3].  A matching lane is claimed
 * atomically via hashes_shown[] (only the first finder reports);
 * mark_hash() records the hit at candidate position il_pos + lane and
 * the per-work-item return flag is raised.
 *
 * Fix: restored the blank continuation line between the .s3 and .s4
 * lane blocks that every other lane boundary in this file has
 * (formatting consistency only; the expansion is unchanged).
 *
 * NOTE(review): expands in kernel scope - expects search[],
 * digests_offset, hashes_shown[], plains_buf, d_return_buf, gid, lid
 * and il_pos to be visible at the expansion site.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}
405
/**
 * Multi-hash compare for VECT_SIZE == 8: each vector lane is extracted
 * into a scalar digest digest_tpN[4], pre-filtered through check()
 * against the two-level bitmap tables (gating the more expensive
 * find_hash() lookup in digests_buf[]), and a found digest is claimed
 * atomically like in COMPARE_S_SIMD, reporting position il_pos + lane.
 *
 * Fix: restored the blank continuation line between the digest_tp3 and
 * digest_tp4 blocks that every other lane boundary in this file has
 * (formatting consistency only; the expansion is unchanged).
 *
 * NOTE(review): expands in kernel scope - expects the bitmaps_buf_*,
 * bitmap_* and digests_* symbols plus hashes_shown[], plains_buf,
 * d_return_buf, gid, lid and il_pos to be visible at the expansion site.
 */
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  const u32 digest_tp4[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
  const u32 digest_tp5[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
  const u32 digest_tp6[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
  const u32 digest_tp7[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp2, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp3, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp4, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp5, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp6, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp7, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}
592
593 #endif
594
595 // vliw16
596
597 #if VECT_SIZE == 16
598
/* Lane-wise equality probes for 16-wide vectors: true when ANY of the
 * sixteen lanes (.s0 .. .sf) of candidate x equals the corresponding
 * lane of y (VV) or the scalar y (VS). */
#define MATCHES_ONE_VV(x,y) (((x).s0 == (y).s0) || ((x).s1 == (y).s1) || ((x).s2 == (y).s2) || ((x).s3 == (y).s3) || ((x).s4 == (y).s4) || ((x).s5 == (y).s5) || ((x).s6 == (y).s6) || ((x).s7 == (y).s7) || ((x).s8 == (y).s8) || ((x).s9 == (y).s9) || ((x).sa == (y).sa) || ((x).sb == (y).sb) || ((x).sc == (y).sc) || ((x).sd == (y).sd) || ((x).se == (y).se) || ((x).sf == (y).sf))
#define MATCHES_ONE_VS(x,y) (((x).s0 == (y)) || ((x).s1 == (y)) || ((x).s2 == (y)) || ((x).s3 == (y)) || ((x).s4 == (y)) || ((x).s5 == (y)) || ((x).s6 == (y)) || ((x).s7 == (y)) || ((x).s8 == (y)) || ((x).s9 == (y)) || ((x).sa == (y)) || ((x).sb == (y)) || ((x).sc == (y)) || ((x).sd == (y)) || ((x).se == (y)) || ((x).sf == (y)))
601
/**
 * Single-hash compare for VECT_SIZE == 16: checks each of the sixteen
 * vector lanes (.s0 .. .sf) of the candidate digest h0..h3 against the
 * single target digest in search[0..3].  A matching lane is claimed
 * atomically via hashes_shown[] (only the first finder reports);
 * mark_hash() records the hit at candidate position il_pos + lane and
 * the per-work-item return flag is raised.
 *
 * Fix: restored the blank continuation line between the .s3 and .s4
 * lane blocks that every other lane boundary in this file has
 * (formatting consistency only; the expansion is unchanged).
 *
 * NOTE(review): expands in kernel scope - expects search[],
 * digests_offset, hashes_shown[], plains_buf, d_return_buf, gid, lid
 * and il_pos to be visible at the expansion site.
 */
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s8 == search[0]) && ((h1).s8 == search[1]) && ((h2).s8 == search[2]) && ((h3).s8 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s9 == search[0]) && ((h1).s9 == search[1]) && ((h2).s9 == search[2]) && ((h3).s9 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sa == search[0]) && ((h1).sa == search[1]) && ((h2).sa == search[2]) && ((h3).sa == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sb == search[0]) && ((h1).sb == search[1]) && ((h2).sb == search[2]) && ((h3).sb == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sc == search[0]) && ((h1).sc == search[1]) && ((h2).sc == search[2]) && ((h3).sc == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sd == search[0]) && ((h1).sd == search[1]) && ((h2).sd == search[2]) && ((h3).sd == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).se == search[0]) && ((h1).se == search[1]) && ((h2).se == search[2]) && ((h3).se == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sf == search[0]) && ((h1).sf == search[1]) && ((h2).sf == search[2]) && ((h3).sf == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}
795
796 #define COMPARE_M_SIMD(h0,h1,h2,h3) \
797 { \
798 const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
799 const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
800 const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
801 const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
802 const u32 digest_tp4[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
803 const u32 digest_tp5[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
804 const u32 digest_tp6[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
805 const u32 digest_tp7[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
806 const u32 digest_tp8[4] = { h0.s8, h1.s8, h2.s8, h3.s8 }; \
807 const u32 digest_tp9[4] = { h0.s9, h1.s9, h2.s9, h3.s9 }; \
808 const u32 digest_tp10[4] = { h0.sa, h1.sa, h2.sa, h3.sa }; \
809 const u32 digest_tp11[4] = { h0.sb, h1.sb, h2.sb, h3.sb }; \
810 const u32 digest_tp12[4] = { h0.sc, h1.sc, h2.sc, h3.sc }; \
811 const u32 digest_tp13[4] = { h0.sd, h1.sd, h2.sd, h3.sd }; \
812 const u32 digest_tp14[4] = { h0.se, h1.se, h2.se, h3.se }; \
813 const u32 digest_tp15[4] = { h0.sf, h1.sf, h2.sf, h3.sf }; \
814 \
815 if (check (digest_tp0, \
816 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
817 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
818 bitmap_mask, \
819 bitmap_shift1, \
820 bitmap_shift2)) \
821 { \
822 int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
823 \
824 if (hash_pos != -1) \
825 { \
826 const u32 final_hash_pos = digests_offset + hash_pos; \
827 \
828 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
829 { \
830 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
831 \
832 d_return_buf[lid] = 1; \
833 } \
834 } \
835 } \
836 \
837 if (check (digest_tp1, \
838 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
839 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
840 bitmap_mask, \
841 bitmap_shift1, \
842 bitmap_shift2)) \
843 { \
844 int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
845 \
846 if (hash_pos != -1) \
847 { \
848 const u32 final_hash_pos = digests_offset + hash_pos; \
849 \
850 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
851 { \
852 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
853 \
854 d_return_buf[lid] = 1; \
855 } \
856 } \
857 } \
858 \
859 if (check (digest_tp2, \
860 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
861 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
862 bitmap_mask, \
863 bitmap_shift1, \
864 bitmap_shift2)) \
865 { \
866 int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
867 \
868 if (hash_pos != -1) \
869 { \
870 const u32 final_hash_pos = digests_offset + hash_pos; \
871 \
872 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
873 { \
874 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
875 \
876 d_return_buf[lid] = 1; \
877 } \
878 } \
879 } \
880 \
881 if (check (digest_tp3, \
882 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
883 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
884 bitmap_mask, \
885 bitmap_shift1, \
886 bitmap_shift2)) \
887 { \
888 int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
889 \
890 if (hash_pos != -1) \
891 { \
892 const u32 final_hash_pos = digests_offset + hash_pos; \
893 \
894 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
895 { \
896 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
897 \
898 d_return_buf[lid] = 1; \
899 } \
900 } \
901 } \
902 if (check (digest_tp4, \
903 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
904 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
905 bitmap_mask, \
906 bitmap_shift1, \
907 bitmap_shift2)) \
908 { \
909 int hash_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[digests_offset]); \
910 \
911 if (hash_pos != -1) \
912 { \
913 const u32 final_hash_pos = digests_offset + hash_pos; \
914 \
915 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
916 { \
917 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
918 \
919 d_return_buf[lid] = 1; \
920 } \
921 } \
922 } \
923 \
924 if (check (digest_tp5, \
925 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
926 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
927 bitmap_mask, \
928 bitmap_shift1, \
929 bitmap_shift2)) \
930 { \
931 int hash_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[digests_offset]); \
932 \
933 if (hash_pos != -1) \
934 { \
935 const u32 final_hash_pos = digests_offset + hash_pos; \
936 \
937 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
938 { \
939 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
940 \
941 d_return_buf[lid] = 1; \
942 } \
943 } \
944 } \
945 \
946 if (check (digest_tp6, \
947 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
948 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
949 bitmap_mask, \
950 bitmap_shift1, \
951 bitmap_shift2)) \
952 { \
953 int hash_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[digests_offset]); \
954 \
955 if (hash_pos != -1) \
956 { \
957 const u32 final_hash_pos = digests_offset + hash_pos; \
958 \
959 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
960 { \
961 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
962 \
963 d_return_buf[lid] = 1; \
964 } \
965 } \
966 } \
967 \
968 if (check (digest_tp7, \
969 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
970 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
971 bitmap_mask, \
972 bitmap_shift1, \
973 bitmap_shift2)) \
974 { \
975 int hash_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[digests_offset]); \
976 \
977 if (hash_pos != -1) \
978 { \
979 const u32 final_hash_pos = digests_offset + hash_pos; \
980 \
981 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
982 { \
983 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
984 \
985 d_return_buf[lid] = 1; \
986 } \
987 } \
988 } \
989 \
990 if (check (digest_tp8, \
991 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
992 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
993 bitmap_mask, \
994 bitmap_shift1, \
995 bitmap_shift2)) \
996 { \
997 int hash_pos = find_hash (digest_tp8, digests_cnt, &digests_buf[digests_offset]); \
998 \
999 if (hash_pos != -1) \
1000 { \
1001 const u32 final_hash_pos = digests_offset + hash_pos; \
1002 \
1003 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1004 { \
1005 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
1006 \
1007 d_return_buf[lid] = 1; \
1008 } \
1009 } \
1010 } \
1011 \
1012 if (check (digest_tp9, \
1013 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1014 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1015 bitmap_mask, \
1016 bitmap_shift1, \
1017 bitmap_shift2)) \
1018 { \
1019 int hash_pos = find_hash (digest_tp9, digests_cnt, &digests_buf[digests_offset]); \
1020 \
1021 if (hash_pos != -1) \
1022 { \
1023 const u32 final_hash_pos = digests_offset + hash_pos; \
1024 \
1025 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1026 { \
1027 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
1028 \
1029 d_return_buf[lid] = 1; \
1030 } \
1031 } \
1032 } \
1033 \
1034 if (check (digest_tp10, \
1035 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1036 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1037 bitmap_mask, \
1038 bitmap_shift1, \
1039 bitmap_shift2)) \
1040 { \
1041 int hash_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[digests_offset]); \
1042 \
1043 if (hash_pos != -1) \
1044 { \
1045 const u32 final_hash_pos = digests_offset + hash_pos; \
1046 \
1047 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1048 { \
1049 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
1050 \
1051 d_return_buf[lid] = 1; \
1052 } \
1053 } \
1054 } \
1055 \
1056 if (check (digest_tp11, \
1057 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1058 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1059 bitmap_mask, \
1060 bitmap_shift1, \
1061 bitmap_shift2)) \
1062 { \
1063 int hash_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[digests_offset]); \
1064 \
1065 if (hash_pos != -1) \
1066 { \
1067 const u32 final_hash_pos = digests_offset + hash_pos; \
1068 \
1069 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1070 { \
1071 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
1072 \
1073 d_return_buf[lid] = 1; \
1074 } \
1075 } \
1076 } \
1077 \
1078 if (check (digest_tp12, \
1079 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1080 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1081 bitmap_mask, \
1082 bitmap_shift1, \
1083 bitmap_shift2)) \
1084 { \
1085 int hash_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[digests_offset]); \
1086 \
1087 if (hash_pos != -1) \
1088 { \
1089 const u32 final_hash_pos = digests_offset + hash_pos; \
1090 \
1091 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1092 { \
1093 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
1094 \
1095 d_return_buf[lid] = 1; \
1096 } \
1097 } \
1098 } \
1099 \
1100 if (check (digest_tp13, \
1101 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1102 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1103 bitmap_mask, \
1104 bitmap_shift1, \
1105 bitmap_shift2)) \
1106 { \
1107 int hash_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[digests_offset]); \
1108 \
1109 if (hash_pos != -1) \
1110 { \
1111 const u32 final_hash_pos = digests_offset + hash_pos; \
1112 \
1113 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1114 { \
1115 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
1116 \
1117 d_return_buf[lid] = 1; \
1118 } \
1119 } \
1120 } \
1121 \
1122 if (check (digest_tp14, \
1123 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1124 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1125 bitmap_mask, \
1126 bitmap_shift1, \
1127 bitmap_shift2)) \
1128 { \
1129 int hash_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[digests_offset]); \
1130 \
1131 if (hash_pos != -1) \
1132 { \
1133 const u32 final_hash_pos = digests_offset + hash_pos; \
1134 \
1135 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1136 { \
1137 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
1138 \
1139 d_return_buf[lid] = 1; \
1140 } \
1141 } \
1142 } \
1143 \
1144 if (check (digest_tp15, \
1145 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1146 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1147 bitmap_mask, \
1148 bitmap_shift1, \
1149 bitmap_shift2)) \
1150 { \
1151 int hash_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[digests_offset]); \
1152 \
1153 if (hash_pos != -1) \
1154 { \
1155 const u32 final_hash_pos = digests_offset + hash_pos; \
1156 \
1157 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
1158 { \
1159 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
1160 \
1161 d_return_buf[lid] = 1; \
1162 } \
1163 } \
1164 } \
1165 }
1166
1167 #endif
1168
// True when no vector lane of (a) matches (b): simple negations of the
// per-VECT_SIZE MATCHES_ONE_* helpers defined in the conditional blocks above.
#define MATCHES_NONE_VV(a,b) !(MATCHES_ONE_VV ((a), (b)))
#define MATCHES_NONE_VS(a,b) !(MATCHES_ONE_VS ((a), (b)))
1171
1172 // attack-mode 0
1173
1174 static inline u32x w0r_create_bft (__global bf_t *bfs_buf, const u32 il_pos)
1175 {
1176 #if VECT_SIZE == 1
1177 const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i);
1178 #elif VECT_SIZE == 2
1179 const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
1180 #elif VECT_SIZE == 4
1181 const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
1182 #elif VECT_SIZE == 8
1183 const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
1184 #elif VECT_SIZE == 16
1185 const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
1186 #endif
1187
1188 return w0r;
1189 }
1190
// packv: gather element [idx] of struct member .var from the VECT_SIZE
// consecutive per-lane entries belonging to work-item gid into one u32x.
// The stride between work-items must equal VECT_SIZE in every branch.
#if VECT_SIZE == 1
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 1) + 0].var[(idx)])
#elif VECT_SIZE == 2
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)])
#elif VECT_SIZE == 4
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)])
#elif VECT_SIZE == 8
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)])
#elif VECT_SIZE == 16
// BUGFIX: stride was ((gid) * 8) while reading offsets +0..+15, so adjacent
// work-items' lane data overlapped; with 16 lanes the stride must be 16.
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
#endif
1202
// unpackv: scatter each lane of u32x value val back into element [idx] of
// struct member .var of the VECT_SIZE consecutive per-lane entries for
// work-item gid — the inverse of packv; stride must equal VECT_SIZE.
#if VECT_SIZE == 1
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 1) + 0].var[(idx)] = val;
#elif VECT_SIZE == 2
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 2) + 0].var[(idx)] = val.s0; (arr)[((gid) * 2) + 1].var[(idx)] = val.s1;
#elif VECT_SIZE == 4
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 4) + 0].var[(idx)] = val.s0; (arr)[((gid) * 4) + 1].var[(idx)] = val.s1; (arr)[((gid) * 4) + 2].var[(idx)] = val.s2; (arr)[((gid) * 4) + 3].var[(idx)] = val.s3;
#elif VECT_SIZE == 8
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] = val.s0; (arr)[((gid) * 8) + 1].var[(idx)] = val.s1; (arr)[((gid) * 8) + 2].var[(idx)] = val.s2; (arr)[((gid) * 8) + 3].var[(idx)] = val.s3; (arr)[((gid) * 8) + 4].var[(idx)] = val.s4; (arr)[((gid) * 8) + 5].var[(idx)] = val.s5; (arr)[((gid) * 8) + 6].var[(idx)] = val.s6; (arr)[((gid) * 8) + 7].var[(idx)] = val.s7;
#elif VECT_SIZE == 16
// BUGFIX: stride was ((gid) * 8) while writing offsets +0..+15, clobbering the
// neighboring work-item's entries; with 16 lanes the stride must be 16.
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] = val.s0; (arr)[((gid) * 16) + 1].var[(idx)] = val.s1; (arr)[((gid) * 16) + 2].var[(idx)] = val.s2; (arr)[((gid) * 16) + 3].var[(idx)] = val.s3; (arr)[((gid) * 16) + 4].var[(idx)] = val.s4; (arr)[((gid) * 16) + 5].var[(idx)] = val.s5; (arr)[((gid) * 16) + 6].var[(idx)] = val.s6; (arr)[((gid) * 16) + 7].var[(idx)] = val.s7; (arr)[((gid) * 16) + 8].var[(idx)] = val.s8; (arr)[((gid) * 16) + 9].var[(idx)] = val.s9; (arr)[((gid) * 16) + 10].var[(idx)] = val.sa; (arr)[((gid) * 16) + 11].var[(idx)] = val.sb; (arr)[((gid) * 16) + 12].var[(idx)] = val.sc; (arr)[((gid) * 16) + 13].var[(idx)] = val.sd; (arr)[((gid) * 16) + 14].var[(idx)] = val.se; (arr)[((gid) * 16) + 15].var[(idx)] = val.sf;
#endif