2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
// Scalar (VECT_SIZE == 1) lane-match helpers: with a single lane, the
// "any lane equal" vector/vector and vector/scalar tests reduce to plain
// 32-bit equality.
12 #define MATCHES_ONE_VV(a,b) ((a) == (b))
13 #define MATCHES_ONE_VS(a,b) ((a) == (b))
// Scalar single-hash compare: if the four 32-bit digest words equal the
// target digest in search[0..3], atomically claim hashes_shown[] (only the
// first finder's atomic_add returns 0), record the cracking candidate via
// mark_hash (gid = work-item, il_pos = inner-loop index) and raise this
// work-item's flag in d_return_buf.
// NOTE(review): the '{'/'}' continuation lines of this macro appear to have
// been dropped by extraction -- restore them before compiling.
15 #define COMPARE_S_SIMD(h0,h1,h2,h3) \
17 if (((h0) == search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \
19 const u32 final_hash_pos = digests_offset + 0; \
21 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
23 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
25 d_return_buf[lid] = 1; \
// Scalar multi-hash compare: pack the candidate digest into digest_tp0,
// pre-filter it against the two-level bitmap tables via check(), then do an
// exact lookup with find_hash() over the digests_cnt digests starting at
// digests_buf[digests_offset].  On a hit, the same atomic claim / mark_hash /
// d_return_buf sequence as COMPARE_S_SIMD runs.
// NOTE(review): the tail of the check() argument list, the hash_pos test and
// the brace continuation lines look dropped by extraction -- restore before
// compiling.
30 #define COMPARE_M_SIMD(h0,h1,h2,h3) \
32 const u32 digest_tp0[4] = { h0, h1, h2, h3 }; \
34 if (check (digest_tp0, \
35 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
36 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
41 int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
45 const u32 final_hash_pos = digests_offset + hash_pos; \
47 if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
49 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
51 d_return_buf[lid] = 1; \
// True when lane e of the vector batch starting at candidate index p still
// addresses a real candidate, i.e. p + e < c (total candidate count).  Used
// to mask off trailing lanes when the candidate count is not a multiple of
// VECT_SIZE.
61 #define vector_accessible(p,c,e) (((p) + (e)) < (c))
// 2-wide lane-match helpers: true if ANY of the .s0/.s1 lanes matches
// (vector/vector compares lane-to-lane, vector/scalar compares each lane
// against the same scalar).
65 #define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1))
66 #define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ))
// 2-wide single-hash compare: the scalar COMPARE_S_SIMD unrolled once per
// lane (.s0, .s1).  Each lane adds its lane index to il_pos for mark_hash and
// is additionally guarded by vector_accessible, since the last vector of the
// inner loop may contain unused lanes.
// NOTE(review): brace continuation lines appear dropped by extraction.
68 #define COMPARE_S_SIMD(h0,h1,h2,h3) \
70 if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
72 const u32 final_hash_pos = digests_offset + 0; \
74 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
76 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
78 d_return_buf[lid] = 1; \
82 if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
84 const u32 final_hash_pos = digests_offset + 0; \
86 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
88 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
90 d_return_buf[lid] = 1; \
// 2-wide multi-hash compare: extract each lane into its own 4-word digest
// (digest_tp0/digest_tp1), then per lane run the bitmap pre-filter check(),
// the exact find_hash() lookup, and on a hit the lane-guarded atomic claim /
// mark_hash / d_return_buf sequence (il_pos + lane).
// NOTE(review): check() argument tails and brace continuation lines appear
// dropped by extraction.
95 #define COMPARE_M_SIMD(h0,h1,h2,h3) \
97 const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
98 const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
100 if (check (digest_tp0, \
101 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
102 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
107 int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
109 if (hash_pos != -1) \
111 const u32 final_hash_pos = digests_offset + hash_pos; \
113 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
115 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
117 d_return_buf[lid] = 1; \
122 if (check (digest_tp1, \
123 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
124 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
129 int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
131 if (hash_pos != -1) \
133 const u32 final_hash_pos = digests_offset + hash_pos; \
135 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
137 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
139 d_return_buf[lid] = 1; \
// 4-wide lane-match helpers: true if ANY of lanes .s0..ss3 matches.
151 #define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3))
152 #define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ) || ((a).s2 == (b) ) || ((a).s3 == (b) ))
// 4-wide single-hash compare: one unrolled copy of the scalar test per lane
// .s0..s3, each lane guarded by vector_accessible and reporting il_pos + lane.
// NOTE(review): brace continuation lines appear dropped by extraction.
154 #define COMPARE_S_SIMD(h0,h1,h2,h3) \
156 if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
158 const u32 final_hash_pos = digests_offset + 0; \
160 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
162 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
164 d_return_buf[lid] = 1; \
168 if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
170 const u32 final_hash_pos = digests_offset + 0; \
172 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
174 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
176 d_return_buf[lid] = 1; \
180 if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
182 const u32 final_hash_pos = digests_offset + 0; \
184 if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
186 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
188 d_return_buf[lid] = 1; \
192 if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
194 const u32 final_hash_pos = digests_offset + 0; \
196 if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
198 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
200 d_return_buf[lid] = 1; \
// 4-wide multi-hash compare: per-lane digests digest_tp0..digest_tp3, then
// for each lane: bitmap pre-filter check(), exact find_hash() lookup, and on
// a hit the lane-guarded atomic claim / mark_hash / d_return_buf sequence.
// NOTE(review): check() argument tails and brace continuation lines appear
// dropped by extraction.
205 #define COMPARE_M_SIMD(h0,h1,h2,h3) \
207 const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
208 const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
209 const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
210 const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
212 if (check (digest_tp0, \
213 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
214 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
219 int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
221 if (hash_pos != -1) \
223 const u32 final_hash_pos = digests_offset + hash_pos; \
225 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
227 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
229 d_return_buf[lid] = 1; \
234 if (check (digest_tp1, \
235 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
236 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
241 int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
243 if (hash_pos != -1) \
245 const u32 final_hash_pos = digests_offset + hash_pos; \
247 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
249 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
251 d_return_buf[lid] = 1; \
256 if (check (digest_tp2, \
257 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
258 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
263 int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
265 if (hash_pos != -1) \
267 const u32 final_hash_pos = digests_offset + hash_pos; \
269 if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
271 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
273 d_return_buf[lid] = 1; \
278 if (check (digest_tp3, \
279 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
280 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
285 int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
287 if (hash_pos != -1) \
289 const u32 final_hash_pos = digests_offset + hash_pos; \
291 if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
293 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
295 d_return_buf[lid] = 1; \
// 8-wide lane-match helpers: true if ANY of lanes .s0..s7 matches.
307 #define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3) || ((a).s4 == (b).s4) || ((a).s5 == (b).s5) || ((a).s6 == (b).s6) || ((a).s7 == (b).s7))
308 #define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ) || ((a).s2 == (b) ) || ((a).s3 == (b) ) || ((a).s4 == (b) ) || ((a).s5 == (b) ) || ((a).s6 == (b) ) || ((a).s7 == (b) ))
// 8-wide single-hash compare: scalar test unrolled for lanes .s0..s7, each
// lane guarded by vector_accessible and reporting il_pos + lane.
// NOTE(review): brace continuation lines appear dropped by extraction.
310 #define COMPARE_S_SIMD(h0,h1,h2,h3) \
312 if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
314 const u32 final_hash_pos = digests_offset + 0; \
316 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
318 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
320 d_return_buf[lid] = 1; \
324 if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
326 const u32 final_hash_pos = digests_offset + 0; \
328 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
330 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
332 d_return_buf[lid] = 1; \
336 if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
338 const u32 final_hash_pos = digests_offset + 0; \
340 if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
342 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
344 d_return_buf[lid] = 1; \
348 if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
350 const u32 final_hash_pos = digests_offset + 0; \
352 if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
354 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
356 d_return_buf[lid] = 1; \
359 if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
361 const u32 final_hash_pos = digests_offset + 0; \
363 if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
365 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
367 d_return_buf[lid] = 1; \
371 if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
373 const u32 final_hash_pos = digests_offset + 0; \
375 if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
377 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
379 d_return_buf[lid] = 1; \
383 if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
385 const u32 final_hash_pos = digests_offset + 0; \
387 if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
389 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
391 d_return_buf[lid] = 1; \
395 if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
397 const u32 final_hash_pos = digests_offset + 0; \
399 if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
401 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
403 d_return_buf[lid] = 1; \
// 8-wide multi-hash compare: per-lane digests digest_tp0..digest_tp7, then
// for each lane: bitmap pre-filter check(), exact find_hash() lookup, and on
// a hit the lane-guarded atomic claim / mark_hash / d_return_buf sequence.
// NOTE(review): check() argument tails and brace continuation lines appear
// dropped by extraction.
408 #define COMPARE_M_SIMD(h0,h1,h2,h3) \
410 const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
411 const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
412 const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
413 const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
414 const u32 digest_tp4[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
415 const u32 digest_tp5[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
416 const u32 digest_tp6[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
417 const u32 digest_tp7[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
419 if (check (digest_tp0, \
420 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
421 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
426 int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
428 if (hash_pos != -1) \
430 const u32 final_hash_pos = digests_offset + hash_pos; \
432 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
434 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
436 d_return_buf[lid] = 1; \
441 if (check (digest_tp1, \
442 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
443 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
448 int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
450 if (hash_pos != -1) \
452 const u32 final_hash_pos = digests_offset + hash_pos; \
454 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
456 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
458 d_return_buf[lid] = 1; \
463 if (check (digest_tp2, \
464 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
465 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
470 int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
472 if (hash_pos != -1) \
474 const u32 final_hash_pos = digests_offset + hash_pos; \
476 if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
478 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
480 d_return_buf[lid] = 1; \
485 if (check (digest_tp3, \
486 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
487 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
492 int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
494 if (hash_pos != -1) \
496 const u32 final_hash_pos = digests_offset + hash_pos; \
498 if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
500 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
502 d_return_buf[lid] = 1; \
506 if (check (digest_tp4, \
507 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
508 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
513 int hash_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[digests_offset]); \
515 if (hash_pos != -1) \
517 const u32 final_hash_pos = digests_offset + hash_pos; \
519 if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
521 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
523 d_return_buf[lid] = 1; \
528 if (check (digest_tp5, \
529 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
530 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
535 int hash_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[digests_offset]); \
537 if (hash_pos != -1) \
539 const u32 final_hash_pos = digests_offset + hash_pos; \
541 if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
543 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
545 d_return_buf[lid] = 1; \
550 if (check (digest_tp6, \
551 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
552 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
557 int hash_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[digests_offset]); \
559 if (hash_pos != -1) \
561 const u32 final_hash_pos = digests_offset + hash_pos; \
563 if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
565 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
567 d_return_buf[lid] = 1; \
572 if (check (digest_tp7, \
573 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
574 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
579 int hash_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[digests_offset]); \
581 if (hash_pos != -1) \
583 const u32 final_hash_pos = digests_offset + hash_pos; \
585 if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
587 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
589 d_return_buf[lid] = 1; \
// 16-wide lane-match helpers: true if ANY of lanes .s0..sf matches.
601 #define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3) || ((a).s4 == (b).s4) || ((a).s5 == (b).s5) || ((a).s6 == (b).s6) || ((a).s7 == (b).s7) || ((a).s8 == (b).s8) || ((a).s9 == (b).s9) || ((a).sa == (b).sa) || ((a).sb == (b).sb) || ((a).sc == (b).sc) || ((a).sd == (b).sd) || ((a).se == (b).se) || ((a).sf == (b).sf))
602 #define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ) || ((a).s2 == (b) ) || ((a).s3 == (b) ) || ((a).s4 == (b) ) || ((a).s5 == (b) ) || ((a).s6 == (b) ) || ((a).s7 == (b) ) || ((a).s8 == (b) ) || ((a).s9 == (b) ) || ((a).sa == (b) ) || ((a).sb == (b) ) || ((a).sc == (b) ) || ((a).sd == (b) ) || ((a).se == (b) ) || ((a).sf == (b) ))
// 16-wide single-hash compare: scalar test unrolled for lanes .s0..sf
// (lane indices 0..15), each lane guarded by vector_accessible and
// reporting il_pos + lane.
// NOTE(review): brace continuation lines appear dropped by extraction.
604 #define COMPARE_S_SIMD(h0,h1,h2,h3) \
606 if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
608 const u32 final_hash_pos = digests_offset + 0; \
610 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
612 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
614 d_return_buf[lid] = 1; \
618 if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
620 const u32 final_hash_pos = digests_offset + 0; \
622 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
624 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
626 d_return_buf[lid] = 1; \
630 if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
632 const u32 final_hash_pos = digests_offset + 0; \
634 if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
636 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
638 d_return_buf[lid] = 1; \
642 if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
644 const u32 final_hash_pos = digests_offset + 0; \
646 if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
648 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
650 d_return_buf[lid] = 1; \
653 if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
655 const u32 final_hash_pos = digests_offset + 0; \
657 if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
659 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
661 d_return_buf[lid] = 1; \
665 if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
667 const u32 final_hash_pos = digests_offset + 0; \
669 if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
671 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
673 d_return_buf[lid] = 1; \
677 if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
679 const u32 final_hash_pos = digests_offset + 0; \
681 if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
683 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
685 d_return_buf[lid] = 1; \
689 if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
691 const u32 final_hash_pos = digests_offset + 0; \
693 if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
695 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
697 d_return_buf[lid] = 1; \
701 if (((h0).s8 == search[0]) && ((h1).s8 == search[1]) && ((h2).s8 == search[2]) && ((h3).s8 == search[3])) \
703 const u32 final_hash_pos = digests_offset + 0; \
705 if (vector_accessible (il_pos, il_cnt, 8) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
707 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
709 d_return_buf[lid] = 1; \
713 if (((h0).s9 == search[0]) && ((h1).s9 == search[1]) && ((h2).s9 == search[2]) && ((h3).s9 == search[3])) \
715 const u32 final_hash_pos = digests_offset + 0; \
717 if (vector_accessible (il_pos, il_cnt, 9) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
719 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
721 d_return_buf[lid] = 1; \
725 if (((h0).sa == search[0]) && ((h1).sa == search[1]) && ((h2).sa == search[2]) && ((h3).sa == search[3])) \
727 const u32 final_hash_pos = digests_offset + 0; \
729 if (vector_accessible (il_pos, il_cnt, 10) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
731 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
733 d_return_buf[lid] = 1; \
737 if (((h0).sb == search[0]) && ((h1).sb == search[1]) && ((h2).sb == search[2]) && ((h3).sb == search[3])) \
739 const u32 final_hash_pos = digests_offset + 0; \
741 if (vector_accessible (il_pos, il_cnt, 11) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
743 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
745 d_return_buf[lid] = 1; \
749 if (((h0).sc == search[0]) && ((h1).sc == search[1]) && ((h2).sc == search[2]) && ((h3).sc == search[3])) \
751 const u32 final_hash_pos = digests_offset + 0; \
753 if (vector_accessible (il_pos, il_cnt, 12) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
755 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
757 d_return_buf[lid] = 1; \
761 if (((h0).sd == search[0]) && ((h1).sd == search[1]) && ((h2).sd == search[2]) && ((h3).sd == search[3])) \
763 const u32 final_hash_pos = digests_offset + 0; \
765 if (vector_accessible (il_pos, il_cnt, 13) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
767 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
769 d_return_buf[lid] = 1; \
773 if (((h0).se == search[0]) && ((h1).se == search[1]) && ((h2).se == search[2]) && ((h3).se == search[3])) \
775 const u32 final_hash_pos = digests_offset + 0; \
777 if (vector_accessible (il_pos, il_cnt, 14) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
779 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
781 d_return_buf[lid] = 1; \
785 if (((h0).sf == search[0]) && ((h1).sf == search[1]) && ((h2).sf == search[2]) && ((h3).sf == search[3])) \
787 const u32 final_hash_pos = digests_offset + 0; \
789 if (vector_accessible (il_pos, il_cnt, 15) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
791 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
793 d_return_buf[lid] = 1; \
// 16-wide multi-hash compare: per-lane digests digest_tp00..digest_tp15
// (two-digit suffixes keep names aligned), then for each lane: bitmap
// pre-filter check(), exact find_hash() lookup, and on a hit the
// lane-guarded atomic claim / mark_hash / d_return_buf sequence
// (il_pos + lane, lane 0..15).
// NOTE(review): check() argument tails and brace continuation lines appear
// dropped by extraction.
798 #define COMPARE_M_SIMD(h0,h1,h2,h3) \
800 const u32 digest_tp00[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
801 const u32 digest_tp01[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
802 const u32 digest_tp02[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
803 const u32 digest_tp03[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
804 const u32 digest_tp04[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
805 const u32 digest_tp05[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
806 const u32 digest_tp06[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
807 const u32 digest_tp07[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
808 const u32 digest_tp08[4] = { h0.s8, h1.s8, h2.s8, h3.s8 }; \
809 const u32 digest_tp09[4] = { h0.s9, h1.s9, h2.s9, h3.s9 }; \
810 const u32 digest_tp10[4] = { h0.sa, h1.sa, h2.sa, h3.sa }; \
811 const u32 digest_tp11[4] = { h0.sb, h1.sb, h2.sb, h3.sb }; \
812 const u32 digest_tp12[4] = { h0.sc, h1.sc, h2.sc, h3.sc }; \
813 const u32 digest_tp13[4] = { h0.sd, h1.sd, h2.sd, h3.sd }; \
814 const u32 digest_tp14[4] = { h0.se, h1.se, h2.se, h3.se }; \
815 const u32 digest_tp15[4] = { h0.sf, h1.sf, h2.sf, h3.sf }; \
817 if (check (digest_tp00, \
818 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
819 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
824 int hash_pos = find_hash (digest_tp00, digests_cnt, &digests_buf[digests_offset]); \
826 if (hash_pos != -1) \
828 const u32 final_hash_pos = digests_offset + hash_pos; \
830 if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
832 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
834 d_return_buf[lid] = 1; \
839 if (check (digest_tp01, \
840 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
841 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
846 int hash_pos = find_hash (digest_tp01, digests_cnt, &digests_buf[digests_offset]); \
848 if (hash_pos != -1) \
850 const u32 final_hash_pos = digests_offset + hash_pos; \
852 if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
854 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
856 d_return_buf[lid] = 1; \
861 if (check (digest_tp02, \
862 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
863 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
868 int hash_pos = find_hash (digest_tp02, digests_cnt, &digests_buf[digests_offset]); \
870 if (hash_pos != -1) \
872 const u32 final_hash_pos = digests_offset + hash_pos; \
874 if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
876 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
878 d_return_buf[lid] = 1; \
883 if (check (digest_tp03, \
884 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
885 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
890 int hash_pos = find_hash (digest_tp03, digests_cnt, &digests_buf[digests_offset]); \
892 if (hash_pos != -1) \
894 const u32 final_hash_pos = digests_offset + hash_pos; \
896 if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
898 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
900 d_return_buf[lid] = 1; \
905 if (check (digest_tp04, \
906 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
907 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
912 int hash_pos = find_hash (digest_tp04, digests_cnt, &digests_buf[digests_offset]); \
914 if (hash_pos != -1) \
916 const u32 final_hash_pos = digests_offset + hash_pos; \
918 if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
920 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
922 d_return_buf[lid] = 1; \
927 if (check (digest_tp05, \
928 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
929 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
934 int hash_pos = find_hash (digest_tp05, digests_cnt, &digests_buf[digests_offset]); \
936 if (hash_pos != -1) \
938 const u32 final_hash_pos = digests_offset + hash_pos; \
940 if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
942 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
944 d_return_buf[lid] = 1; \
949 if (check (digest_tp06, \
950 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
951 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
956 int hash_pos = find_hash (digest_tp06, digests_cnt, &digests_buf[digests_offset]); \
958 if (hash_pos != -1) \
960 const u32 final_hash_pos = digests_offset + hash_pos; \
962 if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
964 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
966 d_return_buf[lid] = 1; \
971 if (check (digest_tp07, \
972 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
973 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
978 int hash_pos = find_hash (digest_tp07, digests_cnt, &digests_buf[digests_offset]); \
980 if (hash_pos != -1) \
982 const u32 final_hash_pos = digests_offset + hash_pos; \
984 if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
986 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
988 d_return_buf[lid] = 1; \
993 if (check (digest_tp08, \
994 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
995 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1000 int hash_pos = find_hash (digest_tp08, digests_cnt, &digests_buf[digests_offset]); \
1002 if (hash_pos != -1) \
1004 const u32 final_hash_pos = digests_offset + hash_pos; \
1006 if (vector_accessible (il_pos, il_cnt, 8) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1008 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
1010 d_return_buf[lid] = 1; \
1015 if (check (digest_tp09, \
1016 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1017 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1022 int hash_pos = find_hash (digest_tp09, digests_cnt, &digests_buf[digests_offset]); \
1024 if (hash_pos != -1) \
1026 const u32 final_hash_pos = digests_offset + hash_pos; \
1028 if (vector_accessible (il_pos, il_cnt, 9) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1030 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
1032 d_return_buf[lid] = 1; \
1037 if (check (digest_tp10, \
1038 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1039 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1044 int hash_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[digests_offset]); \
1046 if (hash_pos != -1) \
1048 const u32 final_hash_pos = digests_offset + hash_pos; \
1050 if (vector_accessible (il_pos, il_cnt, 10) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1052 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
1054 d_return_buf[lid] = 1; \
1059 if (check (digest_tp11, \
1060 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1061 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1066 int hash_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[digests_offset]); \
1068 if (hash_pos != -1) \
1070 const u32 final_hash_pos = digests_offset + hash_pos; \
1072 if (vector_accessible (il_pos, il_cnt, 11) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1074 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
1076 d_return_buf[lid] = 1; \
1081 if (check (digest_tp12, \
1082 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1083 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1088 int hash_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[digests_offset]); \
1090 if (hash_pos != -1) \
1092 const u32 final_hash_pos = digests_offset + hash_pos; \
1094 if (vector_accessible (il_pos, il_cnt, 12) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1096 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
1098 d_return_buf[lid] = 1; \
1103 if (check (digest_tp13, \
1104 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1105 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1110 int hash_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[digests_offset]); \
1112 if (hash_pos != -1) \
1114 const u32 final_hash_pos = digests_offset + hash_pos; \
1116 if (vector_accessible (il_pos, il_cnt, 13) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1118 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
1120 d_return_buf[lid] = 1; \
1125 if (check (digest_tp14, \
1126 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1127 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1132 int hash_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[digests_offset]); \
1134 if (hash_pos != -1) \
1136 const u32 final_hash_pos = digests_offset + hash_pos; \
1138 if (vector_accessible (il_pos, il_cnt, 14) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1140 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
1142 d_return_buf[lid] = 1; \
1147 if (check (digest_tp15, \
1148 bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
1149 bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
1154 int hash_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[digests_offset]); \
1156 if (hash_pos != -1) \
1158 const u32 final_hash_pos = digests_offset + hash_pos; \
1160 if (vector_accessible (il_pos, il_cnt, 15) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
1162 mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
1164 d_return_buf[lid] = 1; \
// Negations of the MATCHES_ONE_* helpers: true when NO lane of the vector
// (VV) / no lane against the scalar (VS) compares equal.  The whole
// expansion is wrapped in parentheses so the macro is safe to embed in any
// surrounding expression regardless of operator precedence.
#define MATCHES_NONE_VV(a,b) (!(MATCHES_ONE_VV ((a), (b))))
#define MATCHES_NONE_VS(a,b) (!(MATCHES_ONE_VS ((a), (b))))
1177 static inline u32x
ix_create_bft (__global bf_t
*bfs_buf
, const u32 il_pos
)
1180 const u32x ix
= (u32x
) (bfs_buf
[il_pos
+ 0].i
);
1181 #elif VECT_SIZE == 2
1182 const u32x ix
= (u32x
) (bfs_buf
[il_pos
+ 0].i
, bfs_buf
[il_pos
+ 1].i
);
1183 #elif VECT_SIZE == 4
1184 const u32x ix
= (u32x
) (bfs_buf
[il_pos
+ 0].i
, bfs_buf
[il_pos
+ 1].i
, bfs_buf
[il_pos
+ 2].i
, bfs_buf
[il_pos
+ 3].i
);
1185 #elif VECT_SIZE == 8
1186 const u32x ix
= (u32x
) (bfs_buf
[il_pos
+ 0].i
, bfs_buf
[il_pos
+ 1].i
, bfs_buf
[il_pos
+ 2].i
, bfs_buf
[il_pos
+ 3].i
, bfs_buf
[il_pos
+ 4].i
, bfs_buf
[il_pos
+ 5].i
, bfs_buf
[il_pos
+ 6].i
, bfs_buf
[il_pos
+ 7].i
);
1187 #elif VECT_SIZE == 16
1188 const u32x ix
= (u32x
) (bfs_buf
[il_pos
+ 0].i
, bfs_buf
[il_pos
+ 1].i
, bfs_buf
[il_pos
+ 2].i
, bfs_buf
[il_pos
+ 3].i
, bfs_buf
[il_pos
+ 4].i
, bfs_buf
[il_pos
+ 5].i
, bfs_buf
[il_pos
+ 6].i
, bfs_buf
[il_pos
+ 7].i
, bfs_buf
[il_pos
+ 8].i
, bfs_buf
[il_pos
+ 9].i
, bfs_buf
[il_pos
+ 10].i
, bfs_buf
[il_pos
+ 11].i
, bfs_buf
[il_pos
+ 12].i
, bfs_buf
[il_pos
+ 13].i
, bfs_buf
[il_pos
+ 14].i
, bfs_buf
[il_pos
+ 15].i
);
1196 static inline u32x
pwlenx_create_combt (__global comb_t
*combs_buf
, const u32 il_pos
)
1199 const u32x pw_lenx
= (u32x
) (combs_buf
[il_pos
+ 0].pw_len
);
1200 #elif VECT_SIZE == 2
1201 const u32x pw_lenx
= (u32x
) (combs_buf
[il_pos
+ 0].pw_len
, combs_buf
[il_pos
+ 1].pw_len
);
1202 #elif VECT_SIZE == 4
1203 const u32x pw_lenx
= (u32x
) (combs_buf
[il_pos
+ 0].pw_len
, combs_buf
[il_pos
+ 1].pw_len
, combs_buf
[il_pos
+ 2].pw_len
, combs_buf
[il_pos
+ 3].pw_len
);
1204 #elif VECT_SIZE == 8
1205 const u32x pw_lenx
= (u32x
) (combs_buf
[il_pos
+ 0].pw_len
, combs_buf
[il_pos
+ 1].pw_len
, combs_buf
[il_pos
+ 2].pw_len
, combs_buf
[il_pos
+ 3].pw_len
, combs_buf
[il_pos
+ 4].pw_len
, combs_buf
[il_pos
+ 5].pw_len
, combs_buf
[il_pos
+ 6].pw_len
, combs_buf
[il_pos
+ 7].pw_len
);
1206 #elif VECT_SIZE == 16
1207 const u32x pw_lenx
= (u32x
) (combs_buf
[il_pos
+ 0].pw_len
, combs_buf
[il_pos
+ 1].pw_len
, combs_buf
[il_pos
+ 2].pw_len
, combs_buf
[il_pos
+ 3].pw_len
, combs_buf
[il_pos
+ 4].pw_len
, combs_buf
[il_pos
+ 5].pw_len
, combs_buf
[il_pos
+ 6].pw_len
, combs_buf
[il_pos
+ 7].pw_len
, combs_buf
[il_pos
+ 8].pw_len
, combs_buf
[il_pos
+ 9].pw_len
, combs_buf
[il_pos
+ 10].pw_len
, combs_buf
[il_pos
+ 11].pw_len
, combs_buf
[il_pos
+ 12].pw_len
, combs_buf
[il_pos
+ 13].pw_len
, combs_buf
[il_pos
+ 14].pw_len
, combs_buf
[il_pos
+ 15].pw_len
);
1213 static inline u32x
ix_create_combt (__global comb_t
*combs_buf
, const u32 il_pos
, const int idx
)
1216 const u32x ix
= (u32x
) (combs_buf
[il_pos
+ 0].i
[idx
]);
1217 #elif VECT_SIZE == 2
1218 const u32x ix
= (u32x
) (combs_buf
[il_pos
+ 0].i
[idx
], combs_buf
[il_pos
+ 1].i
[idx
]);
1219 #elif VECT_SIZE == 4
1220 const u32x ix
= (u32x
) (combs_buf
[il_pos
+ 0].i
[idx
], combs_buf
[il_pos
+ 1].i
[idx
], combs_buf
[il_pos
+ 2].i
[idx
], combs_buf
[il_pos
+ 3].i
[idx
]);
1221 #elif VECT_SIZE == 8
1222 const u32x ix
= (u32x
) (combs_buf
[il_pos
+ 0].i
[idx
], combs_buf
[il_pos
+ 1].i
[idx
], combs_buf
[il_pos
+ 2].i
[idx
], combs_buf
[il_pos
+ 3].i
[idx
], combs_buf
[il_pos
+ 4].i
[idx
], combs_buf
[il_pos
+ 5].i
[idx
], combs_buf
[il_pos
+ 6].i
[idx
], combs_buf
[il_pos
+ 7].i
[idx
]);
1223 #elif VECT_SIZE == 16
1224 const u32x ix
= (u32x
) (combs_buf
[il_pos
+ 0].i
[idx
], combs_buf
[il_pos
+ 1].i
[idx
], combs_buf
[il_pos
+ 2].i
[idx
], combs_buf
[il_pos
+ 3].i
[idx
], combs_buf
[il_pos
+ 4].i
[idx
], combs_buf
[il_pos
+ 5].i
[idx
], combs_buf
[il_pos
+ 6].i
[idx
], combs_buf
[il_pos
+ 7].i
[idx
], combs_buf
[il_pos
+ 8].i
[idx
], combs_buf
[il_pos
+ 9].i
[idx
], combs_buf
[il_pos
+ 10].i
[idx
], combs_buf
[il_pos
+ 11].i
[idx
], combs_buf
[il_pos
+ 12].i
[idx
], combs_buf
[il_pos
+ 13].i
[idx
], combs_buf
[il_pos
+ 14].i
[idx
], combs_buf
[il_pos
+ 15].i
[idx
]);
// packv(arr, var, gid, idx): gather VECT_SIZE consecutive scalar values
// arr[gid*VECT_SIZE + k].var[idx] (k = 0 .. VECT_SIZE-1) into one u32x
// vector literal, one value per lane.  Inverse of unpackv().
// The comma-separated form inside (u32x)(...) is OpenCL vector-literal
// construction, not the C comma operator.
#if   VECT_SIZE == 1
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 1) + 0].var[(idx)])
#elif VECT_SIZE == 2
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)])
#elif VECT_SIZE == 4
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)])
#elif VECT_SIZE == 8
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)])
#elif VECT_SIZE == 16
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
#endif
// unpackv(arr, var, gid, idx, val): scatter the u32x vector val back into
// VECT_SIZE consecutive array slots, one scalar lane (.s0 .. .sf) per
// element arr[gid*VECT_SIZE + k].var[idx].  Inverse of packv().
// Wrapped in do { } while (0) so the multi-statement expansion behaves as
// a single statement (safe inside an unbraced if/else), and (val) is
// parenthesized against operator-precedence surprises at the call site.
#if   VECT_SIZE == 1
#define unpackv(arr,var,gid,idx,val) do { (arr)[((gid) * 1) + 0].var[(idx)] = (val); } while (0)
#elif VECT_SIZE == 2
#define unpackv(arr,var,gid,idx,val) do { (arr)[((gid) * 2) + 0].var[(idx)] = (val).s0; (arr)[((gid) * 2) + 1].var[(idx)] = (val).s1; } while (0)
#elif VECT_SIZE == 4
#define unpackv(arr,var,gid,idx,val) do { (arr)[((gid) * 4) + 0].var[(idx)] = (val).s0; (arr)[((gid) * 4) + 1].var[(idx)] = (val).s1; (arr)[((gid) * 4) + 2].var[(idx)] = (val).s2; (arr)[((gid) * 4) + 3].var[(idx)] = (val).s3; } while (0)
#elif VECT_SIZE == 8
#define unpackv(arr,var,gid,idx,val) do { (arr)[((gid) * 8) + 0].var[(idx)] = (val).s0; (arr)[((gid) * 8) + 1].var[(idx)] = (val).s1; (arr)[((gid) * 8) + 2].var[(idx)] = (val).s2; (arr)[((gid) * 8) + 3].var[(idx)] = (val).s3; (arr)[((gid) * 8) + 4].var[(idx)] = (val).s4; (arr)[((gid) * 8) + 5].var[(idx)] = (val).s5; (arr)[((gid) * 8) + 6].var[(idx)] = (val).s6; (arr)[((gid) * 8) + 7].var[(idx)] = (val).s7; } while (0)
#elif VECT_SIZE == 16
#define unpackv(arr,var,gid,idx,val) do { (arr)[((gid) * 16) + 0].var[(idx)] = (val).s0; (arr)[((gid) * 16) + 1].var[(idx)] = (val).s1; (arr)[((gid) * 16) + 2].var[(idx)] = (val).s2; (arr)[((gid) * 16) + 3].var[(idx)] = (val).s3; (arr)[((gid) * 16) + 4].var[(idx)] = (val).s4; (arr)[((gid) * 16) + 5].var[(idx)] = (val).s5; (arr)[((gid) * 16) + 6].var[(idx)] = (val).s6; (arr)[((gid) * 16) + 7].var[(idx)] = (val).s7; (arr)[((gid) * 16) + 8].var[(idx)] = (val).s8; (arr)[((gid) * 16) + 9].var[(idx)] = (val).s9; (arr)[((gid) * 16) + 10].var[(idx)] = (val).sa; (arr)[((gid) * 16) + 11].var[(idx)] = (val).sb; (arr)[((gid) * 16) + 12].var[(idx)] = (val).sc; (arr)[((gid) * 16) + 13].var[(idx)] = (val).sd; (arr)[((gid) * 16) + 14].var[(idx)] = (val).se; (arr)[((gid) * 16) + 15].var[(idx)] = (val).sf; } while (0)
#endif