2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
/*
 * Forward declarations of the rule-application routines.
 * Each one receives its working buffer as two 4-element word arrays
 * (buf0/buf1, or w0/w1 for the vector variant) and returns a word value.
 * NOTE(review): the returned value is presumably the resulting length - confirm
 * against the definitions.
 */
8 inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
9 inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
10 inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
/**
 * Builds a per-byte "is a letter" mask for the four bytes packed in buf.
 *
 * For every byte lane the result holds 0x20 when that byte is an ASCII
 * letter (0x41..0x5a or 0x61..0x7a) and 0x00 otherwise. 0x20 is the bit that
 * separates ASCII upper case from lower case.
 * NOTE(review): presumably the callers combine this mask with the buffer to
 * change letter case - confirm at the call sites.
 *
 * @param buf four packed bytes
 * @return word with 0x20 in each lane that holds a letter
 */
12 inline u32 generate_cmask (u32 buf)
// rmask: 0x20 in every lane whose byte has bit 6 set and bit 7 clear (0x40..0x7f)
14 const u32 rmask = ((buf & 0x40404040) >> 1)
15 & ~((buf & 0x80808080) >> 2);
// hmask: bit 0x20 of a lane gets set when the low five bits are >= 27,
// i.e. the byte lies past 'Z' / 'z'; the per-lane sums never carry into the next lane
17 const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
// lmask: bit 0x20 of a lane gets set when the low five bits are >= 1,
// which rules out 0x40 ('@') and 0x60 ('`')
18 const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
// rmask only carries 0x20 bits, so the other bits of hmask/lmask are discarded here
20 return rmask & ~hmask & lmask;
/**
 * Truncation helper working in place on the two 4-word halves w0/w1.
 * NOTE(review): going by the name and the mask below, this presumably clears
 * every byte at offset len and beyond - confirm against the full body.
 */
23 inline void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
// tmp has the low (len % 4) bytes set: 0, 0xff, 0xffff or 0xffffff
25 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
/**
 * Truncation helper working in place on the two 4-word halves w0/w1.
 * NOTE(review): going by the name and the mask below, this presumably clears
 * every byte before offset len - confirm against the full body.
 */
76 inline void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
// tmp has the low (len % 4) bytes cleared: 0xffffffff, 0xffffff00, 0xffff0000 or 0xff000000
78 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
/**
 * Shifts the 8-word buffer in0|in1 by one byte position into out0|out1.
 *
 * Every output word is assembled from the input word at the same index and
 * its successor (in0[3] is followed by in1[0]); a literal 0 stands in for the
 * word after in1[3].
 *
 * The body holds two implementations of the same word pairing: one built on
 * __byte_perm_S and one, for AMD / generic targets, built on amd_bytealign_S.
 * NOTE(review): assuming __byte_perm_S mirrors CUDA __byte_perm,
 * __byte_perm_S (a, b, 0x4321) is (a >> 8) | (b << 24), which is also what
 * amd_bytealign_S (b, a, 1) would produce - confirm both helpers.
 */
129 inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
// __byte_perm_S variant: (current word, next word), selector 0x4321
132 out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
133 out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
134 out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
135 out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
136 out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
137 out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
138 out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
139 out1[3] = __byte_perm_S (in1[3], 0, 0x4321);
// amd_bytealign_S variant: note the swapped argument order (next word, current word)
142 #if defined IS_AMD || defined IS_GENERIC
143 out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
144 out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
145 out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
146 out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
147 out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
148 out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
149 out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
150 out1[3] = amd_bytealign_S ( 0, in1[3], 1);
/**
 * Shifts the 8-word buffer in0|in1 by one byte position into out0|out1, in
 * the opposite direction to lshift_block.
 *
 * Every output word is assembled from the input word at the same index and
 * its predecessor (in1[0] is preceded by in0[3]); a literal 0 stands in for
 * the word before in0[0]. Words are written from out1[3] down to out0[0].
 *
 * The body holds two implementations of the same word pairing: one built on
 * __byte_perm_S and one, for AMD / generic targets, built on amd_bytealign_S.
 * NOTE(review): assuming __byte_perm_S mirrors CUDA __byte_perm,
 * __byte_perm_S (a, b, 0x6543) is (a >> 24) | (b << 8), which is also what
 * amd_bytealign_S (b, a, 3) would produce - confirm both helpers.
 */
154 inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
// __byte_perm_S variant: (previous word, current word), selector 0x6543
157 out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
158 out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
159 out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
160 out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
161 out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
162 out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
163 out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
164 out0[0] = __byte_perm_S ( 0, in0[0], 0x6543);
// amd_bytealign_S variant: (current word, previous word), byte offset 3
167 #if defined IS_AMD || defined IS_GENERIC
168 out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
169 out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
170 out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
171 out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
172 out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
173 out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
174 out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
175 out0[0] = amd_bytealign_S (in0[0], 0, 3);
/**
 * Multi-byte counterpart of lshift_block: shifts the 8-word buffer in0|in1
 * into out0|out1, with one case label per supported distance 0..31.
 * NOTE(review): the case labels are presumably selected by num - confirm.
 *
 * For distance k:
 *  - out0[0] is sourced from input word k / 4 (in0[0..3] are words 0..3,
 *    in1[0..3] are words 4..7), and each following output word from the next
 *    input word; a literal 0 stands in for the word after in1[3].
 *  - k % 4 == 0 is a plain word copy.
 *  - k % 4 == 1, 2, 3 uses __byte_perm_S selector 0x4321, 0x5432, 0x6543,
 *    or amd_bytealign_S byte offset 1, 2, 3 in the AMD / generic variant.
 *
 * NOTE(review): assuming __byte_perm_S mirrors CUDA __byte_perm, the three
 * selectors combine (a, b) as (a >> 8) | (b << 24), (a >> 16) | (b << 16) and
 * (a >> 24) | (b << 8) - confirm the helper.
 */
179 inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
// __byte_perm_S variant
184 case 0: out0[0] = in0[0];
193 case 1: out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
194 out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
195 out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
196 out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
197 out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
198 out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
199 out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
200 out1[3] = __byte_perm_S (in1[3], 0, 0x4321);
202 case 2: out0[0] = __byte_perm_S (in0[0], in0[1], 0x5432);
203 out0[1] = __byte_perm_S (in0[1], in0[2], 0x5432);
204 out0[2] = __byte_perm_S (in0[2], in0[3], 0x5432);
205 out0[3] = __byte_perm_S (in0[3], in1[0], 0x5432);
206 out1[0] = __byte_perm_S (in1[0], in1[1], 0x5432);
207 out1[1] = __byte_perm_S (in1[1], in1[2], 0x5432);
208 out1[2] = __byte_perm_S (in1[2], in1[3], 0x5432);
209 out1[3] = __byte_perm_S (in1[3], 0, 0x5432);
211 case 3: out0[0] = __byte_perm_S (in0[0], in0[1], 0x6543);
212 out0[1] = __byte_perm_S (in0[1], in0[2], 0x6543);
213 out0[2] = __byte_perm_S (in0[2], in0[3], 0x6543);
214 out0[3] = __byte_perm_S (in0[3], in1[0], 0x6543);
215 out1[0] = __byte_perm_S (in1[0], in1[1], 0x6543);
216 out1[1] = __byte_perm_S (in1[1], in1[2], 0x6543);
217 out1[2] = __byte_perm_S (in1[2], in1[3], 0x6543);
218 out1[3] = __byte_perm_S (in1[3], 0, 0x6543);
220 case 4: out0[0] = in0[1];
229 case 5: out0[0] = __byte_perm_S (in0[1], in0[2], 0x4321);
230 out0[1] = __byte_perm_S (in0[2], in0[3], 0x4321);
231 out0[2] = __byte_perm_S (in0[3], in1[0], 0x4321);
232 out0[3] = __byte_perm_S (in1[0], in1[1], 0x4321);
233 out1[0] = __byte_perm_S (in1[1], in1[2], 0x4321);
234 out1[1] = __byte_perm_S (in1[2], in1[3], 0x4321);
235 out1[2] = __byte_perm_S (in1[3], 0, 0x4321);
238 case 6: out0[0] = __byte_perm_S (in0[1], in0[2], 0x5432);
239 out0[1] = __byte_perm_S (in0[2], in0[3], 0x5432);
240 out0[2] = __byte_perm_S (in0[3], in1[0], 0x5432);
241 out0[3] = __byte_perm_S (in1[0], in1[1], 0x5432);
242 out1[0] = __byte_perm_S (in1[1], in1[2], 0x5432);
243 out1[1] = __byte_perm_S (in1[2], in1[3], 0x5432);
244 out1[2] = __byte_perm_S (in1[3], 0, 0x5432);
247 case 7: out0[0] = __byte_perm_S (in0[1], in0[2], 0x6543);
248 out0[1] = __byte_perm_S (in0[2], in0[3], 0x6543);
249 out0[2] = __byte_perm_S (in0[3], in1[0], 0x6543);
250 out0[3] = __byte_perm_S (in1[0], in1[1], 0x6543);
251 out1[0] = __byte_perm_S (in1[1], in1[2], 0x6543);
252 out1[1] = __byte_perm_S (in1[2], in1[3], 0x6543);
253 out1[2] = __byte_perm_S (in1[3], 0, 0x6543);
256 case 8: out0[0] = in0[2];
265 case 9: out0[0] = __byte_perm_S (in0[2], in0[3], 0x4321);
266 out0[1] = __byte_perm_S (in0[3], in1[0], 0x4321);
267 out0[2] = __byte_perm_S (in1[0], in1[1], 0x4321);
268 out0[3] = __byte_perm_S (in1[1], in1[2], 0x4321);
269 out1[0] = __byte_perm_S (in1[2], in1[3], 0x4321);
270 out1[1] = __byte_perm_S (in1[3], 0, 0x4321);
274 case 10: out0[0] = __byte_perm_S (in0[2], in0[3], 0x5432);
275 out0[1] = __byte_perm_S (in0[3], in1[0], 0x5432);
276 out0[2] = __byte_perm_S (in1[0], in1[1], 0x5432);
277 out0[3] = __byte_perm_S (in1[1], in1[2], 0x5432);
278 out1[0] = __byte_perm_S (in1[2], in1[3], 0x5432);
279 out1[1] = __byte_perm_S (in1[3], 0, 0x5432);
283 case 11: out0[0] = __byte_perm_S (in0[2], in0[3], 0x6543);
284 out0[1] = __byte_perm_S (in0[3], in1[0], 0x6543);
285 out0[2] = __byte_perm_S (in1[0], in1[1], 0x6543);
286 out0[3] = __byte_perm_S (in1[1], in1[2], 0x6543);
287 out1[0] = __byte_perm_S (in1[2], in1[3], 0x6543);
288 out1[1] = __byte_perm_S (in1[3], 0, 0x6543);
292 case 12: out0[0] = in0[3];
// NOTE(review): the next five assignments follow the pattern for distance 13;
// unlike its neighbours the label is not on the same line - confirm that a
// "case 13:" label directly precedes them.
302 out0[0] = __byte_perm_S (in0[3], in1[0], 0x4321);
303 out0[1] = __byte_perm_S (in1[0], in1[1], 0x4321);
304 out0[2] = __byte_perm_S (in1[1], in1[2], 0x4321);
305 out0[3] = __byte_perm_S (in1[2], in1[3], 0x4321);
306 out1[0] = __byte_perm_S (in1[3], 0, 0x4321);
311 case 14: out0[0] = __byte_perm_S (in0[3], in1[0], 0x5432);
312 out0[1] = __byte_perm_S (in1[0], in1[1], 0x5432);
313 out0[2] = __byte_perm_S (in1[1], in1[2], 0x5432);
314 out0[3] = __byte_perm_S (in1[2], in1[3], 0x5432);
315 out1[0] = __byte_perm_S (in1[3], 0, 0x5432);
320 case 15: out0[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
321 out0[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
322 out0[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
323 out0[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
324 out1[0] = __byte_perm_S (in1[3], 0, 0x6543);
329 case 16: out0[0] = in1[0];
338 case 17: out0[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
339 out0[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
340 out0[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
341 out0[3] = __byte_perm_S (in1[3], 0, 0x4321);
347 case 18: out0[0] = __byte_perm_S (in1[0], in1[1], 0x5432);
348 out0[1] = __byte_perm_S (in1[1], in1[2], 0x5432);
349 out0[2] = __byte_perm_S (in1[2], in1[3], 0x5432);
350 out0[3] = __byte_perm_S (in1[3], 0, 0x5432);
356 case 19: out0[0] = __byte_perm_S (in1[0], in1[1], 0x6543);
357 out0[1] = __byte_perm_S (in1[1], in1[2], 0x6543);
358 out0[2] = __byte_perm_S (in1[2], in1[3], 0x6543);
359 out0[3] = __byte_perm_S (in1[3], 0, 0x6543);
365 case 20: out0[0] = in1[1];
374 case 21: out0[0] = __byte_perm_S (in1[1], in1[2], 0x4321);
375 out0[1] = __byte_perm_S (in1[2], in1[3], 0x4321);
376 out0[2] = __byte_perm_S (in1[3], 0, 0x4321);
383 case 22: out0[0] = __byte_perm_S (in1[1], in1[2], 0x5432);
384 out0[1] = __byte_perm_S (in1[2], in1[3], 0x5432);
385 out0[2] = __byte_perm_S (in1[3], 0, 0x5432);
392 case 23: out0[0] = __byte_perm_S (in1[1], in1[2], 0x6543);
393 out0[1] = __byte_perm_S (in1[2], in1[3], 0x6543);
394 out0[2] = __byte_perm_S (in1[3], 0, 0x6543);
401 case 24: out0[0] = in1[2];
410 case 25: out0[0] = __byte_perm_S (in1[2], in1[3], 0x4321);
411 out0[1] = __byte_perm_S (in1[3], 0, 0x4321);
419 case 26: out0[0] = __byte_perm_S (in1[2], in1[3], 0x5432);
420 out0[1] = __byte_perm_S (in1[3], 0, 0x5432);
428 case 27: out0[0] = __byte_perm_S (in1[2], in1[3], 0x6543);
429 out0[1] = __byte_perm_S (in1[3], 0, 0x6543);
437 case 28: out0[0] = in1[3];
446 case 29: out0[0] = __byte_perm_S (in1[3], 0, 0x4321);
455 case 30: out0[0] = __byte_perm_S (in1[3], 0, 0x5432);
464 case 31: out0[0] = __byte_perm_S (in1[3], 0, 0x6543);
// amd_bytealign_S variant: same word pairing with the arguments swapped
// (next word, current word) and the byte offset equal to k % 4
476 #if defined IS_AMD || defined IS_GENERIC
479 case 0: out0[0] = in0[0];
488 case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
489 out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
490 out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
491 out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
492 out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
493 out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
494 out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
495 out1[3] = amd_bytealign_S ( 0, in1[3], 1);
497 case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2);
498 out0[1] = amd_bytealign_S (in0[2], in0[1], 2);
499 out0[2] = amd_bytealign_S (in0[3], in0[2], 2);
500 out0[3] = amd_bytealign_S (in1[0], in0[3], 2);
501 out1[0] = amd_bytealign_S (in1[1], in1[0], 2);
502 out1[1] = amd_bytealign_S (in1[2], in1[1], 2);
503 out1[2] = amd_bytealign_S (in1[3], in1[2], 2);
504 out1[3] = amd_bytealign_S ( 0, in1[3], 2);
506 case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3);
507 out0[1] = amd_bytealign_S (in0[2], in0[1], 3);
508 out0[2] = amd_bytealign_S (in0[3], in0[2], 3);
509 out0[3] = amd_bytealign_S (in1[0], in0[3], 3);
510 out1[0] = amd_bytealign_S (in1[1], in1[0], 3);
511 out1[1] = amd_bytealign_S (in1[2], in1[1], 3);
512 out1[2] = amd_bytealign_S (in1[3], in1[2], 3);
513 out1[3] = amd_bytealign_S ( 0, in1[3], 3);
515 case 4: out0[0] = in0[1];
524 case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1);
525 out0[1] = amd_bytealign_S (in0[3], in0[2], 1);
526 out0[2] = amd_bytealign_S (in1[0], in0[3], 1);
527 out0[3] = amd_bytealign_S (in1[1], in1[0], 1);
528 out1[0] = amd_bytealign_S (in1[2], in1[1], 1);
529 out1[1] = amd_bytealign_S (in1[3], in1[2], 1);
530 out1[2] = amd_bytealign_S ( 0, in1[3], 1);
533 case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2);
534 out0[1] = amd_bytealign_S (in0[3], in0[2], 2);
535 out0[2] = amd_bytealign_S (in1[0], in0[3], 2);
536 out0[3] = amd_bytealign_S (in1[1], in1[0], 2);
537 out1[0] = amd_bytealign_S (in1[2], in1[1], 2);
538 out1[1] = amd_bytealign_S (in1[3], in1[2], 2);
539 out1[2] = amd_bytealign_S ( 0, in1[3], 2);
542 case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3);
543 out0[1] = amd_bytealign_S (in0[3], in0[2], 3);
544 out0[2] = amd_bytealign_S (in1[0], in0[3], 3);
545 out0[3] = amd_bytealign_S (in1[1], in1[0], 3);
546 out1[0] = amd_bytealign_S (in1[2], in1[1], 3);
547 out1[1] = amd_bytealign_S (in1[3], in1[2], 3);
548 out1[2] = amd_bytealign_S ( 0, in1[3], 3);
551 case 8: out0[0] = in0[2];
560 case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1);
561 out0[1] = amd_bytealign_S (in1[0], in0[3], 1);
562 out0[2] = amd_bytealign_S (in1[1], in1[0], 1);
563 out0[3] = amd_bytealign_S (in1[2], in1[1], 1);
564 out1[0] = amd_bytealign_S (in1[3], in1[2], 1);
565 out1[1] = amd_bytealign_S ( 0, in1[3], 1);
569 case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2);
570 out0[1] = amd_bytealign_S (in1[0], in0[3], 2);
571 out0[2] = amd_bytealign_S (in1[1], in1[0], 2);
572 out0[3] = amd_bytealign_S (in1[2], in1[1], 2);
573 out1[0] = amd_bytealign_S (in1[3], in1[2], 2);
574 out1[1] = amd_bytealign_S ( 0, in1[3], 2);
578 case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3);
579 out0[1] = amd_bytealign_S (in1[0], in0[3], 3);
580 out0[2] = amd_bytealign_S (in1[1], in1[0], 3);
581 out0[3] = amd_bytealign_S (in1[2], in1[1], 3);
582 out1[0] = amd_bytealign_S (in1[3], in1[2], 3);
583 out1[1] = amd_bytealign_S ( 0, in1[3], 3);
587 case 12: out0[0] = in0[3];
596 case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1);
597 out0[1] = amd_bytealign_S (in1[1], in1[0], 1);
598 out0[2] = amd_bytealign_S (in1[2], in1[1], 1);
599 out0[3] = amd_bytealign_S (in1[3], in1[2], 1);
600 out1[0] = amd_bytealign_S ( 0, in1[3], 1);
605 case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2);
606 out0[1] = amd_bytealign_S (in1[1], in1[0], 2);
607 out0[2] = amd_bytealign_S (in1[2], in1[1], 2);
608 out0[3] = amd_bytealign_S (in1[3], in1[2], 2);
609 out1[0] = amd_bytealign_S ( 0, in1[3], 2);
614 case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3);
615 out0[1] = amd_bytealign_S (in1[1], in1[0], 3);
616 out0[2] = amd_bytealign_S (in1[2], in1[1], 3);
617 out0[3] = amd_bytealign_S (in1[3], in1[2], 3);
618 out1[0] = amd_bytealign_S ( 0, in1[3], 3);
623 case 16: out0[0] = in1[0];
632 case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1);
633 out0[1] = amd_bytealign_S (in1[2], in1[1], 1);
634 out0[2] = amd_bytealign_S (in1[3], in1[2], 1);
635 out0[3] = amd_bytealign_S ( 0, in1[3], 1);
641 case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2);
642 out0[1] = amd_bytealign_S (in1[2], in1[1], 2);
643 out0[2] = amd_bytealign_S (in1[3], in1[2], 2);
644 out0[3] = amd_bytealign_S ( 0, in1[3], 2);
650 case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3);
651 out0[1] = amd_bytealign_S (in1[2], in1[1], 3);
652 out0[2] = amd_bytealign_S (in1[3], in1[2], 3);
653 out0[3] = amd_bytealign_S ( 0, in1[3], 3);
659 case 20: out0[0] = in1[1];
668 case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1);
669 out0[1] = amd_bytealign_S (in1[3], in1[2], 1);
670 out0[2] = amd_bytealign_S ( 0, in1[3], 1);
677 case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2);
678 out0[1] = amd_bytealign_S (in1[3], in1[2], 2);
679 out0[2] = amd_bytealign_S ( 0, in1[3], 2);
686 case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3);
687 out0[1] = amd_bytealign_S (in1[3], in1[2], 3);
688 out0[2] = amd_bytealign_S ( 0, in1[3], 3);
695 case 24: out0[0] = in1[2];
704 case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1);
705 out0[1] = amd_bytealign_S ( 0, in1[3], 1);
713 case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2);
714 out0[1] = amd_bytealign_S ( 0, in1[3], 2);
722 case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3);
723 out0[1] = amd_bytealign_S ( 0, in1[3], 3);
731 case 28: out0[0] = in1[3];
740 case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1);
749 case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2);
758 case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3);
/**
 * Multi-byte counterpart of rshift_block: shifts the 8-word buffer in0|in1
 * into out0|out1, with one case label per supported distance 0..31.
 * NOTE(review): the case labels are presumably selected by num - confirm.
 *
 * For distance k:
 *  - out1[3] is sourced from input word 7 - k / 4 (in0[0..3] are words 0..3,
 *    in1[0..3] are words 4..7), and each preceding output word from the
 *    preceding input word; a literal 0 stands in for the word before in0[0].
 *  - k % 4 == 0 is a plain word copy.
 *  - k % 4 == 1, 2, 3 uses __byte_perm_S selector 0x6543, 0x5432, 0x4321,
 *    or amd_bytealign_S byte offset 3, 2, 1 in the AMD / generic variant.
 *
 * NOTE(review): assuming __byte_perm_S mirrors CUDA __byte_perm, the three
 * selectors combine (a, b) as (a >> 24) | (b << 8), (a >> 16) | (b << 16) and
 * (a >> 8) | (b << 24) - confirm the helper.
 */
771 inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
// __byte_perm_S variant
776 case 0: out1[3] = in1[3];
785 case 1: out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
786 out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
787 out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
788 out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
789 out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
790 out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
791 out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
792 out0[0] = __byte_perm_S ( 0, in0[0], 0x6543);
794 case 2: out1[3] = __byte_perm_S (in1[2], in1[3], 0x5432);
795 out1[2] = __byte_perm_S (in1[1], in1[2], 0x5432);
796 out1[1] = __byte_perm_S (in1[0], in1[1], 0x5432);
797 out1[0] = __byte_perm_S (in0[3], in1[0], 0x5432);
798 out0[3] = __byte_perm_S (in0[2], in0[3], 0x5432);
799 out0[2] = __byte_perm_S (in0[1], in0[2], 0x5432);
800 out0[1] = __byte_perm_S (in0[0], in0[1], 0x5432);
801 out0[0] = __byte_perm_S ( 0, in0[0], 0x5432);
803 case 3: out1[3] = __byte_perm_S (in1[2], in1[3], 0x4321);
804 out1[2] = __byte_perm_S (in1[1], in1[2], 0x4321);
805 out1[1] = __byte_perm_S (in1[0], in1[1], 0x4321);
806 out1[0] = __byte_perm_S (in0[3], in1[0], 0x4321);
807 out0[3] = __byte_perm_S (in0[2], in0[3], 0x4321);
808 out0[2] = __byte_perm_S (in0[1], in0[2], 0x4321);
809 out0[1] = __byte_perm_S (in0[0], in0[1], 0x4321);
810 out0[0] = __byte_perm_S ( 0, in0[0], 0x4321);
812 case 4: out1[3] = in1[2];
821 case 5: out1[3] = __byte_perm_S (in1[1], in1[2], 0x6543);
822 out1[2] = __byte_perm_S (in1[0], in1[1], 0x6543);
823 out1[1] = __byte_perm_S (in0[3], in1[0], 0x6543);
824 out1[0] = __byte_perm_S (in0[2], in0[3], 0x6543);
825 out0[3] = __byte_perm_S (in0[1], in0[2], 0x6543);
826 out0[2] = __byte_perm_S (in0[0], in0[1], 0x6543);
827 out0[1] = __byte_perm_S ( 0, in0[0], 0x6543);
830 case 6: out1[3] = __byte_perm_S (in1[1], in1[2], 0x5432);
831 out1[2] = __byte_perm_S (in1[0], in1[1], 0x5432);
832 out1[1] = __byte_perm_S (in0[3], in1[0], 0x5432);
833 out1[0] = __byte_perm_S (in0[2], in0[3], 0x5432);
834 out0[3] = __byte_perm_S (in0[1], in0[2], 0x5432);
835 out0[2] = __byte_perm_S (in0[0], in0[1], 0x5432);
836 out0[1] = __byte_perm_S ( 0, in0[0], 0x5432);
839 case 7: out1[3] = __byte_perm_S (in1[1], in1[2], 0x4321);
840 out1[2] = __byte_perm_S (in1[0], in1[1], 0x4321);
841 out1[1] = __byte_perm_S (in0[3], in1[0], 0x4321);
842 out1[0] = __byte_perm_S (in0[2], in0[3], 0x4321);
843 out0[3] = __byte_perm_S (in0[1], in0[2], 0x4321);
844 out0[2] = __byte_perm_S (in0[0], in0[1], 0x4321);
845 out0[1] = __byte_perm_S ( 0, in0[0], 0x4321);
848 case 8: out1[3] = in1[1];
857 case 9: out1[3] = __byte_perm_S (in1[0], in1[1], 0x6543);
858 out1[2] = __byte_perm_S (in0[3], in1[0], 0x6543);
859 out1[1] = __byte_perm_S (in0[2], in0[3], 0x6543);
860 out1[0] = __byte_perm_S (in0[1], in0[2], 0x6543);
861 out0[3] = __byte_perm_S (in0[0], in0[1], 0x6543);
862 out0[2] = __byte_perm_S ( 0, in0[0], 0x6543);
866 case 10: out1[3] = __byte_perm_S (in1[0], in1[1], 0x5432);
867 out1[2] = __byte_perm_S (in0[3], in1[0], 0x5432);
868 out1[1] = __byte_perm_S (in0[2], in0[3], 0x5432);
869 out1[0] = __byte_perm_S (in0[1], in0[2], 0x5432);
870 out0[3] = __byte_perm_S (in0[0], in0[1], 0x5432);
871 out0[2] = __byte_perm_S ( 0, in0[0], 0x5432);
875 case 11: out1[3] = __byte_perm_S (in1[0], in1[1], 0x4321);
876 out1[2] = __byte_perm_S (in0[3], in1[0], 0x4321);
877 out1[1] = __byte_perm_S (in0[2], in0[3], 0x4321);
878 out1[0] = __byte_perm_S (in0[1], in0[2], 0x4321);
879 out0[3] = __byte_perm_S (in0[0], in0[1], 0x4321);
880 out0[2] = __byte_perm_S ( 0, in0[0], 0x4321);
884 case 12: out1[3] = in1[0];
893 case 13: out1[3] = __byte_perm_S (in0[3], in1[0], 0x6543);
894 out1[2] = __byte_perm_S (in0[2], in0[3], 0x6543);
895 out1[1] = __byte_perm_S (in0[1], in0[2], 0x6543);
896 out1[0] = __byte_perm_S (in0[0], in0[1], 0x6543);
897 out0[3] = __byte_perm_S ( 0, in0[0], 0x6543);
902 case 14: out1[3] = __byte_perm_S (in0[3], in1[0], 0x5432);
903 out1[2] = __byte_perm_S (in0[2], in0[3], 0x5432);
904 out1[1] = __byte_perm_S (in0[1], in0[2], 0x5432);
905 out1[0] = __byte_perm_S (in0[0], in0[1], 0x5432);
906 out0[3] = __byte_perm_S ( 0, in0[0], 0x5432);
911 case 15: out1[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
912 out1[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
913 out1[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
914 out1[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
915 out0[3] = __byte_perm_S ( 0, in0[0], 0x4321);
920 case 16: out1[3] = in0[3];
929 case 17: out1[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
930 out1[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
931 out1[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
932 out1[0] = __byte_perm_S ( 0, in0[0], 0x6543);
938 case 18: out1[3] = __byte_perm_S (in0[2], in0[3], 0x5432);
939 out1[2] = __byte_perm_S (in0[1], in0[2], 0x5432);
940 out1[1] = __byte_perm_S (in0[0], in0[1], 0x5432);
941 out1[0] = __byte_perm_S ( 0, in0[0], 0x5432);
947 case 19: out1[3] = __byte_perm_S (in0[2], in0[3], 0x4321);
948 out1[2] = __byte_perm_S (in0[1], in0[2], 0x4321);
949 out1[1] = __byte_perm_S (in0[0], in0[1], 0x4321);
950 out1[0] = __byte_perm_S ( 0, in0[0], 0x4321);
956 case 20: out1[3] = in0[2];
965 case 21: out1[3] = __byte_perm_S (in0[1], in0[2], 0x6543);
966 out1[2] = __byte_perm_S (in0[0], in0[1], 0x6543);
967 out1[1] = __byte_perm_S ( 0, in0[0], 0x6543);
974 case 22: out1[3] = __byte_perm_S (in0[1], in0[2], 0x5432);
975 out1[2] = __byte_perm_S (in0[0], in0[1], 0x5432);
976 out1[1] = __byte_perm_S ( 0, in0[0], 0x5432);
983 case 23: out1[3] = __byte_perm_S (in0[1], in0[2], 0x4321);
984 out1[2] = __byte_perm_S (in0[0], in0[1], 0x4321);
985 out1[1] = __byte_perm_S ( 0, in0[0], 0x4321);
992 case 24: out1[3] = in0[1];
1001 case 25: out1[3] = __byte_perm_S (in0[0], in0[1], 0x6543);
1002 out1[2] = __byte_perm_S ( 0, in0[0], 0x6543);
1010 case 26: out1[3] = __byte_perm_S (in0[0], in0[1], 0x5432);
1011 out1[2] = __byte_perm_S ( 0, in0[0], 0x5432);
1019 case 27: out1[3] = __byte_perm_S (in0[0], in0[1], 0x4321);
1020 out1[2] = __byte_perm_S ( 0, in0[0], 0x4321);
1028 case 28: out1[3] = in0[0];
1037 case 29: out1[3] = __byte_perm_S ( 0, in0[0], 0x6543);
1046 case 30: out1[3] = __byte_perm_S ( 0, in0[0], 0x5432);
1055 case 31: out1[3] = __byte_perm_S ( 0, in0[0], 0x4321);
// amd_bytealign_S variant: same word pairing with the arguments swapped
// (current word, previous word) and the byte offset equal to 4 - k % 4
1067 #if defined IS_AMD || defined IS_GENERIC
1070 case 0: out1[3] = in1[3];
1079 case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
1080 out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
1081 out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
1082 out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
1083 out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
1084 out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
1085 out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
1086 out0[0] = amd_bytealign_S (in0[0], 0, 3);
1088 case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2);
1089 out1[2] = amd_bytealign_S (in1[2], in1[1], 2);
1090 out1[1] = amd_bytealign_S (in1[1], in1[0], 2);
1091 out1[0] = amd_bytealign_S (in1[0], in0[3], 2);
1092 out0[3] = amd_bytealign_S (in0[3], in0[2], 2);
1093 out0[2] = amd_bytealign_S (in0[2], in0[1], 2);
1094 out0[1] = amd_bytealign_S (in0[1], in0[0], 2);
1095 out0[0] = amd_bytealign_S (in0[0], 0, 2);
1097 case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1);
1098 out1[2] = amd_bytealign_S (in1[2], in1[1], 1);
1099 out1[1] = amd_bytealign_S (in1[1], in1[0], 1);
1100 out1[0] = amd_bytealign_S (in1[0], in0[3], 1);
1101 out0[3] = amd_bytealign_S (in0[3], in0[2], 1);
1102 out0[2] = amd_bytealign_S (in0[2], in0[1], 1);
1103 out0[1] = amd_bytealign_S (in0[1], in0[0], 1);
1104 out0[0] = amd_bytealign_S (in0[0], 0, 1);
1106 case 4: out1[3] = in1[2];
1115 case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3);
1116 out1[2] = amd_bytealign_S (in1[1], in1[0], 3);
1117 out1[1] = amd_bytealign_S (in1[0], in0[3], 3);
1118 out1[0] = amd_bytealign_S (in0[3], in0[2], 3);
1119 out0[3] = amd_bytealign_S (in0[2], in0[1], 3);
1120 out0[2] = amd_bytealign_S (in0[1], in0[0], 3);
1121 out0[1] = amd_bytealign_S (in0[0], 0, 3);
1124 case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2);
1125 out1[2] = amd_bytealign_S (in1[1], in1[0], 2);
1126 out1[1] = amd_bytealign_S (in1[0], in0[3], 2);
1127 out1[0] = amd_bytealign_S (in0[3], in0[2], 2);
1128 out0[3] = amd_bytealign_S (in0[2], in0[1], 2);
1129 out0[2] = amd_bytealign_S (in0[1], in0[0], 2);
1130 out0[1] = amd_bytealign_S (in0[0], 0, 2);
1133 case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1);
1134 out1[2] = amd_bytealign_S (in1[1], in1[0], 1);
1135 out1[1] = amd_bytealign_S (in1[0], in0[3], 1);
1136 out1[0] = amd_bytealign_S (in0[3], in0[2], 1);
1137 out0[3] = amd_bytealign_S (in0[2], in0[1], 1);
1138 out0[2] = amd_bytealign_S (in0[1], in0[0], 1);
1139 out0[1] = amd_bytealign_S (in0[0], 0, 1);
1142 case 8: out1[3] = in1[1];
1151 case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3);
1152 out1[2] = amd_bytealign_S (in1[0], in0[3], 3);
1153 out1[1] = amd_bytealign_S (in0[3], in0[2], 3);
1154 out1[0] = amd_bytealign_S (in0[2], in0[1], 3);
1155 out0[3] = amd_bytealign_S (in0[1], in0[0], 3);
1156 out0[2] = amd_bytealign_S (in0[0], 0, 3);
1160 case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2);
1161 out1[2] = amd_bytealign_S (in1[0], in0[3], 2);
1162 out1[1] = amd_bytealign_S (in0[3], in0[2], 2);
1163 out1[0] = amd_bytealign_S (in0[2], in0[1], 2);
1164 out0[3] = amd_bytealign_S (in0[1], in0[0], 2);
1165 out0[2] = amd_bytealign_S (in0[0], 0, 2);
1169 case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1);
1170 out1[2] = amd_bytealign_S (in1[0], in0[3], 1);
1171 out1[1] = amd_bytealign_S (in0[3], in0[2], 1);
1172 out1[0] = amd_bytealign_S (in0[2], in0[1], 1);
1173 out0[3] = amd_bytealign_S (in0[1], in0[0], 1);
1174 out0[2] = amd_bytealign_S (in0[0], 0, 1);
1178 case 12: out1[3] = in1[0];
1187 case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3);
1188 out1[2] = amd_bytealign_S (in0[3], in0[2], 3);
1189 out1[1] = amd_bytealign_S (in0[2], in0[1], 3);
1190 out1[0] = amd_bytealign_S (in0[1], in0[0], 3);
1191 out0[3] = amd_bytealign_S (in0[0], 0, 3);
1196 case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2);
1197 out1[2] = amd_bytealign_S (in0[3], in0[2], 2);
1198 out1[1] = amd_bytealign_S (in0[2], in0[1], 2);
1199 out1[0] = amd_bytealign_S (in0[1], in0[0], 2);
1200 out0[3] = amd_bytealign_S (in0[0], 0, 2);
1205 case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1);
1206 out1[2] = amd_bytealign_S (in0[3], in0[2], 1);
1207 out1[1] = amd_bytealign_S (in0[2], in0[1], 1);
1208 out1[0] = amd_bytealign_S (in0[1], in0[0], 1);
1209 out0[3] = amd_bytealign_S (in0[0], 0, 1);
1214 case 16: out1[3] = in0[3];
1223 case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3);
1224 out1[2] = amd_bytealign_S (in0[2], in0[1], 3);
1225 out1[1] = amd_bytealign_S (in0[1], in0[0], 3);
1226 out1[0] = amd_bytealign_S (in0[0], 0, 3);
1232 case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2);
1233 out1[2] = amd_bytealign_S (in0[2], in0[1], 2);
1234 out1[1] = amd_bytealign_S (in0[1], in0[0], 2);
1235 out1[0] = amd_bytealign_S (in0[0], 0, 2);
1241 case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1);
1242 out1[2] = amd_bytealign_S (in0[2], in0[1], 1);
1243 out1[1] = amd_bytealign_S (in0[1], in0[0], 1);
1244 out1[0] = amd_bytealign_S (in0[0], 0, 1);
1250 case 20: out1[3] = in0[2];
1259 case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3);
1260 out1[2] = amd_bytealign_S (in0[1], in0[0], 3);
1261 out1[1] = amd_bytealign_S (in0[0], 0, 3);
1268 case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2);
1269 out1[2] = amd_bytealign_S (in0[1], in0[0], 2);
1270 out1[1] = amd_bytealign_S (in0[0], 0, 2);
1277 case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1);
1278 out1[2] = amd_bytealign_S (in0[1], in0[0], 1);
1279 out1[1] = amd_bytealign_S (in0[0], 0, 1);
1286 case 24: out1[3] = in0[1];
1295 case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3);
1296 out1[2] = amd_bytealign_S (in0[0], 0, 3);
1304 case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2);
1305 out1[2] = amd_bytealign_S (in0[0], 0, 2);
1313 case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1);
1314 out1[2] = amd_bytealign_S (in0[0], 0, 1);
1322 case 28: out1[3] = in0[0];
1331 case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3);
1340 case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2);
1349 case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1);
/**
 * ORs the word src_r0 into the 8-word buffer dst0|dst1.
 *
 * Works in two steps: src_r0 is first split into a pair of words (tmp[0] holds
 * src_r0 shifted left by 0, 8, 16 or 24 bits, tmp[1] the bits shifted out at
 * the top), then tmp[0] is ORed into one of the eight destination words.
 * Because the merge is an OR, bits already set in the destination are kept.
 *
 * NOTE(review): the two groups of case labels are presumably selected by the
 * byte part (offset % 4) and the word part (offset / 4) of offset - confirm.
 */
1362 inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
// step 1: align src_r0 to the byte position inside a word; the left shift
// and the right shift of each case always add up to 32 bits
1368 case 0: tmp[0] = src_r0;
1371 case 1: tmp[0] = src_r0 << 8;
1372 tmp[1] = src_r0 >> 24;
1374 case 2: tmp[0] = src_r0 << 16;
1375 tmp[1] = src_r0 >> 16;
1377 case 3: tmp[0] = src_r0 << 24;
1378 tmp[1] = src_r0 >> 8;
// step 2: merge into the destination word; cases 0..3 address dst0[0..3],
// cases 4..7 address dst1[0..3]
1384 case 0: dst0[0] |= tmp[0];
1387 case 1: dst0[1] |= tmp[0];
1390 case 2: dst0[2] |= tmp[0];
1393 case 3: dst0[3] |= tmp[0];
1396 case 4: dst1[0] |= tmp[0];
1399 case 5: dst1[1] |= tmp[0];
1402 case 6: dst1[2] |= tmp[0];
1405 case 7: dst1[3] |= tmp[0];
1410 inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
1417 dst0[0] = src_r0[0];
1418 dst0[1] = src_r0[1];
1419 dst0[2] = src_r0[2];
1420 dst0[3] = src_r0[3];
1421 dst1[0] = src_r1[0];
1422 dst1[1] = src_r1[1];
1423 dst1[2] = src_r1[2];
1424 dst1[3] = src_r1[3];
1428 dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x6540);
1429 dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1430 dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1431 dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1432 dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1433 dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
1434 dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543);
1435 dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x6543);
1439 dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x5410);
1440 dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1441 dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1442 dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1443 dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1444 dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
1445 dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432);
1446 dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x5432);
1450 dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x4210);
1451 dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1452 dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1453 dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1454 dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1455 dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
1456 dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321);
1457 dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x4321);
1461 dst0[1] = src_r0[0];
1462 dst0[2] = src_r0[1];
1463 dst0[3] = src_r0[2];
1464 dst1[0] = src_r0[3];
1465 dst1[1] = src_r1[0];
1466 dst1[2] = src_r1[1];
1467 dst1[3] = src_r1[2];
1471 dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x6540);
1472 dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1473 dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1474 dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1475 dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1476 dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
1477 dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543);
1481 dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x5410);
1482 dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1483 dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1484 dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1485 dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1486 dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
1487 dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432);
1491 dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x4210);
1492 dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1493 dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1494 dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1495 dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1496 dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
1497 dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321);
1501 dst0[2] = src_r0[0];
1502 dst0[3] = src_r0[1];
1503 dst1[0] = src_r0[2];
1504 dst1[1] = src_r0[3];
1505 dst1[2] = src_r1[0];
1506 dst1[3] = src_r1[1];
1510 dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x6540);
1511 dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1512 dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1513 dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1514 dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1515 dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
1519 dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x5410);
1520 dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1521 dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1522 dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1523 dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1524 dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
1528 dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x4210);
1529 dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1530 dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1531 dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1532 dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1533 dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
1537 dst0[3] = src_r0[0];
1538 dst1[0] = src_r0[1];
1539 dst1[1] = src_r0[2];
1540 dst1[2] = src_r0[3];
1541 dst1[3] = src_r1[0];
1545 dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x6540);
1546 dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1547 dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1548 dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1549 dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1553 dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x5410);
1554 dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1555 dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1556 dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1557 dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1561 dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x4210);
1562 dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1563 dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1564 dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1565 dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1569 dst1[0] = src_r0[0];
1570 dst1[1] = src_r0[1];
1571 dst1[2] = src_r0[2];
1572 dst1[3] = src_r0[3];
1576 dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x6540);
1577 dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1578 dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1579 dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1583 dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x5410);
1584 dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1585 dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1586 dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1590 dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x4210);
1591 dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1592 dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1593 dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1597 dst1[1] = src_r0[0];
1598 dst1[2] = src_r0[1];
1599 dst1[3] = src_r0[2];
1603 dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x6540);
1604 dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1605 dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1609 dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x5410);
1610 dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1611 dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1615 dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x4210);
1616 dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1617 dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1621 dst1[2] = src_r0[0];
1622 dst1[3] = src_r0[1];
1626 dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x6540);
1627 dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1631 dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x5410);
1632 dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1636 dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x4210);
1637 dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1641 dst1[3] = src_r0[0];
1645 dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x6540);
1649 dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x5410);
1653 dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x4210);
1658 #if defined IS_AMD || defined IS_GENERIC
1663 dst1[3] = src_l1[3] | src_r0[0] << 24;
1666 dst1[3] = src_l1[3] | src_r0[0] << 16;
1669 dst1[3] = src_l1[3] | src_r0[0] << 8;
1672 dst1[3] = src_r0[0];
1675 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1676 dst1[2] = src_l1[2] | src_r0[0] << 24;
1679 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1680 dst1[2] = src_l1[2] | src_r0[0] << 16;
1683 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1684 dst1[2] = src_l1[2] | src_r0[0] << 8;
1687 dst1[3] = src_r0[1];
1688 dst1[2] = src_r0[0];
1691 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1692 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1693 dst1[1] = src_l1[1] | src_r0[0] << 24;
1696 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1697 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1698 dst1[1] = src_l1[1] | src_r0[0] << 16;
1701 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1702 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1703 dst1[1] = src_l1[1] | src_r0[0] << 8;
1706 dst1[3] = src_r0[2];
1707 dst1[2] = src_r0[1];
1708 dst1[1] = src_r0[0];
1711 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1712 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1713 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1714 dst1[0] = src_l1[0] | src_r0[0] << 24;
1717 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1718 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1719 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1720 dst1[0] = src_l1[0] | src_r0[0] << 16;
1723 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1724 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1725 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1726 dst1[0] = src_l1[0] | src_r0[0] << 8;
1729 dst1[3] = src_r0[3];
1730 dst1[2] = src_r0[2];
1731 dst1[1] = src_r0[1];
1732 dst1[0] = src_r0[0];
1735 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1736 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1737 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1738 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1739 dst0[3] = src_l0[3] | src_r0[0] << 24;
1742 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1743 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1744 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1745 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1746 dst0[3] = src_l0[3] | src_r0[0] << 16;
1749 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1750 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1751 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1752 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1753 dst0[3] = src_l0[3] | src_r0[0] << 8;
1756 dst1[3] = src_r1[0];
1757 dst1[2] = src_r0[3];
1758 dst1[1] = src_r0[2];
1759 dst1[0] = src_r0[1];
1760 dst0[3] = src_r0[0];
1763 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
1764 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1765 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1766 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1767 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1768 dst0[2] = src_l0[2] | src_r0[0] << 24;
1771 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
1772 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1773 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1774 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1775 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1776 dst0[2] = src_l0[2] | src_r0[0] << 16;
1779 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
1780 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1781 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1782 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1783 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1784 dst0[2] = src_l0[2] | src_r0[0] << 8;
1787 dst1[3] = src_r1[1];
1788 dst1[2] = src_r1[0];
1789 dst1[1] = src_r0[3];
1790 dst1[0] = src_r0[2];
1791 dst0[3] = src_r0[1];
1792 dst0[2] = src_r0[0];
1795 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
1796 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
1797 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1798 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1799 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1800 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1801 dst0[1] = src_l0[1] | src_r0[0] << 24;
1804 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
1805 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
1806 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1807 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1808 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1809 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1810 dst0[1] = src_l0[1] | src_r0[0] << 16;
1813 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
1814 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
1815 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1816 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1817 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1818 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1819 dst0[1] = src_l0[1] | src_r0[0] << 8;
1822 dst1[3] = src_r1[2];
1823 dst1[2] = src_r1[1];
1824 dst1[1] = src_r1[0];
1825 dst1[0] = src_r0[3];
1826 dst0[3] = src_r0[2];
1827 dst0[2] = src_r0[1];
1828 dst0[1] = src_r0[0];
1831 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 1);
1832 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
1833 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
1834 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1835 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1836 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1837 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1838 dst0[0] = src_l0[0] | src_r0[0] << 24;
1841 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 2);
1842 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
1843 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
1844 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1845 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1846 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1847 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1848 dst0[0] = src_l0[0] | src_r0[0] << 16;
1851 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 3);
1852 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
1853 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
1854 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1855 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1856 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1857 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1858 dst0[0] = src_l0[0] | src_r0[0] << 8;
1861 dst1[3] = src_r1[3];
1862 dst1[2] = src_r1[2];
1863 dst1[1] = src_r1[1];
1864 dst1[0] = src_r1[0];
1865 dst0[3] = src_r0[3];
1866 dst0[2] = src_r0[2];
1867 dst0[1] = src_r0[1];
1868 dst0[0] = src_r0[0];
/*
 * reverse_block: produce in out0/out1 a byte-reordered form of in0/in1.
 *
 * Step 1 hands the input to rshift_block_N() with a count of (32 - len),
 * writing the result into out0/out1.
 * Final step: each output word is the swap32_S() of the matching word of the
 * temporaries tib40/tib41.
 *
 * NOTE(review): the statements that fill tib40/tib41 are not visible here;
 * presumably they hold the shifted words in reversed word order, so that the
 * per-word byte swap yields the first len bytes reversed -- confirm.
 */
1874 inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
1876 rshift_block_N (in0, in1, out0, out1, 32 - len);
/* byte-swap every word of the temporaries into the output */
1890 out0[0] = swap32_S (tib40[0]);
1891 out0[1] = swap32_S (tib40[1]);
1892 out0[2] = swap32_S (tib40[2]);
1893 out0[3] = swap32_S (tib40[3]);
1894 out1[0] = swap32_S (tib41[0]);
1895 out1[1] = swap32_S (tib41[1]);
1896 out1[2] = swap32_S (tib41[2]);
1897 out1[3] = swap32_S (tib41[3]);
/*
 * rule_op_mangle_lrest: lower-case every ASCII letter of the candidate.
 *
 * generate_cmask() returns 0x20 in each byte lane that holds an ASCII letter
 * (A-Z or a-z) and 0 elsewhere, so OR-ing it into a word sets the lower-case
 * bit of letters only and leaves all other bytes untouched.
 * All eight words (buf0[0..3], buf1[0..3]) are processed.
 * p0 and p1 are not referenced by the statements shown.
 */
1900 inline u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1902 buf0[0] |= (generate_cmask (buf0[0]));
1903 buf0[1] |= (generate_cmask (buf0[1]));
1904 buf0[2] |= (generate_cmask (buf0[2]));
1905 buf0[3] |= (generate_cmask (buf0[3]));
1906 buf1[0] |= (generate_cmask (buf1[0]));
1907 buf1[1] |= (generate_cmask (buf1[1]));
1908 buf1[2] |= (generate_cmask (buf1[2]));
1909 buf1[3] |= (generate_cmask (buf1[3]));
/*
 * rule_op_mangle_urest: upper-case every ASCII letter of the candidate.
 *
 * generate_cmask() returns 0x20 in each byte lane that holds an ASCII letter;
 * AND-ing with its complement clears that bit for letters only, which turns
 * a-z into A-Z and leaves every other byte unchanged.
 * All eight words (buf0[0..3], buf1[0..3]) are processed.
 * p0 and p1 are not referenced by the statements shown.
 */
1914 inline u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1916 buf0[0] &= ~(generate_cmask (buf0[0]));
1917 buf0[1] &= ~(generate_cmask (buf0[1]));
1918 buf0[2] &= ~(generate_cmask (buf0[2]));
1919 buf0[3] &= ~(generate_cmask (buf0[3]));
1920 buf1[0] &= ~(generate_cmask (buf1[0]));
1921 buf1[1] &= ~(generate_cmask (buf1[1]));
1922 buf1[2] &= ~(generate_cmask (buf1[2]));
1923 buf1[3] &= ~(generate_cmask (buf1[3]));
/*
 * rule_op_mangle_lrest_ufirst: lower-case everything, then upper-case the
 * character stored in the low byte of buf0[0].
 *
 * The low byte of buf0[0] is character position 0 in the packing used by the
 * position-based rules in this file (byte lane = pos & 3, word = pos / 4).
 */
1928 inline u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1930 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
/* clear the case bit of byte 0 only, and only if that byte is a letter */
1932 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
/*
 * rule_op_mangle_urest_lfirst: upper-case everything, then lower-case the
 * character stored in the low byte of buf0[0].
 *
 * The low byte of buf0[0] is character position 0 in the packing used by the
 * position-based rules in this file (byte lane = pos & 3, word = pos / 4).
 */
1937 inline u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1939 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
/* set the case bit of byte 0 only, and only if that byte is a letter */
1941 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
/*
 * rule_op_mangle_trest: toggle the case of every ASCII letter of the candidate.
 *
 * generate_cmask() returns 0x20 in each byte lane that holds an ASCII letter;
 * XOR-ing it in flips the case bit of letters and leaves other bytes alone.
 * All eight words (buf0[0..3], buf1[0..3]) are processed.
 * p0 and p1 are not referenced by the statements shown.
 */
1946 inline u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1948 buf0[0] ^= (generate_cmask (buf0[0]));
1949 buf0[1] ^= (generate_cmask (buf0[1]));
1950 buf0[2] ^= (generate_cmask (buf0[2]));
1951 buf0[3] ^= (generate_cmask (buf0[3]));
1952 buf1[0] ^= (generate_cmask (buf1[0]));
1953 buf1[1] ^= (generate_cmask (buf1[1]));
1954 buf1[2] ^= (generate_cmask (buf1[2]));
1955 buf1[3] ^= (generate_cmask (buf1[3]));
/*
 * rule_op_mangle_toggle_at: toggle the case of the character at position p0.
 *
 * Returns in_len unchanged when p0 is not inside the candidate.
 * Otherwise the case bit (0x20) is moved to byte lane (p0 & 3) and XOR-ed
 * into the selected word, masked by generate_cmask() so that only an ASCII
 * letter is affected.
 * NOTE(review): the switch expression is not visible here; the eight cases
 * map to the eight words, so it is presumably p0 / 4 -- confirm.
 */
1960 inline u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1962 if (p0 >= in_len) return (in_len);
/* case bit positioned in the byte lane of the target character */
1964 const u32 tmp = 0x20u << ((p0 & 3) * 8);
1968 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
1969 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
1970 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
1971 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
1972 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
1973 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
1974 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
1975 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
/*
 * rule_op_mangle_reverse: run reverse_block() on the candidate in place.
 *
 * The same arrays are passed as both input and output, so reverse_block()
 * must tolerate in/out aliasing.
 * p0 and p1 are not referenced by the statement shown.
 */
1981 inline u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1983 reverse_block (buf0, buf1, buf0, buf1, in_len);
/*
 * rule_op_mangle_dupeword: append the candidate to itself.
 *
 * Returns in_len unchanged when twice the length would reach 32 bytes.
 * Otherwise append_block8() is called with offset in_len and with buf0/buf1
 * used as destination, left source and right source at the same time.
 * NOTE(review): because all three operands alias, the result depends on the
 * order in which append_block8() stores its output words -- confirm that
 * every platform branch of append_block8() is safe for aliased arguments.
 * NOTE(review): the length update/return is not visible here; presumably the
 * function returns in_len * 2 -- confirm.
 */
1988 inline u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1990 if ((in_len + in_len) >= 32) return (in_len);
1992 u32 out_len = in_len;
1994 append_block8 (out_len, buf0, buf1, buf0, buf1, buf0, buf1);
/*
 * rule_op_mangle_dupeword_times: append p0 further copies of the candidate.
 *
 * Returns in_len unchanged when in_len * (p0 + 1) would reach 32 bytes.
 * Each loop iteration appends tib40/tib41 to buf0/buf1 at offset out_len
 * via append_block8().
 * NOTE(review): the initialisation of tib40/tib41 and the per-iteration
 * update of out_len are not visible here; presumably tib40/tib41 are a copy
 * of the original candidate and out_len grows by in_len each pass -- confirm.
 */
2001 inline u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2003 if (((in_len * p0) + in_len) >= 32) return (in_len);
2005 u32 out_len = in_len;
2019 for (u32 i = 0; i < p0; i++)
2021 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
/*
 * rule_op_mangle_reflect: append the output of reverse_block() to the candidate.
 *
 * Returns in_len unchanged when twice the length would reach 32 bytes.
 * reverse_block() writes its result for the first out_len bytes into the
 * temporaries tib40/tib41, which append_block8() then places after the
 * original at offset out_len.
 * NOTE(review): the declaration of tib40/tib41 and the final length update
 * are not visible here; presumably the function returns in_len * 2 -- confirm.
 */
2029 inline u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2031 if ((in_len + in_len) >= 32) return (in_len);
2033 u32 out_len = in_len;
2038 reverse_block (buf0, buf1, tib40, tib41, out_len);
2040 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
/*
 * rule_op_mangle_append: append the single character p0 to the candidate.
 *
 * Returns in_len unchanged when the grown length would reach 32 bytes.
 * Otherwise append_block1() is called with offset out_len (== in_len) and p0.
 * NOTE(review): the length update/return is not visible here; presumably the
 * function returns in_len + 1 -- confirm.
 */
2047 inline u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2049 if ((in_len + 1) >= 32) return (in_len);
2051 u32 out_len = in_len;
2053 append_block1 (out_len, buf0, buf1, p0);
/*
 * rule_op_mangle_prepend: put the single character p0 in front of the candidate.
 *
 * Returns in_len unchanged when the grown length would reach 32 bytes.
 * Otherwise the buffer is passed through rshift_block() in place and p0 is
 * OR-ed into buf0[0].
 * NOTE(review): presumably rshift_block() moves every byte one position up
 * and leaves the low byte of buf0[0] zero, so the OR stores p0 as the new
 * first character; also assumes p0 fits in 8 bits -- confirm.
 */
2060 inline u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2062 if ((in_len + 1) >= 32) return (in_len);
2064 u32 out_len = in_len;
2066 rshift_block (buf0, buf1, buf0, buf1);
2068 buf0[0] = buf0[0] | p0;
/*
 * rule_op_mangle_rotate_left: move the first character to the end.
 *
 * Returns immediately for an empty candidate.
 * The first character (low byte of buf0[0]) is saved already shifted to the
 * byte lane of the last position (in_len - 1). lshift_block() then drops
 * byte 0 and moves everything one byte down, and the saved character is
 * OR-ed into the word that contains the last position.
 */
2075 inline u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2077 if (in_len == 0) return (in_len);
2079 const u32 in_len1 = in_len - 1;
/* bit offset of the last character inside its 32-bit word */
2081 const u32 sh = (in_len1 & 3) * 8;
2083 const u32 tmp = (buf0[0] & 0xff) << sh;
2085 lshift_block (buf0, buf1, buf0, buf1);
/* word index of the last character selects where the saved byte goes */
2087 switch (in_len1 / 4)
2089 case 0: buf0[0] |= tmp; break;
2090 case 1: buf0[1] |= tmp; break;
2091 case 2: buf0[2] |= tmp; break;
2092 case 3: buf0[3] |= tmp; break;
2093 case 4: buf1[0] |= tmp; break;
2094 case 5: buf1[1] |= tmp; break;
2095 case 6: buf1[2] |= tmp; break;
2096 case 7: buf1[3] |= tmp; break;
/*
 * rule_op_mangle_rotate_right: move the last character to the front.
 *
 * Returns immediately for an empty candidate.
 * The byte at position in_len - 1 is extracted into tmp (word = in_len1 / 4,
 * bit offset = sh). The buffer is then passed through rshift_block() in place
 * and finally cut back to in_len bytes with truncate_right().
 * NOTE(review): the declaration of tmp and the statement that stores tmp as
 * the new first character (between rshift_block and truncate_right) are not
 * visible here -- confirm.
 */
2102 inline u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2104 if (in_len == 0) return (in_len);
2106 const u32 in_len1 = in_len - 1;
/* bit offset of the last character inside its 32-bit word */
2108 const u32 sh = (in_len1 & 3) * 8;
2112 switch (in_len1 / 4)
2114 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2115 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2116 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2117 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2118 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2119 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2120 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2121 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2124 rshift_block (buf0, buf1, buf0, buf1);
/* drop the old last character, which the shift moved to position in_len */
2128 truncate_right (buf0, buf1, in_len);
/*
 * rule_op_mangle_delete_first: remove the first character.
 *
 * Returns immediately for an empty candidate. Otherwise lshift_block() is
 * applied in place, which drops byte 0 and moves every other byte one
 * position down.
 * NOTE(review): the return statement is not visible here; in_len1
 * (in_len - 1) is presumably the returned length -- confirm.
 */
2133 inline u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2135 if (in_len == 0) return (in_len);
2137 const u32 in_len1 = in_len - 1;
2139 lshift_block (buf0, buf1, buf0, buf1);
/*
 * rule_op_mangle_delete_last: remove the last character.
 *
 * Returns immediately for an empty candidate. Otherwise the word containing
 * position in_len - 1 is AND-ed with a mask that keeps only the byte lanes
 * below that position, zeroing the last character (and any higher lanes of
 * the same word). Other words are not touched by the statements shown.
 * NOTE(review): the return statement is not visible here; in_len1 is
 * presumably the returned length -- confirm.
 */
2144 inline u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2146 if (in_len == 0) return (in_len);
2148 const u32 in_len1 = in_len - 1;
/* keep-mask: 0x0, 0xff, 0xffff or 0xffffff depending on the byte lane */
2150 const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1;
2152 switch (in_len1 / 4)
2154 case 0: buf0[0] &= tmp; break;
2155 case 1: buf0[1] &= tmp; break;
2156 case 2: buf0[2] &= tmp; break;
2157 case 3: buf0[3] &= tmp; break;
2158 case 4: buf1[0] &= tmp; break;
2159 case 5: buf1[1] &= tmp; break;
2160 case 6: buf1[2] &= tmp; break;
2161 case 7: buf1[3] &= tmp; break;
/*
 * rule_op_mangle_delete_at: remove the character at position p0.
 *
 * Returns in_len unchanged when p0 is not inside the candidate.
 * A copy of the buffer shifted down by one byte is built in tib40/tib41 with
 * lshift_block(). ml keeps the byte lanes below p0 inside the word that
 * contains p0; each case starts by keeping that low part of the original word.
 * NOTE(review): the remainder of every case expression, the switch header
 * and the declarations of tib40/tib41 are not visible here; presumably the
 * high part of the word and all following words are taken from the shifted
 * copy and the function returns in_len - 1 -- confirm.
 */
2167 inline u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2169 if (p0 >= in_len) return (in_len);
2171 u32 out_len = in_len;
2176 lshift_block (buf0, buf1, tib40, tib41);
/* mask of the byte lanes that precede position p0 in its word */
2178 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2183 case 0: buf0[0] = (buf0[0] & ml)
2193 case 1: buf0[1] = (buf0[1] & ml)
2202 case 2: buf0[2] = (buf0[2] & ml)
2210 case 3: buf0[3] = (buf0[3] & ml)
2217 case 4: buf1[0] = (buf1[0] & ml)
2223 case 5: buf1[1] = (buf1[1] & ml)
2228 case 6: buf1[2] = (buf1[2] & ml)
2232 case 7: buf1[3] = (buf1[3] & ml)
/*
 * rule_op_mangle_extract: keep a sub-range of the candidate starting at p0.
 *
 * Returns in_len unchanged when p0 is outside the candidate or when
 * p0 + p1 exceeds in_len. Otherwise the buffer is passed through
 * lshift_block_N() with a count of p0 and then cut with truncate_right()
 * at out_len.
 * NOTE(review): the definition of out_len is not visible here; presumably it
 * is p1 (the number of characters to keep) -- confirm.
 */
2242 inline u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2244 if (p0 >= in_len) return (in_len);
2246 if ((p0 + p1) > in_len) return (in_len);
2250 lshift_block_N (buf0, buf1, buf0, buf1, p0);
2252 truncate_right (buf0, buf1, out_len);
/*
 * rule_op_mangle_omit: remove p1 characters starting at position p0.
 *
 * Returns in_len unchanged when p0 is outside the candidate or when
 * p0 + p1 exceeds in_len. A copy of the buffer produced by lshift_block_N()
 * with a count of p1 is built in tib40/tib41. ml keeps the byte lanes below
 * p0 inside the word that contains p0; each case starts by keeping that low
 * part of the original word.
 * NOTE(review): the remainder of every case expression, the switch header
 * and the setup of tib40/tib41 are not visible here; presumably the rest is
 * taken from the shifted copy and the function returns in_len - p1 -- confirm.
 */
2257 inline u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2259 if (p0 >= in_len) return (in_len);
2261 if ((p0 + p1) > in_len) return (in_len);
2263 u32 out_len = in_len;
2277 lshift_block_N (buf0, buf1, tib40, tib41, p1);
/* mask of the byte lanes that precede position p0 in its word */
2279 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2284 case 0: buf0[0] = (buf0[0] & ml)
2294 case 1: buf0[1] = (buf0[1] & ml)
2303 case 2: buf0[2] = (buf0[2] & ml)
2311 case 3: buf0[3] = (buf0[3] & ml)
2318 case 4: buf1[0] = (buf1[0] & ml)
2324 case 5: buf1[1] = (buf1[1] & ml)
2329 case 6: buf1[2] = (buf1[2] & ml)
2333 case 7: buf1[3] = (buf1[3] & ml)
/*
 * rule_op_mangle_insert: insert character p1 at position p0.
 *
 * Returns in_len unchanged when p0 is past the end of the candidate
 * (p0 == in_len is allowed) or when the grown length would reach 32 bytes.
 * tib40/tib41 receive the output of rshift_block() applied to the buffer.
 * In the word that contains p0 the result is assembled from three parts:
 *   (original & ml)  - byte lanes below p0, unchanged
 *   p1n              - the new character in lane (p0 & 3)
 *   (shifted  & mr)  - byte lanes above p0, taken from the shifted copy
 * NOTE(review): the switch header, the declarations of tib40/tib41 and the
 * statements that follow each case line are not visible here; presumably the
 * following words are copied from the shifted copy and the function returns
 * in_len + 1. Also assumes p1 fits in 8 bits -- confirm.
 */
2343 inline u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2345 if (p0 > in_len) return (in_len);
2347 if ((in_len + 1) >= 32) return (in_len);
2349 u32 out_len = in_len;
2354 rshift_block (buf0, buf1, tib40, tib41);
2356 const u32 p1n = p1 << ((p0 & 3) * 8);
2358 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2360 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
2364 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
2373 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
2381 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
2388 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
2394 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
2399 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
2403 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
2406 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
/*
 * rule_op_mangle_overstrike: overwrite the character at position p0 with p1.
 *
 * Returns in_len unchanged when p0 is not inside the candidate.
 * m clears byte lane (p0 & 3); p1n is p1 moved into that lane. The selected
 * word becomes (word & m) | p1n.
 * NOTE(review): p1 is not masked to 8 bits before the shift, so the caller is
 * assumed to pass a single byte value; the switch expression is not visible
 * here and is presumably p0 / 4 -- confirm.
 */
2415 inline u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2417 if (p0 >= in_len) return (in_len);
2419 const u32 p1n = p1 << ((p0 & 3) * 8);
2421 const u32 m = ~(0xffu << ((p0 & 3) * 8));
2425 case 0: buf0[0] = (buf0[0] & m) | p1n; break;
2426 case 1: buf0[1] = (buf0[1] & m) | p1n; break;
2427 case 2: buf0[2] = (buf0[2] & m) | p1n; break;
2428 case 3: buf0[3] = (buf0[3] & m) | p1n; break;
2429 case 4: buf1[0] = (buf1[0] & m) | p1n; break;
2430 case 5: buf1[1] = (buf1[1] & m) | p1n; break;
2431 case 6: buf1[2] = (buf1[2] & m) | p1n; break;
2432 case 7: buf1[3] = (buf1[3] & m) | p1n; break;
/*
 * rule_op_mangle_truncate_at: cut the candidate at position p0.
 *
 * Returns in_len unchanged when p0 is not smaller than the current length
 * (nothing to cut). Otherwise truncate_right() is called with p0 as the
 * length to keep.
 * NOTE(review): the return statement is not visible here; presumably the
 * function returns p0 as the new length -- confirm.
 */
2438 inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2440 if (p0 >= in_len) return (in_len);
2442 truncate_right (buf0, buf1, p0);
/*
 * rule_op_mangle_replace: replace every occurrence of character p0 with p1.
 *
 * Two implementations are shown:
 *
 *  - Loop over positions 0 .. in_len - 1 using __byte_perm_S(). For position
 *    n the first __byte_perm_S() extracts byte lane (n & 3) of word n / 4
 *    into the low byte (upper bytes zero) and compares it with p0; on a match
 *    the second __byte_perm_S() rebuilds the word with the low byte of p1 in
 *    that lane and the other three lanes taken from the original word.
 *    NOTE(review): the preprocessor guard and switch header of this path are
 *    not visible here; presumably it is the IS_NV path switching on i, with a
 *    break after each case -- confirm. The comparison is against the full
 *    32-bit p0, so p0 is assumed to be a single byte value.
 *
 *  - IS_AMD / IS_GENERIC path: p0 and p1 are broadcast to all four lanes of a
 *    uchar4; each word is reinterpreted as uchar4 and select() picks p1 in
 *    every lane that equals p0. This path processes all eight words
 *    regardless of in_len.
 */
2447 inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2450 for (u32 i = 0; i < in_len; i++)
2454 case 0: if ((__byte_perm_S (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7650);
2456 case 1: if ((__byte_perm_S (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7604);
2458 case 2: if ((__byte_perm_S (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7054);
2460 case 3: if ((__byte_perm_S (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x0654);
2462 case 4: if ((__byte_perm_S (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7650);
2464 case 5: if ((__byte_perm_S (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7604);
2466 case 6: if ((__byte_perm_S (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7054);
2468 case 7: if ((__byte_perm_S (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x0654);
2470 case 8: if ((__byte_perm_S (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7650);
2472 case 9: if ((__byte_perm_S (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7604);
2474 case 10: if ((__byte_perm_S (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7054);
2476 case 11: if ((__byte_perm_S (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x0654);
2478 case 12: if ((__byte_perm_S (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7650);
2480 case 13: if ((__byte_perm_S (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7604);
2482 case 14: if ((__byte_perm_S (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7054);
2484 case 15: if ((__byte_perm_S (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x0654);
2486 case 16: if ((__byte_perm_S (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7650);
2488 case 17: if ((__byte_perm_S (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7604);
2490 case 18: if ((__byte_perm_S (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7054);
2492 case 19: if ((__byte_perm_S (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x0654);
2494 case 20: if ((__byte_perm_S (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7650);
2496 case 21: if ((__byte_perm_S (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7604);
2498 case 22: if ((__byte_perm_S (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7054);
2500 case 23: if ((__byte_perm_S (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x0654);
2502 case 24: if ((__byte_perm_S (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7650);
2504 case 25: if ((__byte_perm_S (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7604);
2506 case 26: if ((__byte_perm_S (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7054);
2508 case 27: if ((__byte_perm_S (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x0654);
2510 case 28: if ((__byte_perm_S (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7650);
2512 case 29: if ((__byte_perm_S (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7604);
2514 case 30: if ((__byte_perm_S (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7054);
2516 case 31: if ((__byte_perm_S (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x0654);
2522 #if defined IS_AMD || defined IS_GENERIC
/* broadcast the search and replacement characters to all four byte lanes */
2523 const uchar4 tmp0 = (uchar4) (p0);
2524 const uchar4 tmp1 = (uchar4) (p1);
/* per word: lanes equal to p0 take p1, all other lanes keep their value */
2528 tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp);
2529 tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp);
2530 tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp);
2531 tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp);
2532 tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp);
2533 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp);
2534 tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp);
2535 tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp);
/*
 * rule_op_mangle_purgechar: rule handler with the common
 * (p0, p1, buf0, buf1, in_len) -> u32 signature used by the other rule_op_* functions.
 * NOTE(review): the body is not visible here; judging by the name it would
 * remove every occurrence of character p0 -- confirm whether it is
 * implemented or a stub.
 */
2541 inline u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/*
 * rule_op_mangle_togglecase_rec: rule handler with the common
 * (p0, p1, buf0, buf1, in_len) -> u32 signature used by the other rule_op_* functions.
 * NOTE(review): the body is not visible here; confirm whether it is
 * implemented or a stub before relying on it.
 */
2547 inline u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2553 inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2555 if ( in_len == 0) return (in_len);
2556 if ((in_len + p0) >= 32) return (in_len);
2558 u32 out_len = in_len;
2560 const u32 tmp = buf0[0] & 0xFF;
2562 rshift_block_N (buf0, buf1, buf0, buf1, p0);
2567 case 1: buf0[0] |= tmp;
2569 case 2: buf0[0] |= __byte_perm_S (tmp, 0, 0x5400);
2571 case 3: buf0[0] |= __byte_perm_S (tmp, 0, 0x4000);
2573 case 4: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2575 case 5: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2578 case 6: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2579 buf0[1] |= __byte_perm_S (tmp, 0, 0x5400);
2581 case 7: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2582 buf0[1] |= __byte_perm_S (tmp, 0, 0x4000);
2584 case 8: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2585 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2587 case 9: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2588 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2591 case 10: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2592 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2593 buf0[2] |= __byte_perm_S (tmp, 0, 0x5400);
2595 case 11: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2596 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2597 buf0[2] |= __byte_perm_S (tmp, 0, 0x4000);
2599 case 12: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2600 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2601 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2603 case 13: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2604 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2605 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2608 case 14: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2609 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2610 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2611 buf0[3] |= __byte_perm_S (tmp, 0, 0x5400);
2613 case 15: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2614 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2615 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2616 buf0[3] |= __byte_perm_S (tmp, 0, 0x4000);
2618 case 16: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2619 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2620 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2621 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2623 case 17: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2624 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2625 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2626 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2629 case 18: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2630 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2631 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2632 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2633 buf1[0] |= __byte_perm_S (tmp, 0, 0x5400);
2635 case 19: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2636 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2637 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2638 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2639 buf1[0] |= __byte_perm_S (tmp, 0, 0x4000);
2641 case 20: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2642 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2643 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2644 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2645 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2647 case 21: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2648 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2649 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2650 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2651 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2654 case 22: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2655 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2656 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2657 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2658 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2659 buf1[1] |= __byte_perm_S (tmp, 0, 0x5400);
2661 case 23: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2662 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2663 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2664 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2665 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2666 buf1[1] |= __byte_perm_S (tmp, 0, 0x4000);
2668 case 24: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2669 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2670 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2671 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2672 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2673 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2675 case 25: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2676 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2677 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2678 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2679 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2680 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2683 case 26: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2684 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2685 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2686 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2687 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2688 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2689 buf1[2] |= __byte_perm_S (tmp, 0, 0x5400);
2691 case 27: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2692 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2693 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2694 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2695 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2696 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2697 buf1[2] |= __byte_perm_S (tmp, 0, 0x4000);
2699 case 28: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2700 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2701 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2702 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2703 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2704 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2705 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2707 case 29: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2708 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2709 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2710 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2711 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2712 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2713 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2716 case 30: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2717 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2718 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2719 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2720 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2721 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2722 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2723 buf1[3] |= __byte_perm_S (tmp, 0, 0x5400);
2725 case 31: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2726 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2727 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2728 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2729 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2730 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2731 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2732 buf1[3] |= __byte_perm_S (tmp, 0, 0x4000);
2737 #if defined IS_AMD || defined IS_GENERIC
2740 case 1: buf0[0] |= tmp << 0;
2742 case 2: buf0[0] |= tmp << 0 | tmp << 8;
2744 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2746 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2748 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2749 buf0[1] |= tmp << 0;
2751 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2752 buf0[1] |= tmp << 0 | tmp << 8;
2754 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2755 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2757 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2758 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2760 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2761 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2762 buf0[2] |= tmp << 0;
2764 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2765 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2766 buf0[2] |= tmp << 0 | tmp << 8;
2768 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2769 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2770 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2772 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2773 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2774 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2776 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2777 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2778 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2779 buf0[3] |= tmp << 0;
2781 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2782 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2783 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2784 buf0[3] |= tmp << 0 | tmp << 8;
2786 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2787 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2788 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2789 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2791 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2792 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2793 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2794 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2796 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2797 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2798 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2799 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2800 buf1[0] |= tmp << 0;
2802 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2803 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2804 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2805 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2806 buf1[0] |= tmp << 0 | tmp << 8;
2808 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2809 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2810 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2811 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2812 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2814 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2815 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2816 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2817 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2818 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2820 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2821 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2822 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2823 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2824 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2825 buf1[1] |= tmp << 0;
2827 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2828 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2829 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2830 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2831 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2832 buf1[1] |= tmp << 0 | tmp << 8;
2834 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2835 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2836 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2837 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2838 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2839 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2841 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2842 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2843 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2844 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2845 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2846 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2848 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2849 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2850 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2851 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2852 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2853 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2854 buf1[2] |= tmp << 0;
2856 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2857 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2858 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2859 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2860 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2861 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2862 buf1[2] |= tmp << 0 | tmp << 8;
2864 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2865 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2866 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2867 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2868 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2869 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2870 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2872 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2873 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2874 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2875 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2876 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2877 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2878 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2880 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2881 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2882 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2883 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2884 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2885 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2886 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2887 buf1[3] |= tmp << 0;
2889 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2890 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2891 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2892 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2893 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2894 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2895 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2896 buf1[3] |= tmp << 0 | tmp << 8;
2898 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2899 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2900 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2901 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2902 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2903 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2904 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2905 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2915 inline u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2917 if ( in_len == 0) return (in_len);
2918 if ((in_len + p0) >= 32) return (in_len);
2920 const u32 in_len1 = in_len - 1;
2922 const u32 sh = (in_len1 & 3) * 8;
2926 switch (in_len1 / 4)
2928 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2929 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2930 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2931 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2932 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2933 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2934 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2935 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2938 u32 out_len = in_len;
2940 for (u32 i = 0; i < p0; i++)
2942 append_block1 (out_len, buf0, buf1, tmp);
2950 inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2952 if ( in_len == 0) return (in_len);
2953 if ((in_len + in_len) >= 32) return (in_len);
2955 u32 out_len = in_len;
2961 tib40[0] = __byte_perm_S (buf0[0], 0, 0x1100);
2962 tib40[1] = __byte_perm_S (buf0[0], 0, 0x3322);
2963 tib40[2] = __byte_perm_S (buf0[1], 0, 0x1100);
2964 tib40[3] = __byte_perm_S (buf0[1], 0, 0x3322);
2965 tib41[0] = __byte_perm_S (buf0[2], 0, 0x1100);
2966 tib41[1] = __byte_perm_S (buf0[2], 0, 0x3322);
2967 tib41[2] = __byte_perm_S (buf0[3], 0, 0x1100);
2968 tib41[3] = __byte_perm_S (buf0[3], 0, 0x3322);
2980 #if defined IS_AMD || defined IS_GENERIC
2981 tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8);
2982 tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8);
2983 tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8);
2984 tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8);
2985 tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8);
2986 tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8);
2987 tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8);
2988 tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8);
2990 buf0[0] = tib40[0] | (tib40[0] << 8);
2991 buf0[1] = tib40[1] | (tib40[1] << 8);
2992 buf0[2] = tib40[2] | (tib40[2] << 8);
2993 buf0[3] = tib40[3] | (tib40[3] << 8);
2994 buf1[0] = tib41[0] | (tib41[0] << 8);
2995 buf1[1] = tib41[1] | (tib41[1] << 8);
2996 buf1[2] = tib41[2] | (tib41[2] << 8);
2997 buf1[3] = tib41[3] | (tib41[3] << 8);
3000 out_len = out_len + out_len;
3005 inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3007 if (in_len < 2) return (in_len);
3010 buf0[0] = __byte_perm_S (buf0[0], 0, 0x3201);
3013 #if defined IS_AMD || defined IS_GENERIC
3014 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
/*
 * rule_op_mangle_switch_last: exchange the last two characters of the
 * password stored in buf0[0..3] followed by buf1[0..3] (four characters per
 * word, lowest byte first).
 *
 * p0 and p1 do not appear in any visible line of this function.
 * A password shorter than two characters is returned unchanged.
 *
 * NOTE(review): the switch selector, the preprocessor line guarding the first
 * table and the final return are not visible in this listing. The case labels
 * 2..31 suggest the selector is in_len, and the final return is presumably
 * in_len -- confirm against the full file.
 */
3020 inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/* Nothing to exchange when there are fewer than two characters. */
3022 if (in_len < 2) return (in_len);
/*
 * Table 1: __byte_perm_S variant.
 * Assumes __byte_perm_S follows CUDA __byte_perm semantics (each selector
 * nibble picks one byte out of the 8-byte pool {first arg, second arg}) --
 * TODO confirm against its definition.
 * Lengths whose last two characters share a word use a single permute of that
 * word (selectors 0x5401, 0x4120, 0x2310). Lengths 5, 9, 13, ... where the
 * last character is the first byte of a word use three permutes over the two
 * neighbouring words.
 */
3027 case 2: buf0[0] = __byte_perm_S (buf0[0], 0, 0x5401);
3029 case 3: buf0[0] = __byte_perm_S (buf0[0], 0, 0x4120);
3031 case 4: buf0[0] = __byte_perm_S (buf0[0], 0, 0x2310);
3033 case 5: buf0[1] = __byte_perm_S (buf0[1], buf0[0], 0x7210);
3034 buf0[0] = __byte_perm_S (buf0[0], buf0[1], 0x4210);
3035 buf0[1] = __byte_perm_S (buf0[1], 0, 0x6543);
3037 case 6: buf0[1] = __byte_perm_S (buf0[1], 0, 0x5401);
3039 case 7: buf0[1] = __byte_perm_S (buf0[1], 0, 0x4120);
3041 case 8: buf0[1] = __byte_perm_S (buf0[1], 0, 0x2310);
3043 case 9: buf0[2] = __byte_perm_S (buf0[2], buf0[1], 0x7210);
3044 buf0[1] = __byte_perm_S (buf0[1], buf0[2], 0x4210);
3045 buf0[2] = __byte_perm_S (buf0[2], 0, 0x6543);
3047 case 10: buf0[2] = __byte_perm_S (buf0[2], 0, 0x5401);
3049 case 11: buf0[2] = __byte_perm_S (buf0[2], 0, 0x4120);
3051 case 12: buf0[2] = __byte_perm_S (buf0[2], 0, 0x2310);
3053 case 13: buf0[3] = __byte_perm_S (buf0[3], buf0[2], 0x7210);
3054 buf0[2] = __byte_perm_S (buf0[2], buf0[3], 0x4210);
3055 buf0[3] = __byte_perm_S (buf0[3], 0, 0x6543);
3057 case 14: buf0[3] = __byte_perm_S (buf0[3], 0, 0x5401);
3059 case 15: buf0[3] = __byte_perm_S (buf0[3], 0, 0x4120);
3061 case 16: buf0[3] = __byte_perm_S (buf0[3], 0, 0x2310);
3063 case 17: buf1[0] = __byte_perm_S (buf1[0], buf0[3], 0x7210);
3064 buf0[3] = __byte_perm_S (buf0[3], buf1[0], 0x4210);
3065 buf1[0] = __byte_perm_S (buf1[0], 0, 0x6543);
3067 case 18: buf1[0] = __byte_perm_S (buf1[0], 0, 0x5401);
3069 case 19: buf1[0] = __byte_perm_S (buf1[0], 0, 0x4120);
3071 case 20: buf1[0] = __byte_perm_S (buf1[0], 0, 0x2310);
3073 case 21: buf1[1] = __byte_perm_S (buf1[1], buf1[0], 0x7210);
3074 buf1[0] = __byte_perm_S (buf1[0], buf1[1], 0x4210);
3075 buf1[1] = __byte_perm_S (buf1[1], 0, 0x6543);
3077 case 22: buf1[1] = __byte_perm_S (buf1[1], 0, 0x5401);
3079 case 23: buf1[1] = __byte_perm_S (buf1[1], 0, 0x4120);
3081 case 24: buf1[1] = __byte_perm_S (buf1[1], 0, 0x2310);
3083 case 25: buf1[2] = __byte_perm_S (buf1[2], buf1[1], 0x7210);
3084 buf1[1] = __byte_perm_S (buf1[1], buf1[2], 0x4210);
3085 buf1[2] = __byte_perm_S (buf1[2], 0, 0x6543);
3087 case 26: buf1[2] = __byte_perm_S (buf1[2], 0, 0x5401);
3089 case 27: buf1[2] = __byte_perm_S (buf1[2], 0, 0x4120);
3091 case 28: buf1[2] = __byte_perm_S (buf1[2], 0, 0x2310);
3093 case 29: buf1[3] = __byte_perm_S (buf1[3], buf1[2], 0x7210);
3094 buf1[2] = __byte_perm_S (buf1[2], buf1[3], 0x4210);
3095 buf1[3] = __byte_perm_S (buf1[3], 0, 0x6543);
3097 case 30: buf1[3] = __byte_perm_S (buf1[3], 0, 0x5401);
3099 case 31: buf1[3] = __byte_perm_S (buf1[3], 0, 0x4120);
/*
 * Table 2: shift-and-mask variant for AMD and generic devices.
 * Same-word lengths rebuild the word from masked pieces. Bytes above the
 * swapped pair are not carried over, so for a length of 4n+2 the upper two
 * bytes of the word come out as zero, and for 4n+3 the top byte does.
 * Cross-word lengths (5, 9, 13, ...) first OR the previous word's top byte
 * into the newer word, then move the newer word's low byte into the previous
 * word's top lane, and finally shift the newer word right by 24 so that only
 * its top byte remains, now in the lowest lane.
 */
3104 #if defined IS_AMD || defined IS_GENERIC
3107 case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
3109 case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00);
3111 case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000);
3113 case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1];
3114 buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24);
3115 buf0[1] = (buf0[1] >> 24);
3117 case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF);
3119 case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00);
3121 case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000);
3123 case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2];
3124 buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24);
3125 buf0[2] = (buf0[2] >> 24);
3127 case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF);
3129 case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00);
3131 case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000);
3133 case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3];
3134 buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24);
3135 buf0[3] = (buf0[3] >> 24);
3137 case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF);
3139 case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00);
3141 case 16: buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000);
3143 case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0];
3144 buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24);
3145 buf1[0] = (buf1[0] >> 24);
3147 case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF);
3149 case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00);
3151 case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000);
3153 case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1];
3154 buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24);
3155 buf1[1] = (buf1[1] >> 24);
3157 case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF);
3159 case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00);
3161 case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000);
3163 case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2];
3164 buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24);
3165 buf1[2] = (buf1[2] >> 24);
3167 case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF);
3169 case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00);
3171 case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000);
3173 case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3];
3174 buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24);
3175 buf1[3] = (buf1[3] >> 24);
3177 case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF);
3179 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00);
/*
 * rule_op_mangle_switch_at: exchange two characters of the password stored in
 * buf0[0..3] followed by buf1[0..3] (four characters per word, lowest byte
 * first). The positions are given by p0 and p1.
 *
 * If either position is at or beyond in_len, the function returns in_len and
 * leaves the buffers untouched.
 *
 * Each of the two implementation variants below consists of three tables:
 *   1. read one character into tmp0,
 *   2. read a second character into tmp1 and overwrite it with tmp0,
 *   3. write tmp1 into a character slot.
 *
 * NOTE(review): the switch selectors, the declarations of tmp0/tmp1, the
 * preprocessor line guarding the first variant and the final return are not
 * visible in this listing. For a swap to result, tables 1 and 3 presumably
 * switch on p0 and table 2 on p1, and the final return is presumably in_len --
 * confirm against the full file.
 */
3187 inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/* Both positions must address an existing character. */
3189 if (p0 >= in_len) return (in_len);
3190 if (p1 >= in_len) return (in_len);
/*
 * __byte_perm_S variant, table 1: fetch one character into tmp0.
 * Assumes __byte_perm_S follows CUDA __byte_perm semantics -- TODO confirm.
 * Under that assumption selector 0x654N puts byte N of the word into the
 * lowest lane and fills the other lanes from the zero second argument.
 */
3198 case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
3200 case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
3202 case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
3204 case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
3206 case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
3208 case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
3210 case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
3212 case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
3214 case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
3216 case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
3218 case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
3220 case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
3222 case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
3224 case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
3226 case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
3228 case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
3230 case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
3232 case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
3234 case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
3236 case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
3238 case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
3240 case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
3242 case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
3244 case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
3246 case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
3248 case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
3250 case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
3252 case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
3254 case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
3256 case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
3258 case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
3260 case 31: tmp0 = __byte_perm_S (buf1[3], 0, 0x6543);
/*
 * __byte_perm_S variant, table 2: save the second character in tmp1, then
 * drop tmp0's low byte into that lane. Selectors 0x7650 / 0x7604 / 0x7054 /
 * 0x0654 replace lane 0 / 1 / 2 / 3 respectively and keep the other three
 * lanes of the word (same semantics assumption as above).
 */
3266 case 0: tmp1 = __byte_perm_S (buf0[0], 0, 0x6540);
3267 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7650);
3269 case 1: tmp1 = __byte_perm_S (buf0[0], 0, 0x6541);
3270 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7604);
3272 case 2: tmp1 = __byte_perm_S (buf0[0], 0, 0x6542);
3273 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7054);
3275 case 3: tmp1 = __byte_perm_S (buf0[0], 0, 0x6543);
3276 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x0654);
3278 case 4: tmp1 = __byte_perm_S (buf0[1], 0, 0x6540);
3279 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7650);
3281 case 5: tmp1 = __byte_perm_S (buf0[1], 0, 0x6541);
3282 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7604);
3284 case 6: tmp1 = __byte_perm_S (buf0[1], 0, 0x6542);
3285 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7054);
3287 case 7: tmp1 = __byte_perm_S (buf0[1], 0, 0x6543);
3288 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x0654);
3290 case 8: tmp1 = __byte_perm_S (buf0[2], 0, 0x6540);
3291 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7650);
3293 case 9: tmp1 = __byte_perm_S (buf0[2], 0, 0x6541);
3294 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7604);
3296 case 10: tmp1 = __byte_perm_S (buf0[2], 0, 0x6542);
3297 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7054);
3299 case 11: tmp1 = __byte_perm_S (buf0[2], 0, 0x6543);
3300 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x0654);
3302 case 12: tmp1 = __byte_perm_S (buf0[3], 0, 0x6540);
3303 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7650);
3305 case 13: tmp1 = __byte_perm_S (buf0[3], 0, 0x6541);
3306 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7604);
3308 case 14: tmp1 = __byte_perm_S (buf0[3], 0, 0x6542);
3309 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7054);
3311 case 15: tmp1 = __byte_perm_S (buf0[3], 0, 0x6543);
3312 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x0654);
3314 case 16: tmp1 = __byte_perm_S (buf1[0], 0, 0x6540);
3315 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7650);
3317 case 17: tmp1 = __byte_perm_S (buf1[0], 0, 0x6541);
3318 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7604);
3320 case 18: tmp1 = __byte_perm_S (buf1[0], 0, 0x6542);
3321 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7054);
3323 case 19: tmp1 = __byte_perm_S (buf1[0], 0, 0x6543);
3324 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x0654);
3326 case 20: tmp1 = __byte_perm_S (buf1[1], 0, 0x6540);
3327 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7650);
3329 case 21: tmp1 = __byte_perm_S (buf1[1], 0, 0x6541);
3330 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7604);
3332 case 22: tmp1 = __byte_perm_S (buf1[1], 0, 0x6542);
3333 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7054);
3335 case 23: tmp1 = __byte_perm_S (buf1[1], 0, 0x6543);
3336 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x0654);
3338 case 24: tmp1 = __byte_perm_S (buf1[2], 0, 0x6540);
3339 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7650);
3341 case 25: tmp1 = __byte_perm_S (buf1[2], 0, 0x6541);
3342 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7604);
3344 case 26: tmp1 = __byte_perm_S (buf1[2], 0, 0x6542);
3345 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7054);
3347 case 27: tmp1 = __byte_perm_S (buf1[2], 0, 0x6543);
3348 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x0654);
3350 case 28: tmp1 = __byte_perm_S (buf1[3], 0, 0x6540);
3351 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7650);
3353 case 29: tmp1 = __byte_perm_S (buf1[3], 0, 0x6541);
3354 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7604);
3356 case 30: tmp1 = __byte_perm_S (buf1[3], 0, 0x6542);
3357 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7054);
3359 case 31: tmp1 = __byte_perm_S (buf1[3], 0, 0x6543);
3360 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x0654);
/*
 * __byte_perm_S variant, table 3: write the saved tmp1 character into a lane,
 * using the same lane-replacement selectors as table 2.
 */
3366 case 0: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7650);
3368 case 1: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7604);
3370 case 2: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7054);
3372 case 3: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x0654);
3374 case 4: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7650);
3376 case 5: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7604);
3378 case 6: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7054);
3380 case 7: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x0654);
3382 case 8: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7650);
3384 case 9: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7604);
3386 case 10: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7054);
3388 case 11: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x0654);
3390 case 12: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7650);
3392 case 13: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7604);
3394 case 14: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7054);
3396 case 15: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x0654);
3398 case 16: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7650);
3400 case 17: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7604);
3402 case 18: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7054);
3404 case 19: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x0654);
3406 case 20: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7650);
3408 case 21: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7604);
3410 case 22: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7054);
3412 case 23: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x0654);
3414 case 24: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7650);
3416 case 25: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7604);
3418 case 26: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7054);
3420 case 27: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x0654);
3422 case 28: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7650);
3424 case 29: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7604);
3426 case 30: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7054);
3428 case 31: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x0654);
/*
 * Shift-and-mask variant for AMD and generic devices, table 1: fetch one
 * character into tmp0 by shifting its lane down to bit 0 and masking to 8 bits.
 */
3433 #if defined IS_AMD || defined IS_GENERIC
3436 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
3438 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
3440 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
3442 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
3444 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
3446 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
3448 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
3450 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
3452 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
3454 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
3456 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
3458 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
3460 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
3462 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
3464 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
3466 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
3468 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
3470 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
3472 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
3474 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
3476 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
3478 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
3480 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
3482 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
3484 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
3486 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
3488 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
3490 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
3492 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
3494 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
3496 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
3498 case 31: tmp0 = (buf1[3] >> 24) & 0xFF;
/*
 * Shift-and-mask variant, table 2: save the second character in tmp1, then
 * clear its lane in the word and OR tmp0 into it.
 */
3504 case 0: tmp1 = (buf0[0] >> 0) & 0xff;
3505 buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0;
3507 case 1: tmp1 = (buf0[0] >> 8) & 0xff;
3508 buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8;
3510 case 2: tmp1 = (buf0[0] >> 16) & 0xff;
3511 buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16;
3513 case 3: tmp1 = (buf0[0] >> 24) & 0xff;
3514 buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24;
3516 case 4: tmp1 = (buf0[1] >> 0) & 0xff;
3517 buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0;
3519 case 5: tmp1 = (buf0[1] >> 8) & 0xff;
3520 buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8;
3522 case 6: tmp1 = (buf0[1] >> 16) & 0xff;
3523 buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16;
3525 case 7: tmp1 = (buf0[1] >> 24) & 0xff;
3526 buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24;
3528 case 8: tmp1 = (buf0[2] >> 0) & 0xff;
3529 buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0;
3531 case 9: tmp1 = (buf0[2] >> 8) & 0xff;
3532 buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8;
3534 case 10: tmp1 = (buf0[2] >> 16) & 0xff;
3535 buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16;
3537 case 11: tmp1 = (buf0[2] >> 24) & 0xff;
3538 buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24;
3540 case 12: tmp1 = (buf0[3] >> 0) & 0xff;
3541 buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0;
3543 case 13: tmp1 = (buf0[3] >> 8) & 0xff;
3544 buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8;
3546 case 14: tmp1 = (buf0[3] >> 16) & 0xff;
3547 buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16;
3549 case 15: tmp1 = (buf0[3] >> 24) & 0xff;
3550 buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24;
3552 case 16: tmp1 = (buf1[0] >> 0) & 0xff;
3553 buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0;
3555 case 17: tmp1 = (buf1[0] >> 8) & 0xff;
3556 buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8;
3558 case 18: tmp1 = (buf1[0] >> 16) & 0xff;
3559 buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16;
3561 case 19: tmp1 = (buf1[0] >> 24) & 0xff;
3562 buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24;
3564 case 20: tmp1 = (buf1[1] >> 0) & 0xff;
3565 buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0;
3567 case 21: tmp1 = (buf1[1] >> 8) & 0xff;
3568 buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8;
3570 case 22: tmp1 = (buf1[1] >> 16) & 0xff;
3571 buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16;
3573 case 23: tmp1 = (buf1[1] >> 24) & 0xff;
3574 buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24;
3576 case 24: tmp1 = (buf1[2] >> 0) & 0xff;
3577 buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0;
3579 case 25: tmp1 = (buf1[2] >> 8) & 0xff;
3580 buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8;
3582 case 26: tmp1 = (buf1[2] >> 16) & 0xff;
3583 buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16;
3585 case 27: tmp1 = (buf1[2] >> 24) & 0xff;
3586 buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24;
3588 case 28: tmp1 = (buf1[3] >> 0) & 0xff;
3589 buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0;
3591 case 29: tmp1 = (buf1[3] >> 8) & 0xff;
3592 buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8;
3594 case 30: tmp1 = (buf1[3] >> 16) & 0xff;
3595 buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16;
3597 case 31: tmp1 = (buf1[3] >> 24) & 0xff;
3598 buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24;
/*
 * Shift-and-mask variant, table 3: clear a lane and OR the saved tmp1
 * character into it.
 */
3604 case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0;
3606 case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8;
3608 case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16;
3610 case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24;
3612 case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0;
3614 case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8;
3616 case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16;
3618 case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24;
3620 case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0;
3622 case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8;
3624 case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16;
3626 case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24;
3628 case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0;
3630 case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8;
3632 case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16;
3634 case 15: buf0[3] = (buf0[3] & 0x00ffffff) | tmp1 << 24;
3636 case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0;
3638 case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8;
3640 case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16;
3642 case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24;
3644 case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0;
3646 case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8;
3648 case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16;
3650 case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24;
3652 case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0;
3654 case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8;
3656 case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16;
3658 case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24;
3660 case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0;
3662 case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8;
3664 case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16;
3666 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24;
3674 inline u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3676 if (p0 >= in_len) return (in_len);
3678 const u32 mr = 0xffu << ((p0 & 3) * 8);
3683 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
3684 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
3685 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
3686 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
3687 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
3688 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
3689 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
3690 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
/**
 * Rule op: shift the character at byte position p0 right by one bit.
 *
 * p0     - byte position of the character to modify
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when p0 is out of range
 *
 * NOTE(review): several lines of this definition (braces, the switch header,
 * the definition of ml and the final return) are not visible here.
 */
3696 inline u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Nothing to do when p0 does not address an existing character.
3698 if (p0 >= in_len) return (in_len);
// mr covers byte (p0 & 3) of a 32-bit word, counted from the least significant byte.
3700 const u32 mr = 0xffu << ((p0 & 3) * 8);
// NOTE(review): ml is presumably the complement of mr (keeps the other three bytes) - confirm.
// NOTE(review): the case labels presumably index the word that holds byte p0 (p0 / 4) - confirm.
// Each case shifts the selected byte right by one; the final & mr discards the bit
// that would otherwise drop into the next lower byte.
3705 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
3706 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
3707 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
3708 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
3709 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
3710 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
3711 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
3712 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
/**
 * Rule op: add one to the character at byte position p0.
 *
 * p0     - byte position of the character to modify
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when p0 is out of range
 *
 * NOTE(review): several lines of this definition (braces, the switch header,
 * the definition of ml and the final return) are not visible here.
 */
3718 inline u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Nothing to do when p0 does not address an existing character.
3720 if (p0 >= in_len) return (in_len);
// mr covers byte (p0 & 3) of a 32-bit word, counted from the least significant byte.
3722 const u32 mr = 0xffu << ((p0 & 3) * 8);
// n is the value 1 positioned inside the selected byte.
3725 const u32 n = 0x01010101 & mr;
// NOTE(review): ml is presumably the complement of mr (keeps the other three bytes) - confirm.
// NOTE(review): the case labels presumably index the word that holds byte p0 (p0 / 4) - confirm.
// Each case increments the selected byte; the final & mr drops any carry out of
// that byte, so 0xff wraps to 0x00 without touching its neighbour.
3729 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
3730 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
3731 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
3732 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
3733 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
3734 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
3735 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
3736 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
/**
 * Rule op: subtract one from the character at byte position p0.
 *
 * p0     - byte position of the character to modify
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when p0 is out of range
 *
 * NOTE(review): several lines of this definition (braces, the switch header,
 * the definition of ml and the final return) are not visible here.
 */
3742 inline u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Nothing to do when p0 does not address an existing character.
3744 if (p0 >= in_len) return (in_len);
// mr covers byte (p0 & 3) of a 32-bit word, counted from the least significant byte.
3746 const u32 mr = 0xffu << ((p0 & 3) * 8);
// n is the value 1 positioned inside the selected byte.
3749 const u32 n = 0x01010101 & mr;
// NOTE(review): ml is presumably the complement of mr (keeps the other three bytes) - confirm.
// NOTE(review): the case labels presumably index the word that holds byte p0 (p0 / 4) - confirm.
// Each case decrements the selected byte; the final & mr drops any borrow that
// reaches higher bytes, so 0x00 wraps to 0xff without touching its neighbour.
3753 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
3754 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
3755 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
3756 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
3757 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
3758 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
3759 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
3760 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
/**
 * Rule op: overwrite the character at byte position p0 with the character
 * that follows it (position p0 + 1).
 *
 * p0     - byte position of the character to overwrite
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when there is no following character
 *
 * NOTE(review): several lines of this definition (braces, the declarations of
 * tib40/tib41 and ml, the switch header and the final return) are not visible here.
 */
3766 inline u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// The source character p0 + 1 must lie inside the candidate.
3768 if ((p0 + 1) >= in_len) return (in_len);
// tib40/tib41 receive the candidate moved one byte towards position 0 by
// lshift_block, so byte p0 of the copy holds the original byte p0 + 1.
3773 lshift_block (buf0, buf1, tib40, tib41);
// mr covers byte (p0 & 3) of a 32-bit word, counted from the least significant byte.
3775 const u32 mr = 0xffu << ((p0 & 3) * 8);
// NOTE(review): ml is presumably the complement of mr (keeps the other three bytes) - confirm.
// NOTE(review): the case labels presumably index the word that holds byte p0 (p0 / 4) - confirm.
// Each case keeps the untouched bytes of the word and takes the selected byte from the shifted copy.
3780 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3781 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3782 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3783 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3784 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3785 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3786 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3787 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
/**
 * Rule op: overwrite the character at byte position p0 with a byte taken from
 * a copy of the candidate produced by rshift_block.
 *
 * p0     - byte position of the character to overwrite; 0 is rejected
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when p0 is 0 or out of range
 *
 * NOTE(review): several lines of this definition (braces, the declarations of
 * tib40/tib41 and ml, the switch header and the final return) are not visible here.
 */
3793 inline u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Position 0 is left untouched.
3795 if (p0 == 0) return (in_len);
// Nothing to do when p0 does not address an existing character.
3797 if (p0 >= in_len) return (in_len);
// NOTE(review): the body of rshift_block is not visible; presumably it mirrors lshift_block
// and moves the candidate one byte away from position 0, so byte p0 of the
// copy holds the original byte p0 - 1 - confirm.
3802 rshift_block (buf0, buf1, tib40, tib41);
// mr covers byte (p0 & 3) of a 32-bit word, counted from the least significant byte.
3804 const u32 mr = 0xffu << ((p0 & 3) * 8);
// NOTE(review): ml is presumably the complement of mr (keeps the other three bytes) - confirm.
// NOTE(review): the case labels presumably index the word that holds byte p0 (p0 / 4) - confirm.
// Each case keeps the untouched bytes of the word and takes the selected byte from the shifted copy.
3809 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3810 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3811 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3812 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3813 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3814 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3815 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3816 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
/**
 * Rule op: combine a truncated copy of the candidate (tib40/tib41, cut with
 * length p0) with the candidate shifted by p0 via rshift_block_N.
 *
 * p0     - block length passed to truncate_right and rshift_block_N
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when the rule is rejected
 *
 * NOTE(review): several lines of this definition (braces, the declaration and
 * filling of tib40/tib41, the update of out_len and the final return) are not
 * visible here. Presumably tib40/tib41 start as a copy of buf0/buf1 and
 * out_len grows by p0 - confirm.
 */
3822 inline u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// The block cannot be longer than the candidate itself.
3824 if (p0 > in_len) return (in_len);
// Reject when in_len + p0 would reach 32, the byte size of buf0 plus buf1.
3826 if ((in_len + p0) >= 32) return (in_len);
3828 u32 out_len = in_len;
// NOTE(review): presumably keeps only the first p0 bytes of the copy - confirm against truncate_right.
3842 truncate_right (tib40, tib41, p0);
// Shift the candidate in place by p0 bytes; input and output buffers are the same arrays.
// NOTE(review): presumably this frees the first p0 byte positions - confirm against rshift_block_N.
3844 rshift_block_N (buf0, buf1, buf0, buf1, p0);
// Merge the saved block into the shifted candidate.
3846 buf0[0] |= tib40[0];
3847 buf0[1] |= tib40[1];
3848 buf0[2] |= tib40[2];
3849 buf0[3] |= tib40[3];
3850 buf1[0] |= tib41[0];
3851 buf1[1] |= tib41[1];
3852 buf1[2] |= tib41[2];
3853 buf1[3] |= tib41[3];
/**
 * Rule op: OR a copy of the candidate, shifted by p0 via rshift_block_N and
 * cut with truncate_left at the current length, into the candidate.
 *
 * p0     - block length passed to rshift_block_N
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; returned as-is when the rule is rejected
 *
 * NOTE(review): several lines of this definition (braces, the declaration of
 * tib40/tib41, the update of out_len and the final return) are not visible
 * here. Presumably out_len grows by p0 - confirm.
 */
3860 inline u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// The block cannot be longer than the candidate itself.
3862 if (p0 > in_len) return (in_len);
// Reject when in_len + p0 would reach 32, the byte size of buf0 plus buf1.
3864 if ((in_len + p0) >= 32) return (in_len);
3866 u32 out_len = in_len;
// Write a copy of the candidate shifted by p0 bytes into tib40/tib41; buf0/buf1 stay unchanged.
// NOTE(review): presumably the last p0 characters land directly behind the current end - confirm.
3871 rshift_block_N (buf0, buf1, tib40, tib41, p0);
// NOTE(review): presumably clears the first out_len bytes of the copy so only the
// part behind the current end remains - confirm against truncate_left.
3873 truncate_left (tib40, tib41, out_len);
// Merge the remaining part of the shifted copy into the candidate.
3875 buf0[0] |= tib40[0];
3876 buf0[1] |= tib40[1];
3877 buf0[2] |= tib40[2];
3878 buf0[3] |= tib40[3];
3879 buf1[0] |= tib41[0];
3880 buf1[1] |= tib41[1];
3881 buf1[2] |= tib41[2];
3882 buf1[3] |= tib41[3];
/**
 * Rule op: title-case the candidate. Every letter first gets bit 0x20 set
 * (lower case); afterwards bit 0x20 is cleared (upper case) on the first
 * character and on letters that follow a space character.
 *
 * p0     - not referenced in the visible lines of this function
 * p1     - not referenced in the visible lines of this function
 * buf0   - four 32-bit words of the candidate, modified in place
 * buf1   - four further 32-bit words of the candidate, modified in place
 * in_len - current candidate length; bounds the per-character loop of the first variant
 *
 * Two platform variants are present. The preprocessor line opening the first
 * one is not visible here (NOTE(review): presumably an NVIDIA-only guard, as it
 * uses __byte_perm_S - confirm); the second is guarded by IS_AMD || IS_GENERIC.
 *
 * NOTE(review): braces, the switch header, the declarations of tmp0/tmp1/tmp
 * and tib40/tib41, the conditions that pick one of the eight masking lines,
 * and the final return are not visible here.
 */
3889 inline u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// generate_cmask yields 0x20 in every byte that is an ASCII letter; OR-ing it in
// sets the case bit, i.e. lower-cases all letters.
3891 buf0[0] |= (generate_cmask (buf0[0]));
3892 buf0[1] |= (generate_cmask (buf0[1]));
3893 buf0[2] |= (generate_cmask (buf0[2]));
3894 buf0[3] |= (generate_cmask (buf0[3]));
3895 buf1[0] |= (generate_cmask (buf1[0]));
3896 buf1[1] |= (generate_cmask (buf1[1]));
3897 buf1[2] |= (generate_cmask (buf1[2]));
3898 buf1[3] |= (generate_cmask (buf1[3]));
// First variant: upper-case the very first character if it is a letter.
3901 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
// Visit each character position i of the candidate.
3903 for (u32 i = 0; i < in_len; i++)
// For position i: tmp0 is built from the word holding byte i with the selector
// 0x654k, k = i & 3 (NOTE(review): assuming CUDA __byte_perm semantics this is
// byte i zero-extended - confirm). tmp1 is a mask that clears the case bit of
// byte i + 1 when that byte is a letter; at i & 3 == 3 it targets the low byte
// of the following word.
3910 case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
3911 tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
3912 case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
3913 tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
3914 case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
3915 tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
3916 case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
3917 tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
3918 case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
3919 tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
3920 case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
3921 tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
3922 case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
3923 tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
3924 case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
3925 tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
3926 case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
3927 tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
3928 case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
3929 tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
3930 case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
3931 tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
3932 case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
3933 tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
3934 case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
3935 tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
3936 case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
3937 tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
3938 case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
3939 tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
3940 case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
3941 tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
3942 case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
3943 tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
3944 case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
3945 tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
3946 case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
3947 tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
3948 case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
3949 tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
3950 case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
3951 tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
3952 case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
3953 tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
3954 case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
3955 tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
3956 case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
3957 tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
3958 case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
3959 tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
3960 case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
3961 tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
3962 case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
3963 tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
3964 case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
3965 tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
3966 case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
3967 tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
3968 case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
3969 tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
3970 case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
3971 tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
// When character i is a space, apply the mask to one word, upper-casing the following letter.
// NOTE(review): the conditions selecting which of these eight lines runs are not
// visible; presumably they pick the word that contains byte i + 1 - confirm.
3976 if (tmp0 == ' ') buf0[0] &= tmp1 ;
3980 if (tmp0 == ' ') buf0[1] &= tmp1 ;
3984 if (tmp0 == ' ') buf0[2] &= tmp1 ;
3988 if (tmp0 == ' ') buf0[3] &= tmp1 ;
3992 if (tmp0 == ' ') buf1[0] &= tmp1 ;
3996 if (tmp0 == ' ') buf1[1] &= tmp1 ;
4000 if (tmp0 == ' ') buf1[2] &= tmp1 ;
4004 if (tmp0 == ' ') buf1[3] &= tmp1 ;
// Second variant: the same effect computed on whole words with byte-vector operations.
4009 #if defined IS_AMD || defined IS_GENERIC
// Byte-vector constants: the space character, an all-zero byte and an all-one byte.
4013 const uchar4 tmp0 = (uchar4) (' ');
4014 const uchar4 tmp1 = (uchar4) (0x00);
4015 const uchar4 tmp2 = (uchar4) (0xff);
// Build a per-byte mask in tib40/tib41: 0xff where the candidate byte is a space, 0x00 elsewhere.
4019 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
4020 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
4021 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
4022 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
4023 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
4024 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
4025 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
4026 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
// Shift the mask by one byte in place, then also mark the first byte of the candidate.
// NOTE(review): presumably the shift makes the mask cover the byte after each space - confirm against rshift_block.
4028 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
// Clear the case bit on every marked byte that is a letter, upper-casing it.
4030 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
4031 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
4032 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
4033 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
4034 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
4035 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
4036 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
4037 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
/**
 * Dispatch a single rule operation to its rule_op_mangle_* handler.
 *
 * name   - rule operation identifier, matched against the RULE_OP_MANGLE_* labels
 * p0     - first rule parameter, forwarded unchanged to the handler
 * p1     - second rule parameter, forwarded unchanged to the handler
 * buf0   - four 32-bit words of the candidate, forwarded to the handler
 * buf1   - four further 32-bit words of the candidate, forwarded to the handler
 * in_len - candidate length before the rule is applied
 *
 * NOTE(review): braces, the switch header and the final return are not
 * visible here; presumably the switch is on name and out_len is returned - confirm.
 */
4043 inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Starts as the input length; each handler result overwrites it.
4045 u32 out_len = in_len;
// Every handler takes the same argument list and yields the new length.
4049 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
4050 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
4051 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
4052 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
4053 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
4054 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
4055 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
4056 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
4057 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
4058 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
4059 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
4060 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
4061 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
4062 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
4063 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
4064 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
4065 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
4066 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
4067 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
4068 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
4069 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
4070 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
4071 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
// The next two operations are disabled in this dispatcher.
4072 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
4073 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
4074 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
4075 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
4076 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
4077 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
4078 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
4079 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
4080 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
4081 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
4082 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
4083 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
4084 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
4085 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
4086 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
4087 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
4088 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
/**
 * Apply a zero-terminated list of packed rule commands to one candidate.
 *
 * cmds - packed rule commands in global memory; a 0 entry ends the list
 * buf0 - four 32-bit words of the candidate, modified in place by the rules
 * buf1 - four further 32-bit words of the candidate, modified in place by the rules
 * len  - candidate length before any rule is applied
 *
 * NOTE(review): braces, the declaration of out_len and the final return are
 * not visible here; presumably out_len starts at len and is returned - confirm.
 */
4094 inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
// Walk the command list until the terminating 0 entry.
4098 for (u32 i = 0; cmds[i] != 0; i++)
4100 const u32 cmd = cmds[i];
// Unpack the command: bits 0-7 rule identifier, bits 8-15 first parameter, bits 16-23 second parameter.
4102 const u32 name = (cmd >> 0) & 0xff;
4103 const u32 p0 = (cmd >> 8) & 0xff;
4104 const u32 p1 = (cmd >> 16) & 0xff;
// The length produced by one rule is the input length of the next.
4106 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
4112 inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
4125 return apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
4134 for (int i = 0; i < VECT_SIZE; i++)
4139 tmp0[0] = pw_buf0[0];
4140 tmp0[1] = pw_buf0[1];
4141 tmp0[2] = pw_buf0[2];
4142 tmp0[3] = pw_buf0[3];
4143 tmp1[0] = pw_buf1[0];
4144 tmp1[1] = pw_buf1[1];
4145 tmp1[2] = pw_buf1[2];
4146 tmp1[3] = pw_buf1[3];
4148 const u32 tmp_len = apply_rules (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
4162 out_len.s0 = tmp_len;
4174 out_len.s1 = tmp_len;
4188 out_len.s2 = tmp_len;
4200 out_len.s3 = tmp_len;
4214 out_len.s4 = tmp_len;
4226 out_len.s5 = tmp_len;
4238 out_len.s6 = tmp_len;
4250 out_len.s7 = tmp_len;
4264 out_len.s8 = tmp_len;
4276 out_len.s9 = tmp_len;
4288 out_len.sa = tmp_len;
4300 out_len.sb = tmp_len;
4312 out_len.sc = tmp_len;
4324 out_len.sd = tmp_len;
4336 out_len.se = tmp_len;
4348 out_len.sf = tmp_len;