2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
/*
 * Forward declarations; the definitions are not part of this excerpt.
 *  - apply_rule:       takes a rule name, two parameters (p0, p1), the two
 *                      4-word buffers and the input length; returns a u32.
 *  - apply_rules:      takes a __global array of u32 commands plus the buffers
 *                      and a length; returns a u32.
 *  - apply_rules_vect: takes a read-only password (pw_buf0/pw_buf1/pw_len), a
 *                      __global kernel_rule_t table and an index (il_pos), and
 *                      writes vector words w0/w1; returns a u32x.
 */
8 inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
9 inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
10 inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
/*
 * Returns a mask with 0x20 set in every byte of `buf` that is an ASCII letter
 * (0x41..0x5a or 0x61..0x7a) and 0 in every other byte.
 *
 * Callers OR the mask in to lower-case, AND its complement to upper-case, or
 * XOR it to toggle case. All four bytes are handled at once; no per-byte sum
 * exceeds 62, so nothing carries into the neighbouring byte.
 */
12 inline u32 generate_cmask (u32 buf)
/* rmask: 0x20 per byte when bit 6 is set and bit 7 is clear (0x40..0x7f). */
14 const u32 rmask = ((buf & 0x40404040) >> 1)
15 & ~((buf & 0x80808080) >> 2);
/* hmask: bit 5 becomes set when the low five bits are >= 27 (above 'z'/'Z'). */
17 const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
/* lmask: bit 5 becomes set when the low five bits are >= 1 (excludes '@'/'`'). */
18 const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
/* Only bit 5 can survive: rmask has no other bits set. */
20 return rmask & ~hmask & lmask;
/*
 * Operates on the 8-word buffer w0[0..3], w1[0..3] using a byte length `len`.
 * Only the mask computation is visible in this excerpt; how the mask is
 * applied to the words is not shown here.
 * NOTE(review): the name suggests everything from byte `len` onward is
 * cleared; confirm against the full body.
 */
23 inline void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
/* Mask covering the low (len % 4) bytes of a word; 0 when len is a multiple of 4. */
25 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
/*
 * Operates on the 8-word buffer w0[0..3], w1[0..3] using a byte length `len`.
 * Only the mask computation is visible in this excerpt; how the mask is
 * applied to the words is not shown here.
 * NOTE(review): the name suggests the bytes below `len` are cleared; confirm
 * against the full body.
 */
76 inline void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
/* Mask covering every byte of a word except the low (len % 4) bytes. */
78 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
/*
 * Shifts the 8-word sequence in0[0..3], in1[0..3] by one byte position towards
 * index 0 and stores the result in out0/out1.
 *
 * Each output word is built from an input word and its successor with
 * amd_bytealign_S (next, current, 1); the last word pairs with 0.
 * NOTE(review): assumes amd_bytealign_S (hi, lo, n) returns the low 32 bits of
 * ((hi:lo) >> 8*n); confirm against the helper's definition.
 *
 * Words are written in ascending order and each one reads only input words at
 * the same or a higher index, so the in-place calls in this file (out aliasing
 * in) read every input word before it is overwritten.
 */
129 inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
131 out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
132 out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
133 out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
134 out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
135 out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
136 out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
137 out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
138 out1[3] = amd_bytealign_S ( 0, in1[3], 1);
/*
 * Shifts the 8-word sequence in0[0..3], in1[0..3] by one byte position away
 * from index 0 and stores the result in out0/out1.
 *
 * Each output word is built from an input word and its predecessor with
 * amd_bytealign_S (current, previous, 3); the first word pairs with 0.
 * NOTE(review): assumes amd_bytealign_S (hi, lo, n) returns the low 32 bits of
 * ((hi:lo) >> 8*n); confirm against the helper's definition.
 *
 * Words are written in descending order and each one reads only input words
 * at the same or a lower index, so the in-place calls in this file (out
 * aliasing in) read every input word before it is overwritten.
 */
141 inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
143 out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
144 out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
145 out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
146 out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
147 out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
148 out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
149 out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
150 out0[0] = amd_bytealign_S (in0[0], 0, 3);
/*
 * Multi-byte variant of lshift_block: shifts the 8-word sequence in0/in1
 * towards index 0 and writes the result to out0/out1.
 *
 * The visible case labels run from 0 to 31. For a label 4*k + r, the visible
 * assignments start reading at input word k and use amd_bytealign_S with byte
 * offset r (a plain word copy when r == 0); the last combined word pairs with 0.
 *
 * The switch header, the `break` statements and part of each case are not in
 * this excerpt. NOTE(review): presumably the switch selects on `num` and the
 * missing assignments fill the remaining output words; confirm against the
 * full source.
 */
153 inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
/* Labels 0..3: source starts at in0[0]. */
157 case 0: out0[0] = in0[0];
166 case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
167 out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
168 out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
169 out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
170 out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
171 out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
172 out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
173 out1[3] = amd_bytealign_S ( 0, in1[3], 1);
175 case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2);
176 out0[1] = amd_bytealign_S (in0[2], in0[1], 2);
177 out0[2] = amd_bytealign_S (in0[3], in0[2], 2);
178 out0[3] = amd_bytealign_S (in1[0], in0[3], 2);
179 out1[0] = amd_bytealign_S (in1[1], in1[0], 2);
180 out1[1] = amd_bytealign_S (in1[2], in1[1], 2);
181 out1[2] = amd_bytealign_S (in1[3], in1[2], 2);
182 out1[3] = amd_bytealign_S ( 0, in1[3], 2);
184 case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3);
185 out0[1] = amd_bytealign_S (in0[2], in0[1], 3);
186 out0[2] = amd_bytealign_S (in0[3], in0[2], 3);
187 out0[3] = amd_bytealign_S (in1[0], in0[3], 3);
188 out1[0] = amd_bytealign_S (in1[1], in1[0], 3);
189 out1[1] = amd_bytealign_S (in1[2], in1[1], 3);
190 out1[2] = amd_bytealign_S (in1[3], in1[2], 3);
191 out1[3] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 4..7: source starts at in0[1]. */
193 case 4: out0[0] = in0[1];
202 case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1);
203 out0[1] = amd_bytealign_S (in0[3], in0[2], 1);
204 out0[2] = amd_bytealign_S (in1[0], in0[3], 1);
205 out0[3] = amd_bytealign_S (in1[1], in1[0], 1);
206 out1[0] = amd_bytealign_S (in1[2], in1[1], 1);
207 out1[1] = amd_bytealign_S (in1[3], in1[2], 1);
208 out1[2] = amd_bytealign_S ( 0, in1[3], 1);
211 case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2);
212 out0[1] = amd_bytealign_S (in0[3], in0[2], 2);
213 out0[2] = amd_bytealign_S (in1[0], in0[3], 2);
214 out0[3] = amd_bytealign_S (in1[1], in1[0], 2);
215 out1[0] = amd_bytealign_S (in1[2], in1[1], 2);
216 out1[1] = amd_bytealign_S (in1[3], in1[2], 2);
217 out1[2] = amd_bytealign_S ( 0, in1[3], 2);
220 case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3);
221 out0[1] = amd_bytealign_S (in0[3], in0[2], 3);
222 out0[2] = amd_bytealign_S (in1[0], in0[3], 3);
223 out0[3] = amd_bytealign_S (in1[1], in1[0], 3);
224 out1[0] = amd_bytealign_S (in1[2], in1[1], 3);
225 out1[1] = amd_bytealign_S (in1[3], in1[2], 3);
226 out1[2] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 8..11: source starts at in0[2]. */
229 case 8: out0[0] = in0[2];
238 case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1);
239 out0[1] = amd_bytealign_S (in1[0], in0[3], 1);
240 out0[2] = amd_bytealign_S (in1[1], in1[0], 1);
241 out0[3] = amd_bytealign_S (in1[2], in1[1], 1);
242 out1[0] = amd_bytealign_S (in1[3], in1[2], 1);
243 out1[1] = amd_bytealign_S ( 0, in1[3], 1);
247 case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2);
248 out0[1] = amd_bytealign_S (in1[0], in0[3], 2);
249 out0[2] = amd_bytealign_S (in1[1], in1[0], 2);
250 out0[3] = amd_bytealign_S (in1[2], in1[1], 2);
251 out1[0] = amd_bytealign_S (in1[3], in1[2], 2);
252 out1[1] = amd_bytealign_S ( 0, in1[3], 2);
256 case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3);
257 out0[1] = amd_bytealign_S (in1[0], in0[3], 3);
258 out0[2] = amd_bytealign_S (in1[1], in1[0], 3);
259 out0[3] = amd_bytealign_S (in1[2], in1[1], 3);
260 out1[0] = amd_bytealign_S (in1[3], in1[2], 3);
261 out1[1] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 12..15: source starts at in0[3]. */
265 case 12: out0[0] = in0[3];
274 case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1);
275 out0[1] = amd_bytealign_S (in1[1], in1[0], 1);
276 out0[2] = amd_bytealign_S (in1[2], in1[1], 1);
277 out0[3] = amd_bytealign_S (in1[3], in1[2], 1);
278 out1[0] = amd_bytealign_S ( 0, in1[3], 1);
283 case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2);
284 out0[1] = amd_bytealign_S (in1[1], in1[0], 2);
285 out0[2] = amd_bytealign_S (in1[2], in1[1], 2);
286 out0[3] = amd_bytealign_S (in1[3], in1[2], 2);
287 out1[0] = amd_bytealign_S ( 0, in1[3], 2);
292 case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3);
293 out0[1] = amd_bytealign_S (in1[1], in1[0], 3);
294 out0[2] = amd_bytealign_S (in1[2], in1[1], 3);
295 out0[3] = amd_bytealign_S (in1[3], in1[2], 3);
296 out1[0] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 16..19: source starts at in1[0]. */
301 case 16: out0[0] = in1[0];
310 case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1);
311 out0[1] = amd_bytealign_S (in1[2], in1[1], 1);
312 out0[2] = amd_bytealign_S (in1[3], in1[2], 1);
313 out0[3] = amd_bytealign_S ( 0, in1[3], 1);
319 case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2);
320 out0[1] = amd_bytealign_S (in1[2], in1[1], 2);
321 out0[2] = amd_bytealign_S (in1[3], in1[2], 2);
322 out0[3] = amd_bytealign_S ( 0, in1[3], 2);
328 case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3);
329 out0[1] = amd_bytealign_S (in1[2], in1[1], 3);
330 out0[2] = amd_bytealign_S (in1[3], in1[2], 3);
331 out0[3] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 20..23: source starts at in1[1]. */
337 case 20: out0[0] = in1[1];
346 case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1);
347 out0[1] = amd_bytealign_S (in1[3], in1[2], 1);
348 out0[2] = amd_bytealign_S ( 0, in1[3], 1);
355 case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2);
356 out0[1] = amd_bytealign_S (in1[3], in1[2], 2);
357 out0[2] = amd_bytealign_S ( 0, in1[3], 2);
364 case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3);
365 out0[1] = amd_bytealign_S (in1[3], in1[2], 3);
366 out0[2] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 24..27: source starts at in1[2]. */
373 case 24: out0[0] = in1[2];
382 case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1);
383 out0[1] = amd_bytealign_S ( 0, in1[3], 1);
391 case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2);
392 out0[1] = amd_bytealign_S ( 0, in1[3], 2);
400 case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3);
401 out0[1] = amd_bytealign_S ( 0, in1[3], 3);
/* Labels 28..31: only in1[3] still contributes. */
409 case 28: out0[0] = in1[3];
418 case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1);
427 case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2);
436 case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3);
/*
 * Multi-byte variant of rshift_block: shifts the 8-word sequence in0/in1 away
 * from index 0 and writes the result to out0/out1.
 *
 * The visible case labels run from 0 to 31. For a label 4*k + r, the visible
 * assignments fill the output from out1[3] downwards, starting from input word
 * (7 - k), and use amd_bytealign_S with byte offset (4 - r) (a plain word copy
 * when r == 0); the lowest combined word pairs with 0.
 *
 * The switch header, the `break` statements and part of each case are not in
 * this excerpt. NOTE(review): presumably the switch selects on `num` and the
 * missing assignments fill the remaining output words; confirm against the
 * full source.
 */
448 inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
/* Labels 0..3: out1[3] is fed from in1[3]. */
452 case 0: out1[3] = in1[3];
461 case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
462 out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
463 out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
464 out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
465 out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
466 out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
467 out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
468 out0[0] = amd_bytealign_S (in0[0], 0, 3);
470 case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2);
471 out1[2] = amd_bytealign_S (in1[2], in1[1], 2);
472 out1[1] = amd_bytealign_S (in1[1], in1[0], 2);
473 out1[0] = amd_bytealign_S (in1[0], in0[3], 2);
474 out0[3] = amd_bytealign_S (in0[3], in0[2], 2);
475 out0[2] = amd_bytealign_S (in0[2], in0[1], 2);
476 out0[1] = amd_bytealign_S (in0[1], in0[0], 2);
477 out0[0] = amd_bytealign_S (in0[0], 0, 2);
479 case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1);
480 out1[2] = amd_bytealign_S (in1[2], in1[1], 1);
481 out1[1] = amd_bytealign_S (in1[1], in1[0], 1);
482 out1[0] = amd_bytealign_S (in1[0], in0[3], 1);
483 out0[3] = amd_bytealign_S (in0[3], in0[2], 1);
484 out0[2] = amd_bytealign_S (in0[2], in0[1], 1);
485 out0[1] = amd_bytealign_S (in0[1], in0[0], 1);
486 out0[0] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 4..7: out1[3] is fed from in1[2]. */
488 case 4: out1[3] = in1[2];
497 case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3);
498 out1[2] = amd_bytealign_S (in1[1], in1[0], 3);
499 out1[1] = amd_bytealign_S (in1[0], in0[3], 3);
500 out1[0] = amd_bytealign_S (in0[3], in0[2], 3);
501 out0[3] = amd_bytealign_S (in0[2], in0[1], 3);
502 out0[2] = amd_bytealign_S (in0[1], in0[0], 3);
503 out0[1] = amd_bytealign_S (in0[0], 0, 3);
506 case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2);
507 out1[2] = amd_bytealign_S (in1[1], in1[0], 2);
508 out1[1] = amd_bytealign_S (in1[0], in0[3], 2);
509 out1[0] = amd_bytealign_S (in0[3], in0[2], 2);
510 out0[3] = amd_bytealign_S (in0[2], in0[1], 2);
511 out0[2] = amd_bytealign_S (in0[1], in0[0], 2);
512 out0[1] = amd_bytealign_S (in0[0], 0, 2);
515 case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1);
516 out1[2] = amd_bytealign_S (in1[1], in1[0], 1);
517 out1[1] = amd_bytealign_S (in1[0], in0[3], 1);
518 out1[0] = amd_bytealign_S (in0[3], in0[2], 1);
519 out0[3] = amd_bytealign_S (in0[2], in0[1], 1);
520 out0[2] = amd_bytealign_S (in0[1], in0[0], 1);
521 out0[1] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 8..11: out1[3] is fed from in1[1]. */
524 case 8: out1[3] = in1[1];
533 case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3);
534 out1[2] = amd_bytealign_S (in1[0], in0[3], 3);
535 out1[1] = amd_bytealign_S (in0[3], in0[2], 3);
536 out1[0] = amd_bytealign_S (in0[2], in0[1], 3);
537 out0[3] = amd_bytealign_S (in0[1], in0[0], 3);
538 out0[2] = amd_bytealign_S (in0[0], 0, 3);
542 case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2);
543 out1[2] = amd_bytealign_S (in1[0], in0[3], 2);
544 out1[1] = amd_bytealign_S (in0[3], in0[2], 2);
545 out1[0] = amd_bytealign_S (in0[2], in0[1], 2);
546 out0[3] = amd_bytealign_S (in0[1], in0[0], 2);
547 out0[2] = amd_bytealign_S (in0[0], 0, 2);
551 case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1);
552 out1[2] = amd_bytealign_S (in1[0], in0[3], 1);
553 out1[1] = amd_bytealign_S (in0[3], in0[2], 1);
554 out1[0] = amd_bytealign_S (in0[2], in0[1], 1);
555 out0[3] = amd_bytealign_S (in0[1], in0[0], 1);
556 out0[2] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 12..15: out1[3] is fed from in1[0]. */
560 case 12: out1[3] = in1[0];
569 case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3);
570 out1[2] = amd_bytealign_S (in0[3], in0[2], 3);
571 out1[1] = amd_bytealign_S (in0[2], in0[1], 3);
572 out1[0] = amd_bytealign_S (in0[1], in0[0], 3);
573 out0[3] = amd_bytealign_S (in0[0], 0, 3);
578 case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2);
579 out1[2] = amd_bytealign_S (in0[3], in0[2], 2);
580 out1[1] = amd_bytealign_S (in0[2], in0[1], 2);
581 out1[0] = amd_bytealign_S (in0[1], in0[0], 2);
582 out0[3] = amd_bytealign_S (in0[0], 0, 2);
587 case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1);
588 out1[2] = amd_bytealign_S (in0[3], in0[2], 1);
589 out1[1] = amd_bytealign_S (in0[2], in0[1], 1);
590 out1[0] = amd_bytealign_S (in0[1], in0[0], 1);
591 out0[3] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 16..19: out1[3] is fed from in0[3]. */
596 case 16: out1[3] = in0[3];
605 case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3);
606 out1[2] = amd_bytealign_S (in0[2], in0[1], 3);
607 out1[1] = amd_bytealign_S (in0[1], in0[0], 3);
608 out1[0] = amd_bytealign_S (in0[0], 0, 3);
614 case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2);
615 out1[2] = amd_bytealign_S (in0[2], in0[1], 2);
616 out1[1] = amd_bytealign_S (in0[1], in0[0], 2);
617 out1[0] = amd_bytealign_S (in0[0], 0, 2);
623 case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1);
624 out1[2] = amd_bytealign_S (in0[2], in0[1], 1);
625 out1[1] = amd_bytealign_S (in0[1], in0[0], 1);
626 out1[0] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 20..23: out1[3] is fed from in0[2]. */
632 case 20: out1[3] = in0[2];
641 case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3);
642 out1[2] = amd_bytealign_S (in0[1], in0[0], 3);
643 out1[1] = amd_bytealign_S (in0[0], 0, 3);
650 case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2);
651 out1[2] = amd_bytealign_S (in0[1], in0[0], 2);
652 out1[1] = amd_bytealign_S (in0[0], 0, 2);
659 case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1);
660 out1[2] = amd_bytealign_S (in0[1], in0[0], 1);
661 out1[1] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 24..27: out1[3] is fed from in0[1]. */
668 case 24: out1[3] = in0[1];
677 case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3);
678 out1[2] = amd_bytealign_S (in0[0], 0, 3);
686 case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2);
687 out1[2] = amd_bytealign_S (in0[0], 0, 2);
695 case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1);
696 out1[2] = amd_bytealign_S (in0[0], 0, 1);
/* Labels 28..31: only in0[0] still contributes. */
704 case 28: out1[3] = in0[0];
713 case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3);
722 case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2);
731 case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1);
/*
 * ORs the value src_r0 into the 8-word buffer dst0/dst1 at a position derived
 * from `offset`.
 *
 * Two groups of case labels are visible:
 *  - labels 0..3 split src_r0 into tmp[0] (src_r0 << 8*k) and tmp[1]
 *    (src_r0 >> (32 - 8*k)), i.e. the part that stays in the target word and
 *    the part that spills over;
 *  - labels 0..7 OR tmp[0] into dst0[0..3] / dst1[0..3].
 *
 * The switch headers, the declaration of `tmp` and any use of tmp[1] are not
 * in this excerpt. NOTE(review): presumably the first switch selects on
 * (offset & 3) and the second on (offset / 4); confirm against the full source.
 */
743 inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
749 case 0: tmp[0] = src_r0;
752 case 1: tmp[0] = src_r0 << 8;
753 tmp[1] = src_r0 >> 24;
755 case 2: tmp[0] = src_r0 << 16;
756 tmp[1] = src_r0 >> 16;
758 case 3: tmp[0] = src_r0 << 24;
759 tmp[1] = src_r0 >> 8;
765 case 0: dst0[0] |= tmp[0];
768 case 1: dst0[1] |= tmp[0];
771 case 2: dst0[2] |= tmp[0];
774 case 3: dst0[3] |= tmp[0];
777 case 4: dst1[0] |= tmp[0];
780 case 5: dst1[1] |= tmp[0];
783 case 6: dst1[2] |= tmp[0];
786 case 7: dst1[3] |= tmp[0];
/*
 * Builds dst0/dst1 from a left source (src_l0/src_l1) and a right source
 * (src_r0/src_r1) placed at a byte position derived from `offset`.
 *
 * The visible assignments come in groups, one per offset. Within a group the
 * destination words are written from dst1[3] downwards:
 *  - the upper words combine neighbouring right-source words with
 *    amd_bytealign_S (byte offset 1, 2 or 3);
 *  - the lowest written word merges the left source word with
 *    src_r0[0] << 24 / 16 / 8, i.e. the boundary word where the two meet.
 *
 * Callers in this file pass the same arrays as dst and src_l, and sometimes
 * also as src_r. Writing from the top word down means each group reads the
 * right-source words it needs before it overwrites them.
 *
 * The switch header, the case labels, the `break` statements and the
 * word-aligned cases are not in this excerpt. NOTE(review): the mapping of
 * each group to an offset value is therefore not shown here; confirm against
 * the full source.
 */
791 inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
/* Boundary word is dst1[3]. */
796 dst1[3] = src_l1[3] | src_r0[0] << 24;
799 dst1[3] = src_l1[3] | src_r0[0] << 16;
802 dst1[3] = src_l1[3] | src_r0[0] << 8;
/* Boundary word is dst1[2]. */
808 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
809 dst1[2] = src_l1[2] | src_r0[0] << 24;
812 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
813 dst1[2] = src_l1[2] | src_r0[0] << 16;
816 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
817 dst1[2] = src_l1[2] | src_r0[0] << 8;
/* Boundary word is dst1[1]. */
824 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
825 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
826 dst1[1] = src_l1[1] | src_r0[0] << 24;
829 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
830 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
831 dst1[1] = src_l1[1] | src_r0[0] << 16;
834 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
835 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
836 dst1[1] = src_l1[1] | src_r0[0] << 8;
/* Boundary word is dst1[0]. */
844 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
845 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
846 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
847 dst1[0] = src_l1[0] | src_r0[0] << 24;
850 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
851 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
852 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
853 dst1[0] = src_l1[0] | src_r0[0] << 16;
856 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
857 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
858 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
859 dst1[0] = src_l1[0] | src_r0[0] << 8;
/* Boundary word is dst0[3]; src_r1 starts to contribute. */
868 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
869 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
870 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
871 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
872 dst0[3] = src_l0[3] | src_r0[0] << 24;
875 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
876 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
877 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
878 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
879 dst0[3] = src_l0[3] | src_r0[0] << 16;
882 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
883 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
884 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
885 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
886 dst0[3] = src_l0[3] | src_r0[0] << 8;
/* Boundary word is dst0[2]. */
896 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
897 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
898 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
899 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
900 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
901 dst0[2] = src_l0[2] | src_r0[0] << 24;
904 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
905 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
906 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
907 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
908 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
909 dst0[2] = src_l0[2] | src_r0[0] << 16;
912 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
913 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
914 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
915 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
916 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
917 dst0[2] = src_l0[2] | src_r0[0] << 8;
/* Boundary word is dst0[1]. */
928 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
929 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
930 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
931 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
932 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
933 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
934 dst0[1] = src_l0[1] | src_r0[0] << 24;
937 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
938 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
939 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
940 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
941 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
942 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
943 dst0[1] = src_l0[1] | src_r0[0] << 16;
946 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
947 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
948 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
949 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
950 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
951 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
952 dst0[1] = src_l0[1] | src_r0[0] << 8;
/* Boundary word is dst0[0]. */
964 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 1);
965 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
966 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
967 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
968 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
969 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
970 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
971 dst0[0] = src_l0[0] | src_r0[0] << 24;
974 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 2);
975 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
976 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
977 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
978 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
979 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
980 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
981 dst0[0] = src_l0[0] | src_r0[0] << 16;
984 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 3);
985 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
986 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
987 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
988 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
989 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
990 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
991 dst0[0] = src_l0[0] | src_r0[0] << 8;
/* Plain word copies from the right source (tail of a word-aligned case). */
1000 dst0[1] = src_r0[1];
1001 dst0[0] = src_r0[0];
/*
 * Produces a byte-reversed copy of the first `len` bytes of in0/in1 in
 * out0/out1, in two visible steps:
 *  1. rshift_block_N by (32 - len) bytes, which moves the data to the high end
 *     of the 32-byte buffer;
 *  2. swap32_S on each word read from tib40/tib41.
 *
 * The declaration and filling of tib40/tib41 are not in this excerpt.
 * NOTE(review): presumably they hold the shifted words in reversed word
 * order; confirm against the full source. No case label for 32 is visible in
 * rshift_block_N here, so check how len == 0 (shift of 32) is handled.
 */
1006 inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
1008 rshift_block_N (in0, in1, out0, out1, 32 - len);
1022 out0[0] = swap32_S (tib40[0]);
1023 out0[1] = swap32_S (tib40[1]);
1024 out0[2] = swap32_S (tib40[2]);
1025 out0[3] = swap32_S (tib40[3]);
1026 out1[0] = swap32_S (tib41[0]);
1027 out1[1] = swap32_S (tib41[1]);
1028 out1[2] = swap32_S (tib41[2]);
1029 out1[3] = swap32_S (tib41[3]);
/*
 * Lower-cases every ASCII letter in the 32-byte buffer buf0/buf1 by OR-ing in
 * the 0x20 letter mask from generate_cmask; non-letter bytes are unchanged.
 * p0 and p1 are not used by the visible lines. The return statement is not in
 * this excerpt.
 */
1032 inline u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1034 buf0[0] |= (generate_cmask (buf0[0]));
1035 buf0[1] |= (generate_cmask (buf0[1]));
1036 buf0[2] |= (generate_cmask (buf0[2]));
1037 buf0[3] |= (generate_cmask (buf0[3]));
1038 buf1[0] |= (generate_cmask (buf1[0]));
1039 buf1[1] |= (generate_cmask (buf1[1]));
1040 buf1[2] |= (generate_cmask (buf1[2]));
1041 buf1[3] |= (generate_cmask (buf1[3]));
/*
 * Upper-cases every ASCII letter in the 32-byte buffer buf0/buf1 by clearing
 * the 0x20 bit that generate_cmask reports for letter bytes; non-letter bytes
 * are unchanged. p0 and p1 are not used by the visible lines. The return
 * statement is not in this excerpt.
 */
1046 inline u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1048 buf0[0] &= ~(generate_cmask (buf0[0]));
1049 buf0[1] &= ~(generate_cmask (buf0[1]));
1050 buf0[2] &= ~(generate_cmask (buf0[2]));
1051 buf0[3] &= ~(generate_cmask (buf0[3]));
1052 buf1[0] &= ~(generate_cmask (buf1[0]));
1053 buf1[1] &= ~(generate_cmask (buf1[1]));
1054 buf1[2] &= ~(generate_cmask (buf1[2]));
1055 buf1[3] &= ~(generate_cmask (buf1[3]));
/*
 * Lower-cases the whole buffer via rule_op_mangle_lrest, then upper-cases the
 * first byte. The return statement is not in this excerpt.
 */
1060 inline u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1062 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
/* 0x00000020 restricts the letter mask to the lowest byte of buf0[0]. */
1064 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
/*
 * Upper-cases the whole buffer via rule_op_mangle_urest, then lower-cases the
 * first byte. The return statement is not in this excerpt.
 */
1069 inline u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1071 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
/* 0x00000020 restricts the letter mask to the lowest byte of buf0[0]. */
1073 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
/*
 * Toggles the case of every ASCII letter in the 32-byte buffer buf0/buf1 by
 * XOR-ing in the 0x20 letter mask from generate_cmask; non-letter bytes are
 * unchanged. p0 and p1 are not used by the visible lines. The return statement
 * is not in this excerpt.
 */
1078 inline u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1080 buf0[0] ^= (generate_cmask (buf0[0]));
1081 buf0[1] ^= (generate_cmask (buf0[1]));
1082 buf0[2] ^= (generate_cmask (buf0[2]));
1083 buf0[3] ^= (generate_cmask (buf0[3]));
1084 buf1[0] ^= (generate_cmask (buf1[0]));
1085 buf1[1] ^= (generate_cmask (buf1[1]));
1086 buf1[2] ^= (generate_cmask (buf1[2]));
1087 buf1[3] ^= (generate_cmask (buf1[3]));
/*
 * Toggles the case of the byte at position p0 if that byte is an ASCII letter.
 * Returns in_len unchanged when p0 is at or beyond the end of the input.
 *
 * The switch header and the final return are not in this excerpt.
 * NOTE(review): the case labels 0..7 presumably select on (p0 / 4); confirm
 * against the full source.
 */
1092 inline u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1094 if (p0 >= in_len) return (in_len);
/* Case bit (0x20) positioned on byte (p0 & 3) of the target word. */
1096 const u32 tmp = 0x20u << ((p0 & 3) * 8);
1100 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
1101 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
1102 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
1103 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
1104 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
1105 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
1106 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
1107 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
/*
 * Reverses the first in_len bytes of buf0/buf1 in place by calling
 * reverse_block with the same arrays as input and output. The return statement
 * is not in this excerpt.
 */
1113 inline u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1115 reverse_block (buf0, buf1, buf0, buf1, in_len);
/*
 * Appends a copy of the buffer to itself. Returns in_len unchanged, without
 * modifying the buffer, when the doubled length would reach 32 bytes.
 * The update of out_len and the final return are not in this excerpt.
 */
1120 inline u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1122 if ((in_len + in_len) >= 32) return (in_len);
1124 u32 out_len = in_len;
/* buf0/buf1 serve as destination, left source and right source at once. */
1126 append_block8 (out_len, buf0, buf1, buf0, buf1, buf0, buf1);
/*
 * Appends the contents of tib40/tib41 to the buffer p0 times. Returns in_len
 * unchanged, without modifying the buffer, when (p0 + 1) * in_len would reach
 * 32 bytes.
 *
 * The declaration and filling of tib40/tib41, the loop body's length update
 * and the final return are not in this excerpt. NOTE(review): presumably
 * tib40/tib41 hold a copy of the original word and out_len grows by in_len
 * per iteration; confirm against the full source.
 */
1133 inline u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1135 if (((in_len * p0) + in_len) >= 32) return (in_len);
1137 u32 out_len = in_len;
1151 for (u32 i = 0; i < p0; i++)
1153 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
/*
 * Appends the reversed buffer to the buffer itself. Returns in_len unchanged,
 * without modifying the buffer, when the doubled length would reach 32 bytes.
 * The declaration of tib40/tib41, the update of out_len and the final return
 * are not in this excerpt.
 */
1161 inline u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1163 if ((in_len + in_len) >= 32) return (in_len);
1165 u32 out_len = in_len;
/* Reversed copy goes to the temporaries; buf0/buf1 are left intact here. */
1170 reverse_block (buf0, buf1, tib40, tib41, out_len);
1172 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
/*
 * Appends the value p0 at byte offset in_len via append_block1. Returns in_len
 * unchanged, without modifying the buffer, when in_len + 1 would reach 32.
 * The update of out_len and the final return are not in this excerpt.
 */
1179 inline u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1181 if ((in_len + 1) >= 32) return (in_len);
1183 u32 out_len = in_len;
1185 append_block1 (out_len, buf0, buf1, p0);
/*
 * Inserts p0 at the front of the buffer: the buffer is shifted by one byte
 * with rshift_block (in place) and p0 is OR-ed into the first word. Returns
 * in_len unchanged, without modifying the buffer, when in_len + 1 would reach
 * 32. The update of out_len and the final return are not in this excerpt.
 */
1192 inline u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1194 if ((in_len + 1) >= 32) return (in_len);
1196 u32 out_len = in_len;
1198 rshift_block (buf0, buf1, buf0, buf1);
/* NOTE(review): assumes p0 fits in one byte, otherwise it overlaps the shifted data. */
1200 buf0[0] = buf0[0] | p0;
/*
 * Rotates the in_len-byte string by one position: the first byte is removed
 * from the front and re-inserted at the last position. Empty input is returned
 * unchanged. The final return is not in this excerpt.
 */
1207 inline u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1209 if (in_len == 0) return (in_len);
/* Index of the last byte. */
1211 const u32 in_len1 = in_len - 1;
/* Bit offset of the last byte within its word. */
1213 const u32 sh = (in_len1 & 3) * 8;
/* Save the first byte, already positioned for its new slot. */
1215 const u32 tmp = (buf0[0] & 0xff) << sh;
1217 lshift_block (buf0, buf1, buf0, buf1);
/* OR the saved byte into the word that holds the last position. */
1219 switch (in_len1 / 4)
1221 case 0: buf0[0] |= tmp; break;
1222 case 1: buf0[1] |= tmp; break;
1223 case 2: buf0[2] |= tmp; break;
1224 case 3: buf0[3] |= tmp; break;
1225 case 4: buf1[0] |= tmp; break;
1226 case 5: buf1[1] |= tmp; break;
1227 case 6: buf1[2] |= tmp; break;
1228 case 7: buf1[3] |= tmp; break;
/*
 * Rotates the in_len-byte string by one position in the opposite direction to
 * rule_op_mangle_rotate_left. The visible steps read the last byte into tmp,
 * shift the buffer by one byte with rshift_block and truncate it back to
 * in_len bytes. Empty input is returned unchanged.
 *
 * The declaration of tmp, the lines between rshift_block and truncate_right,
 * and the final return are not in this excerpt. NOTE(review): presumably the
 * saved byte is OR-ed into buf0[0] there; confirm against the full source.
 */
1234 inline u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1236 if (in_len == 0) return (in_len);
/* Index of the last byte. */
1238 const u32 in_len1 = in_len - 1;
/* Bit offset of the last byte within its word. */
1240 const u32 sh = (in_len1 & 3) * 8;
/* Extract the last byte from the word that holds it. */
1244 switch (in_len1 / 4)
1246 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
1247 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
1248 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
1249 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
1250 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
1251 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
1252 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
1253 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
1256 rshift_block (buf0, buf1, buf0, buf1);
1260 truncate_right (buf0, buf1, in_len);
/*
 * Removes the first byte by shifting the buffer one byte with lshift_block
 * (in place). Empty input is returned unchanged. The final return is not in
 * this excerpt; NOTE(review): presumably it returns in_len1, confirm.
 */
1265 inline u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1267 if (in_len == 0) return (in_len);
1269 const u32 in_len1 = in_len - 1;
1271 lshift_block (buf0, buf1, buf0, buf1);
/*
 * Removes the last byte by masking it (and anything above it) out of the word
 * that holds it. Empty input is returned unchanged. The final return is not in
 * this excerpt; NOTE(review): presumably it returns in_len1, confirm.
 */
1276 inline u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1278 if (in_len == 0) return (in_len);
/* Index of the last byte. */
1280 const u32 in_len1 = in_len - 1;
/* Keeps only the bytes below the last byte within its word. */
1282 const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1;
1284 switch (in_len1 / 4)
1286 case 0: buf0[0] &= tmp; break;
1287 case 1: buf0[1] &= tmp; break;
1288 case 2: buf0[2] &= tmp; break;
1289 case 3: buf0[3] &= tmp; break;
1290 case 4: buf1[0] &= tmp; break;
1291 case 5: buf1[1] &= tmp; break;
1292 case 6: buf1[2] &= tmp; break;
1293 case 7: buf1[3] &= tmp; break;
/*
 * Deletes the byte at position p0. Returns in_len unchanged when p0 is at or
 * beyond the end of the input.
 *
 * Visible steps: a copy of the buffer shifted by one byte is written to
 * tib40/tib41, and each case starts by keeping the bytes below p0 in the
 * affected word (buf & ml).
 *
 * The declaration of tib40/tib41, the switch header, the rest of each case
 * expression and the final return are not in this excerpt. NOTE(review):
 * presumably each case merges in the shifted copy above position p0; confirm
 * against the full source.
 */
1299 inline u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1301 if (p0 >= in_len) return (in_len);
1303 u32 out_len = in_len;
1308 lshift_block (buf0, buf1, tib40, tib41);
/* Mask of the bytes below position (p0 & 3) within a word. */
1310 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
1315 case 0: buf0[0] = (buf0[0] & ml)
1325 case 1: buf0[1] = (buf0[1] & ml)
1334 case 2: buf0[2] = (buf0[2] & ml)
1342 case 3: buf0[3] = (buf0[3] & ml)
1349 case 4: buf1[0] = (buf1[0] & ml)
1355 case 5: buf1[1] = (buf1[1] & ml)
1360 case 6: buf1[2] = (buf1[2] & ml)
1364 case 7: buf1[3] = (buf1[3] & ml)
/*
 * Keeps a substring that starts at position p0: the buffer is shifted by p0
 * bytes with lshift_block_N (in place) and then cut to out_len bytes with
 * truncate_right. Returns in_len unchanged when p0 is at or beyond the end of
 * the input or when p0 + p1 exceeds in_len.
 *
 * The definition of out_len and the final return are not in this excerpt.
 * NOTE(review): presumably out_len is p1; confirm against the full source.
 */
1374 inline u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1376 if (p0 >= in_len) return (in_len);
1378 if ((p0 + p1) > in_len) return (in_len);
1382 lshift_block_N (buf0, buf1, buf0, buf1, p0);
1384 truncate_right (buf0, buf1, out_len);
/*
 * Removes p1 bytes starting at position p0. Returns in_len unchanged when p0
 * is at or beyond the end of the input or when p0 + p1 exceeds in_len.
 *
 * Visible steps: a copy of the buffer shifted by p1 bytes is written to
 * tib40/tib41, and each case starts by keeping the bytes below p0 in the
 * affected word (buf & ml).
 *
 * The declaration of tib40/tib41, the switch header, the rest of each case
 * expression and the final return are not in this excerpt. NOTE(review):
 * presumably each case merges in the shifted copy above position p0; confirm
 * against the full source.
 */
1389 inline u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1391 if (p0 >= in_len) return (in_len);
1393 if ((p0 + p1) > in_len) return (in_len);
1395 u32 out_len = in_len;
1409 lshift_block_N (buf0, buf1, tib40, tib41, p1);
/* Mask of the bytes below position (p0 & 3) within a word. */
1411 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
1416 case 0: buf0[0] = (buf0[0] & ml)
1426 case 1: buf0[1] = (buf0[1] & ml)
1435 case 2: buf0[2] = (buf0[2] & ml)
1443 case 3: buf0[3] = (buf0[3] & ml)
1450 case 4: buf1[0] = (buf1[0] & ml)
1456 case 5: buf1[1] = (buf1[1] & ml)
1461 case 6: buf1[2] = (buf1[2] & ml)
1465 case 7: buf1[3] = (buf1[3] & ml)
/*
 * Inserts the value p1 at byte position p0. Returns in_len unchanged when p0
 * is past the end of the input (p0 == in_len is allowed) or when in_len + 1
 * would reach 32.
 *
 * The affected word is rebuilt from three parts: the original bytes below p0,
 * the new byte, and the bytes above p0 taken from a copy shifted by one byte
 * (tib40/tib41).
 *
 * The declaration of tib40/tib41, the switch header, the handling of the words
 * after the affected one and the final return are not in this excerpt.
 * NOTE(review): the case labels 0..7 presumably select on (p0 / 4); confirm
 * against the full source.
 */
1475 inline u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1477 if (p0 > in_len) return (in_len);
1479 if ((in_len + 1) >= 32) return (in_len);
1481 u32 out_len = in_len;
1486 rshift_block (buf0, buf1, tib40, tib41);
/* New byte positioned on byte (p0 & 3) of the word. */
1488 const u32 p1n = p1 << ((p0 & 3) * 8);
/* Mask of the bytes below the insert position. */
1490 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
/* Mask of the bytes above the insert position. */
1492 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
1496 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
1505 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
1513 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
1520 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
1526 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
1531 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
1535 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
1538 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
/*
 * Overwrites the byte at position p0 with p1. Returns in_len unchanged when p0
 * is at or beyond the end of the input.
 *
 * The switch header and the final return are not in this excerpt.
 * NOTE(review): the case labels 0..7 presumably select on (p0 / 4); confirm
 * against the full source.
 */
1547 inline u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1549 if (p0 >= in_len) return (in_len);
/* Replacement byte positioned on byte (p0 & 3) of the word. */
1551 const u32 p1n = p1 << ((p0 & 3) * 8);
/* Mask that clears only the target byte. */
1553 const u32 m = ~(0xffu << ((p0 & 3) * 8));
1557 case 0: buf0[0] = (buf0[0] & m) | p1n; break;
1558 case 1: buf0[1] = (buf0[1] & m) | p1n; break;
1559 case 2: buf0[2] = (buf0[2] & m) | p1n; break;
1560 case 3: buf0[3] = (buf0[3] & m) | p1n; break;
1561 case 4: buf1[0] = (buf1[0] & m) | p1n; break;
1562 case 5: buf1[1] = (buf1[1] & m) | p1n; break;
1563 case 6: buf1[2] = (buf1[2] & m) | p1n; break;
1564 case 7: buf1[3] = (buf1[3] & m) | p1n; break;
/*
 * Cuts the buffer at position p0 by calling truncate_right with p0 as the new
 * length. Returns in_len unchanged when p0 is at or beyond the end of the
 * input. The final return is not in this excerpt; NOTE(review): presumably it
 * returns p0, confirm.
 */
1570 inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1572 if (p0 >= in_len) return (in_len);
1574 truncate_right (buf0, buf1, p0);
/*
 * Replaces every byte equal to p0 with p1 across all eight words of buf0/buf1.
 * Each word is viewed as a uchar4, compared lane by lane against p0, and the
 * matching lanes are swapped for p1 with select ().
 *
 * The visible lines process the full 32 bytes regardless of in_len. The
 * declaration of the temporary `tmp` and the return statement are not in this
 * excerpt.
 */
1579 inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/* Scalar-to-vector casts: the value is converted to uchar and copied to all four lanes. */
1581 const uchar4 tmp0 = (uchar4) (p0);
1582 const uchar4 tmp1 = (uchar4) (p1);
1586 tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp);
1587 tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp);
1588 tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp);
1589 tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp);
1590 tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp);
1591 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp);
1592 tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp);
1593 tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp);
/*
 * Removes every byte equal to (u8) p0 from the first in_len bytes of the
 * buffer. The words are copied into a flat array, filtered byte by byte into a
 * zero-initialised output array, and copied back.
 *
 * The declarations of out_len and buf_in, the increment of out_len inside the
 * loop and the final return are not in this excerpt. NOTE(review): presumably
 * out_len starts at 0, is incremented after each kept byte and is returned;
 * confirm against the full source.
 */
1598 inline u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/* Flatten the two 4-word halves into one 8-word array. */
1604 buf_in[0] = buf0[0];
1605 buf_in[1] = buf0[1];
1606 buf_in[2] = buf0[2];
1607 buf_in[3] = buf0[3];
1608 buf_in[4] = buf1[0];
1609 buf_in[5] = buf1[1];
1610 buf_in[6] = buf1[2];
1611 buf_in[7] = buf1[3];
/* Zero-initialised, so bytes past the kept ones stay 0. */
1613 u32 buf_out[8] = { 0 };
/* Byte views of the word arrays. */
1615 u8 *in = (u8 *) buf_in;
1616 u8 *out = (u8 *) buf_out;
1618 for (u32 pos = 0; pos < in_len; pos++)
/* Skip bytes that match the purge character. */
1620 if (in[pos] == (u8) p0) continue;
1622 out[out_len] = in[pos];
/* Copy the filtered bytes back into the caller's buffers. */
1627 buf0[0] = buf_out[0];
1628 buf0[1] = buf_out[1];
1629 buf0[2] = buf_out[2];
1630 buf0[3] = buf_out[3];
1631 buf1[0] = buf_out[4];
1632 buf1[1] = buf_out[5];
1633 buf1[2] = buf_out[6];
1634 buf1[3] = buf_out[7];
/*
 * Handler for the TOGGLECASE_REC rule.
 * NOTE(review): the body is not visible in this excerpt, and the matching
 * case in apply_rule is commented out, so this handler looks unused - confirm.
 */
1639 inline u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/*
 * Duplicate-first rule: insert p0 extra copies of the first character at
 * the front of the word.
 *
 * p0     - number of copies to insert
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * Returns in_len unchanged for an empty word or when in_len + p0 >= 32.
 * Otherwise the buffers are shifted with rshift_block_N by p0 and the freed
 * low byte positions are OR-filled with the saved first character.
 *
 * The switch header and the final return are not visible in this excerpt;
 * the case labels 1..31 each fill exactly that many bytes, so the switch is
 * presumably on p0 and the result presumably out_len + p0 - TODO confirm.
 */
1645 inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1647 if ( in_len == 0) return (in_len);
1648 if ((in_len + p0) >= 32) return (in_len);
1650 u32 out_len = in_len;
// Save the first character (lowest byte of the first word) before shifting.
1652 const u32 tmp = buf0[0] & 0xFF;
// In-place shift by p0 to open a gap at the start of the word.
1654 rshift_block_N (buf0, buf1, buf0, buf1, p0);
// Case N writes N copies of tmp into byte positions 0..N-1, four bytes per word.
1658 case 1: buf0[0] |= tmp << 0;
1660 case 2: buf0[0] |= tmp << 0 | tmp << 8;
1662 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
1664 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1666 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1667 buf0[1] |= tmp << 0;
1669 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1670 buf0[1] |= tmp << 0 | tmp << 8;
1672 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1673 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
1675 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1676 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1678 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1679 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1680 buf0[2] |= tmp << 0;
1682 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1683 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1684 buf0[2] |= tmp << 0 | tmp << 8;
1686 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1687 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1688 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
1690 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1691 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1692 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1694 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1695 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1696 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1697 buf0[3] |= tmp << 0;
1699 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1700 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1701 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1702 buf0[3] |= tmp << 0 | tmp << 8;
1704 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1705 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1706 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1707 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
1709 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1710 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1711 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1712 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
// From 17 copies on, the fill continues into the second half (buf1).
1714 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1715 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1716 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1717 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1718 buf1[0] |= tmp << 0;
1720 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1721 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1722 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1723 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1724 buf1[0] |= tmp << 0 | tmp << 8;
1726 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1727 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1728 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1729 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1730 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
1732 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1733 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1734 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1735 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1736 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1738 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1739 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1740 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1741 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1742 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1743 buf1[1] |= tmp << 0;
1745 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1746 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1747 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1748 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1749 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1750 buf1[1] |= tmp << 0 | tmp << 8;
1752 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1753 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1754 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1755 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1756 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1757 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
1759 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1760 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1761 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1762 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1763 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1764 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1766 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1767 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1768 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1769 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1770 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1771 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1772 buf1[2] |= tmp << 0;
1774 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1775 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1776 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1777 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1778 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1779 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1780 buf1[2] |= tmp << 0 | tmp << 8;
1782 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1783 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1784 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1785 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1786 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1787 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1788 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
1790 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1791 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1792 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1793 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1794 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1795 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1796 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1798 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1799 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1800 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1801 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1802 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1803 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1804 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1805 buf1[3] |= tmp << 0;
1807 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1808 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1809 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1810 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1811 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1812 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1813 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1814 buf1[3] |= tmp << 0 | tmp << 8;
1816 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1817 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1818 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1819 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1820 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1821 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1822 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1823 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
/*
 * Duplicate-last rule: append p0 extra copies of the last character.
 *
 * p0     - number of copies to append
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * Returns in_len unchanged for an empty word or when in_len + p0 >= 32.
 * Otherwise the last character is extracted and appended p0 times with
 * append_block1. The declaration of tmp, the out_len update inside the loop
 * and the final return are not visible in this excerpt (presumably out_len
 * is incremented per append and returned - TODO confirm).
 */
1832 inline u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1834 if ( in_len == 0) return (in_len);
1835 if ((in_len + p0) >= 32) return (in_len);
// Index of the last character in the word.
1837 const u32 in_len1 = in_len - 1;
// Bit offset of that character inside its 32-bit word (4 characters per word).
1839 const u32 sh = (in_len1 & 3) * 8;
// Pick the word holding the last character and isolate its byte.
1843 switch (in_len1 / 4)
1845 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
1846 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
1847 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
1848 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
1849 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
1850 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
1851 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
1852 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
1855 u32 out_len = in_len;
// One append per requested copy, each at the current end position out_len.
1857 for (u32 i = 0; i < p0; i++)
1859 append_block1 (out_len, buf0, buf1, tmp);
/*
 * Duplicate-all rule: double every character in place ("abc" -> "aabbcc").
 *
 * p0, p1 - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * Returns in_len unchanged for an empty word or when 2 * in_len >= 32.
 * Only buf0 is read: the guard limits in_len to at most 15, so the whole
 * word sits in the first four words. The doubled length is stored in
 * out_len; the return statement is not visible here (presumably out_len).
 */
1867 inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1869 if ( in_len == 0) return (in_len);
1870 if ((in_len + in_len) >= 32) return (in_len);
1872 u32 out_len = in_len;
// Step 1: spread each pair of input bytes so they land in byte lanes 0 and 2
// of a word, leaving lanes 1 and 3 empty. Each input word yields two words.
1877 tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8);
1878 tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8);
1879 tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8);
1880 tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8);
1881 tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8);
1882 tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8);
1883 tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8);
1884 tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8);
// Step 2: copy each spread byte into the empty lane above it, duplicating it.
1886 buf0[0] = tib40[0] | (tib40[0] << 8);
1887 buf0[1] = tib40[1] | (tib40[1] << 8);
1888 buf0[2] = tib40[2] | (tib40[2] << 8);
1889 buf0[3] = tib40[3] | (tib40[3] << 8);
1890 buf1[0] = tib41[0] | (tib41[0] << 8);
1891 buf1[1] = tib41[1] | (tib41[1] << 8);
1892 buf1[2] = tib41[2] | (tib41[2] << 8);
1893 buf1[3] = tib41[3] | (tib41[3] << 8);
// Every character now appears twice.
1895 out_len = out_len + out_len;
/*
 * Switch-first rule: swap the first two characters of the word.
 *
 * p0, p1 - not referenced in the visible lines
 * in_len - current word length; words shorter than 2 are returned unchanged
 *
 * The return after the swap is not visible here (presumably in_len).
 */
1900 inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1902 if (in_len < 2) return (in_len);
// Keep the upper 16 bits, exchange byte 0 and byte 1 of the first word.
1904 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
/*
 * Switch-last rule: swap the last two characters of the word.
 *
 * p0, p1 - not referenced in the visible lines
 * in_len - current word length; words shorter than 2 are returned unchanged
 *
 * The switch header is not visible in this excerpt; the case labels 2..31
 * match the word lengths for which the two trailing characters sit at the
 * positions each case touches, so the switch is presumably on in_len.
 *
 * Three shapes repeat every four lengths:
 *   - both characters in lanes 0/1, 1/2 or 2/3 of one word: swapped in place;
 *   - lengths 5, 9, 13, ...: the pair straddles two words and is exchanged
 *     with a three-step sequence through the upper word.
 * The lane 0/1 form and the straddling form drop or shift the bytes above
 * the pair, so they only give a clean swap if bytes past in_len are zero.
 *
 * The return after the switch is not visible here (presumably in_len).
 */
1909 inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1911 if (in_len < 2) return (in_len);
1915 case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
1917 case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00);
1919 case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000);
// Straddling pair: park the top byte of the lower word in the upper word,
// move the upper word's low byte into the lower word's top lane, then bring
// the parked byte down to lane 0 of the upper word.
1921 case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1];
1922 buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24);
1923 buf0[1] = (buf0[1] >> 24);
1925 case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF);
1927 case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00);
1929 case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000);
1931 case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2];
1932 buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24);
1933 buf0[2] = (buf0[2] >> 24);
1935 case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF);
1937 case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00);
1939 case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000);
1941 case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3];
1942 buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24);
1943 buf0[3] = (buf0[3] >> 24);
1945 case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF);
1947 case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00);
1949 case 16: buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000);
// Length 17: the pair straddles the buf0 / buf1 boundary.
1951 case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0];
1952 buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24);
1953 buf1[0] = (buf1[0] >> 24);
1955 case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF);
1957 case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00);
1959 case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000);
1961 case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1];
1962 buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24);
1963 buf1[1] = (buf1[1] >> 24);
1965 case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF);
1967 case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00);
1969 case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000);
1971 case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2];
1972 buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24);
1973 buf1[2] = (buf1[2] >> 24);
1975 case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF);
1977 case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00);
1979 case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000);
1981 case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3];
1982 buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24);
1983 buf1[3] = (buf1[3] >> 24);
1985 case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF);
1987 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00);
/*
 * Switch-at rule: exchange the characters at positions p0 and p1.
 *
 * p0, p1 - byte positions to exchange; both must be < in_len, otherwise the
 *          word is returned unchanged
 * in_len - current word length in bytes
 *
 * Character position i lives in word i / 4 (buf0[0..3], then buf1[0..3]) at
 * bit offset (i % 4) * 8, which is what each case label below encodes.
 *
 * The work is done by three 32-way case tables. Their switch headers are
 * not visible in this excerpt, so which of p0 / p1 drives each table cannot
 * be confirmed from here; the tables themselves do the following:
 *   1. read the byte at one position into tmp0;
 *   2. read the byte at the other position into tmp1 and overwrite it with
 *      tmp0;
 *   3. write tmp1 at the position read in step 1.
 *
 * The final return is not visible here (presumably in_len).
 */
1994 inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1996 if (p0 >= in_len) return (in_len);
1997 if (p1 >= in_len) return (in_len);
// Table 1: fetch the first of the two characters into tmp0.
2004 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
2006 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
2008 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
2010 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
2012 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
2014 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
2016 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
2018 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
2020 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
2022 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
2024 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
2026 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
2028 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
2030 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
2032 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
2034 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
2036 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
2038 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
2040 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
2042 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
2044 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
2046 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
2048 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
2050 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
2052 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
2054 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
2056 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
2058 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
2060 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
2062 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
2064 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
2066 case 31: tmp0 = (buf1[3] >> 24) & 0xFF;
// Table 2: save the second character in tmp1, then store tmp0 in its place.
2072 case 0: tmp1 = (buf0[0] >> 0) & 0xff;
2073 buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0;
2075 case 1: tmp1 = (buf0[0] >> 8) & 0xff;
2076 buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8;
2078 case 2: tmp1 = (buf0[0] >> 16) & 0xff;
2079 buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16;
2081 case 3: tmp1 = (buf0[0] >> 24) & 0xff;
2082 buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24;
2084 case 4: tmp1 = (buf0[1] >> 0) & 0xff;
2085 buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0;
2087 case 5: tmp1 = (buf0[1] >> 8) & 0xff;
2088 buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8;
2090 case 6: tmp1 = (buf0[1] >> 16) & 0xff;
2091 buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16;
2093 case 7: tmp1 = (buf0[1] >> 24) & 0xff;
2094 buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24;
2096 case 8: tmp1 = (buf0[2] >> 0) & 0xff;
2097 buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0;
2099 case 9: tmp1 = (buf0[2] >> 8) & 0xff;
2100 buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8;
2102 case 10: tmp1 = (buf0[2] >> 16) & 0xff;
2103 buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16;
2105 case 11: tmp1 = (buf0[2] >> 24) & 0xff;
2106 buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24;
2108 case 12: tmp1 = (buf0[3] >> 0) & 0xff;
2109 buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0;
2111 case 13: tmp1 = (buf0[3] >> 8) & 0xff;
2112 buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8;
2114 case 14: tmp1 = (buf0[3] >> 16) & 0xff;
2115 buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16;
2117 case 15: tmp1 = (buf0[3] >> 24) & 0xff;
2118 buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24;
2120 case 16: tmp1 = (buf1[0] >> 0) & 0xff;
2121 buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0;
2123 case 17: tmp1 = (buf1[0] >> 8) & 0xff;
2124 buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8;
2126 case 18: tmp1 = (buf1[0] >> 16) & 0xff;
2127 buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16;
2129 case 19: tmp1 = (buf1[0] >> 24) & 0xff;
2130 buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24;
2132 case 20: tmp1 = (buf1[1] >> 0) & 0xff;
2133 buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0;
2135 case 21: tmp1 = (buf1[1] >> 8) & 0xff;
2136 buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8;
2138 case 22: tmp1 = (buf1[1] >> 16) & 0xff;
2139 buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16;
2141 case 23: tmp1 = (buf1[1] >> 24) & 0xff;
2142 buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24;
2144 case 24: tmp1 = (buf1[2] >> 0) & 0xff;
2145 buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0;
2147 case 25: tmp1 = (buf1[2] >> 8) & 0xff;
2148 buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8;
2150 case 26: tmp1 = (buf1[2] >> 16) & 0xff;
2151 buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16;
2153 case 27: tmp1 = (buf1[2] >> 24) & 0xff;
2154 buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24;
2156 case 28: tmp1 = (buf1[3] >> 0) & 0xff;
2157 buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0;
2159 case 29: tmp1 = (buf1[3] >> 8) & 0xff;
2160 buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8;
2162 case 30: tmp1 = (buf1[3] >> 16) & 0xff;
2163 buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16;
2165 case 31: tmp1 = (buf1[3] >> 24) & 0xff;
2166 buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24;
// Table 3: store tmp1 at the position the first character came from.
2172 case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0;
2174 case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8;
2176 case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16;
2178 case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24;
2180 case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0;
2182 case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8;
2184 case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16;
2186 case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24;
2188 case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0;
2190 case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8;
2192 case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16;
2194 case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24;
2196 case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0;
2198 case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8;
2200 case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16;
2202 case 15: buf0[3] = (buf0[3] & 0x00ffffff) | tmp1 << 24;
2204 case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0;
2206 case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8;
2208 case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16;
2210 case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24;
2212 case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0;
2214 case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8;
2216 case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16;
2218 case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24;
2220 case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0;
2222 case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8;
2224 case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16;
2226 case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24;
2228 case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0;
2230 case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8;
2232 case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16;
2234 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24;
/*
 * Character shift-left rule: shift the character at position p0 left by
 * one bit, leaving all other characters untouched.
 *
 * p0     - position of the character; p0 >= in_len returns in_len unchanged
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * The definition of ml, the switch header and the final return are not
 * visible in this excerpt (presumably ml = ~mr, switch (p0 / 4) and
 * return in_len - TODO confirm).
 */
2241 inline u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2243 if (p0 >= in_len) return (in_len);
// Mask covering the byte lane of character p0 inside its 32-bit word.
2245 const u32 mr = 0xffu << ((p0 & 3) * 8);
// Shift only the selected lane; the trailing "& mr" discards the bit that
// would spill into the next higher lane.
2250 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
2251 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
2252 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
2253 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
2254 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
2255 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
2256 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
2257 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
/*
 * Character shift-right rule: shift the character at position p0 right by
 * one bit, leaving all other characters untouched.
 *
 * p0     - position of the character; p0 >= in_len returns in_len unchanged
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * The definition of ml, the switch header and the final return are not
 * visible in this excerpt (presumably ml = ~mr, switch (p0 / 4) and
 * return in_len - TODO confirm).
 */
2263 inline u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2265 if (p0 >= in_len) return (in_len);
// Mask covering the byte lane of character p0 inside its 32-bit word.
2267 const u32 mr = 0xffu << ((p0 & 3) * 8);
// Shift only the selected lane; the trailing "& mr" discards the bit that
// would drop into the next lower lane.
2272 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
2273 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
2274 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
2275 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
2276 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
2277 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
2278 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
2279 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
/*
 * Character increment rule: add 1 to the character at position p0,
 * leaving all other characters untouched.
 *
 * p0     - position of the character; p0 >= in_len returns in_len unchanged
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * The definition of ml, the switch header and the final return are not
 * visible in this excerpt (presumably ml = ~mr, switch (p0 / 4) and
 * return in_len - TODO confirm).
 */
2285 inline u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2287 if (p0 >= in_len) return (in_len);
// Mask covering the byte lane of character p0 inside its 32-bit word.
2289 const u32 mr = 0xffu << ((p0 & 3) * 8);
// The value 1 positioned in the selected lane.
2292 const u32 n = 0x01010101 & mr;
// Add within the lane only; "& mr" drops the carry, so 0xff wraps to 0x00.
2296 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
2297 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
2298 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
2299 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
2300 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
2301 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
2302 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
2303 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
/*
 * Character decrement rule: subtract 1 from the character at position p0,
 * leaving all other characters untouched.
 *
 * p0     - position of the character; p0 >= in_len returns in_len unchanged
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * The definition of ml, the switch header and the final return are not
 * visible in this excerpt (presumably ml = ~mr, switch (p0 / 4) and
 * return in_len - TODO confirm).
 */
2309 inline u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2311 if (p0 >= in_len) return (in_len);
// Mask covering the byte lane of character p0 inside its 32-bit word.
2313 const u32 mr = 0xffu << ((p0 & 3) * 8);
// The value 1 positioned in the selected lane.
2316 const u32 n = 0x01010101 & mr;
// Subtract within the lane only; "& mr" drops the borrow, so 0x00 wraps to 0xff.
2320 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
2321 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
2322 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
2323 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
2324 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
2325 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
2326 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
2327 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
/*
 * Replace-with-next rule: overwrite the character at position p0 with the
 * character that follows it (position p0 + 1).
 *
 * p0     - position to overwrite; the word is returned unchanged when
 *          p0 + 1 >= in_len, i.e. when there is no following character
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * A copy of the word shifted by one byte is built with lshift_block, so
 * that byte i of the copy holds byte i + 1 of the original (assuming
 * amd_bytealign_S follows the AMD amd_bytealign definition - TODO confirm).
 * The byte lane of p0 is then taken from that copy.
 *
 * The definition of ml, the switch header and the final return are not
 * visible in this excerpt (presumably ml = ~mr, switch (p0 / 4) and
 * return in_len - TODO confirm).
 */
2333 inline u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2335 if ((p0 + 1) >= in_len) return (in_len);
// tib40/tib41 receive the shifted copy; buf0/buf1 stay intact for the merge.
2340 lshift_block (buf0, buf1, tib40, tib41);
// Mask covering the byte lane of character p0 inside its 32-bit word.
2342 const u32 mr = 0xffu << ((p0 & 3) * 8);
// Keep every lane except p0's from the original, take p0's lane from the copy.
2347 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
2348 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
2349 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
2350 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
2351 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
2352 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
2353 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
2354 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
/*
 * Replace-with-previous rule: overwrite the character at position p0 with
 * the character that precedes it (position p0 - 1).
 *
 * p0     - position to overwrite; the word is returned unchanged when
 *          p0 == 0 (no preceding character) or p0 >= in_len
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * A copy of the word shifted by one byte is built with rshift_block, so
 * that byte i of the copy holds byte i - 1 of the original (assuming
 * amd_bytealign_S follows the AMD amd_bytealign definition - TODO confirm).
 * The byte lane of p0 is then taken from that copy.
 *
 * The definition of ml, the switch header and the final return are not
 * visible in this excerpt (presumably ml = ~mr, switch (p0 / 4) and
 * return in_len - TODO confirm).
 */
2360 inline u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2362 if (p0 == 0) return (in_len);
2364 if (p0 >= in_len) return (in_len);
// tib40/tib41 receive the shifted copy; buf0/buf1 stay intact for the merge.
2369 rshift_block (buf0, buf1, tib40, tib41);
// Mask covering the byte lane of character p0 inside its 32-bit word.
2371 const u32 mr = 0xffu << ((p0 & 3) * 8);
// Keep every lane except p0's from the original, take p0's lane from the copy.
2376 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
2377 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
2378 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
2379 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
2380 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
2381 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
2382 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
2383 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
/*
 * Duplicate-block-front rule: prepend a copy of the first p0 characters.
 *
 * p0     - block length; the word is returned unchanged when p0 > in_len
 *          or when in_len + p0 >= 32
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * The declaration and initialisation of tib40/tib41 are not visible in this
 * excerpt (presumably a copy of buf0/buf1 - TODO confirm), nor is the final
 * return (presumably out_len + p0).
 */
2389 inline u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2391 if (p0 > in_len) return (in_len);
2393 if ((in_len + p0) >= 32) return (in_len);
2395 u32 out_len = in_len;
// Reduce the temporary copy to its first p0 bytes: this is the block to prepend.
2409 truncate_right (tib40, tib41, p0);
// Shift the word itself in place by p0 to open a gap at the front.
2411 rshift_block_N (buf0, buf1, buf0, buf1, p0);
// Merge the saved block into the gap.
2413 buf0[0] |= tib40[0];
2414 buf0[1] |= tib40[1];
2415 buf0[2] |= tib40[2];
2416 buf0[3] |= tib40[3];
2417 buf1[0] |= tib41[0];
2418 buf1[1] |= tib41[1];
2419 buf1[2] |= tib41[2];
2420 buf1[3] |= tib41[3];
/*
 * Duplicate-block-back rule: append a copy of the last p0 characters.
 *
 * p0     - block length; the word is returned unchanged when p0 > in_len
 *          or when in_len + p0 >= 32
 * p1     - not referenced in the visible lines
 * in_len - current word length in bytes
 *
 * The declaration of tib40/tib41 and the final return are not visible in
 * this excerpt (presumably the function returns out_len + p0 - TODO confirm).
 */
2427 inline u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2429 if (p0 > in_len) return (in_len);
2431 if ((in_len + p0) >= 32) return (in_len);
2433 u32 out_len = in_len;
// Build a copy of the word shifted by p0, so its tail extends past the
// current end of the word.
2438 rshift_block_N (buf0, buf1, tib40, tib41, p0);
// Drop everything in the copy below position out_len: only the part that
// lies beyond the existing word remains.
2440 truncate_left (tib40, tib41, out_len);
// Merge that tail behind the existing characters.
2442 buf0[0] |= tib40[0];
2443 buf0[1] |= tib40[1];
2444 buf0[2] |= tib40[2];
2445 buf0[3] |= tib40[3];
2446 buf1[0] |= tib41[0];
2447 buf1[1] |= tib41[1];
2448 buf1[2] |= tib41[2];
2449 buf1[3] |= tib41[3];
/*
 * Title-case rule: lower-case the whole word, then upper-case the first
 * character and every character that directly follows a space.
 *
 * p0, p1 - not referenced in the visible lines
 * in_len - current word length; not referenced in the visible lines, all
 *          32 bytes of buf0/buf1 are processed
 *
 * generate_cmask () yields, per byte, a mask confined to bit 0x20 (the
 * ASCII case bit); presumably it is set only for alphabetic bytes - confirm
 * against generate_cmask. OR-ing the mask in sets the case bit, AND-ing its
 * complement clears it.
 *
 * The declarations of tmp, tib40 and tib41 and the final return are not
 * visible in this excerpt (presumably return in_len).
 */
2456 inline u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Pass 1: set the case bit on every flagged byte (lower-case everything).
2458 buf0[0] |= (generate_cmask (buf0[0]));
2459 buf0[1] |= (generate_cmask (buf0[1]));
2460 buf0[2] |= (generate_cmask (buf0[2]));
2461 buf0[3] |= (generate_cmask (buf0[3]));
2462 buf1[0] |= (generate_cmask (buf1[0]));
2463 buf1[1] |= (generate_cmask (buf1[1]));
2464 buf1[2] |= (generate_cmask (buf1[2]));
2465 buf1[3] |= (generate_cmask (buf1[3]));
// Lane constants: the space character, an all-zero byte and an all-one byte.
2470 const uchar4 tmp0 = (uchar4) (' ');
2471 const uchar4 tmp1 = (uchar4) (0x00);
2472 const uchar4 tmp2 = (uchar4) (0xff);
// Pass 2: build a byte mask that is 0xff where the word has a space, 0x00 elsewhere.
2476 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
2477 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
2478 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
2479 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
2480 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
2481 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
2482 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
2483 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
// Move the mask one byte along so it marks the character after each space,
// and force-mark the very first character.
2485 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
// Pass 3: clear the case bit on the marked bytes (upper-case them).
2487 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
2488 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
2489 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
2490 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
2491 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
2492 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
2493 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
2494 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
/*
 * Dispatch a single rule operation to its handler.
 *
 * name   - rule opcode, matched against the RULE_OP_MANGLE_* constants
 * p0, p1 - rule parameters, forwarded unchanged to the handler
 * buf0, buf1 - the word being mangled, modified in place by the handler
 * in_len - current word length in bytes
 *
 * Every handler receives the running length out_len and its result is
 * stored back into out_len. The switch header, any default branch and the
 * final return are not visible in this excerpt (presumably switch (name)
 * and return out_len - TODO confirm).
 */
2499 inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2501 u32 out_len = in_len;
2505 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
2506 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
2507 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
2508 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
2509 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
2510 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
2511 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
2512 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
2513 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
2514 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
2515 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
2516 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
2517 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
2518 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
2519 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
2520 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
2521 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
2522 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
2523 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
2524 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
2525 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
2526 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
2527 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
2528 case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
// The TOGGLECASE_REC opcode is disabled here, so it is never dispatched from this table.
2529 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
2530 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
2531 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
2532 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
2533 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
2534 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
2535 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
2536 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
2537 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
2538 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
2539 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
2540 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
2541 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
2542 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
2543 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
2544 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
/**
 * Run a zero-terminated list of packed rule commands against the buffer
 * held in buf0/buf1, one apply_rule () call per command.
 *
 * cmds  list of 32-bit commands; iteration stops at the first entry equal to 0
 * buf0  first four-word half of the buffer, handed unchanged to apply_rule ()
 * buf1  second four-word half of the buffer, handed unchanged to apply_rule ()
 * len   length value supplied by the caller
 *
 * NOTE(review): the declaration of out_len and the return statement are not
 * part of the lines shown here; presumably out_len starts out as len and is
 * returned after the loop - confirm against the full file.
 */
2550 inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
// A command word of 0 acts as the list terminator.
2554 for (u32 i = 0; cmds[i] != 0; i++)
2556 const u32 cmd = cmds[i];
// Unpack the command word: bits 0-7 -> name, bits 8-15 -> p0, bits 16-23 -> p1.
// Bits 24-31 are not read by the lines shown here.
2558 const u32 name = (cmd >> 0) & 0xff;
2559 const u32 p0 = (cmd >> 8) & 0xff;
2560 const u32 p1 = (cmd >> 16) & 0xff;
// Feed the current out_len in as apply_rule ()'s in_len and keep its result,
// so every command sees the length left behind by the previous one.
2562 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
2568 inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
2581 return apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
2590 for (int i = 0; i < VECT_SIZE; i++)
2595 tmp0[0] = pw_buf0[0];
2596 tmp0[1] = pw_buf0[1];
2597 tmp0[2] = pw_buf0[2];
2598 tmp0[3] = pw_buf0[3];
2599 tmp1[0] = pw_buf1[0];
2600 tmp1[1] = pw_buf1[1];
2601 tmp1[2] = pw_buf1[2];
2602 tmp1[3] = pw_buf1[3];
2604 const u32 tmp_len = apply_rules (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
2618 out_len.s0 = tmp_len;
2630 out_len.s1 = tmp_len;
2644 out_len.s2 = tmp_len;
2656 out_len.s3 = tmp_len;
2670 out_len.s4 = tmp_len;
2682 out_len.s5 = tmp_len;
2694 out_len.s6 = tmp_len;
2706 out_len.s7 = tmp_len;
2720 out_len.s8 = tmp_len;
2732 out_len.s9 = tmp_len;
2744 out_len.sa = tmp_len;
2756 out_len.sb = tmp_len;
2768 out_len.sc = tmp_len;
2780 out_len.sd = tmp_len;
2792 out_len.se = tmp_len;
2804 out_len.sf = tmp_len;