2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
8 #define DO_NOT_USE_BYTE_PERM
10 #ifdef DO_NOT_USE_BYTE_PERM
/*
 * Forward declarations for apply_rule, apply_rules and apply_rules_vect.
 * Their definitions are not part of this excerpt. All three take the
 * candidate buffer as two 4-word halves (buf0/buf1, pw_buf0/pw_buf1 or
 * w0/w1) plus a length argument.
 * NOTE(review): return-value meaning is not visible here -- confirm against
 * the definitions before relying on it.
 */
18 inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
19 inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
20 inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
/*
 * generate_cmask: build a per-byte mask for the four bytes packed in `buf`.
 *
 * Returns 0x20 in every byte lane whose value is an ASCII letter
 * (0x41..0x5a or 0x61..0x7a) and 0x00 in every other lane. All per-lane sums
 * below stay under 0x100, so no carry crosses from one byte lane into the
 * next.
 */
22 inline u32 generate_cmask (u32 buf)
// rmask: 0x20 per lane when bit 6 is set and bit 7 is clear (byte in 0x40..0x7f).
24 const u32 rmask = ((buf & 0x40404040) >> 1)
25 & ~((buf & 0x80808080) >> 2);
// hmask: bit 5 of a lane is set when its low five bits are >= 27 (past 'Z' / 'z').
27 const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
// lmask: bit 5 of a lane is set when its low five bits are >= 1 (excludes 0x40 / 0x60).
28 const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
// Keep only lanes in 0x40..0x7f whose low five bits fall in 1..26.
30 return rmask & ~hmask & lmask;
/*
 * truncate_right: works on the eight words w0[0..3], w1[0..3] and a length
 * `len`.
 * NOTE(review): only the mask computation is visible in this excerpt; how
 * `tmp` is applied to the words is not shown -- confirm against the full body.
 */
33 inline void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
// Low (len % 4) bytes set: 0x00000000, 0x000000ff, 0x0000ffff or 0x00ffffff.
35 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
/*
 * truncate_left: works on the eight words w0[0..3], w1[0..3] and a length
 * `len`.
 * NOTE(review): only the mask computation is visible in this excerpt; how
 * `tmp` is applied to the words is not shown -- confirm against the full body.
 */
86 inline void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
// Complement of the low-(len % 4)-bytes mask:
// 0xffffffff, 0xffffff00, 0xffff0000 or 0xff000000.
88 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
/*
 * lshift_block: fill out0/out1 from in0/in1, treating in0[0..3], in1[0..3]
 * as eight consecutive words. Every output word i is built from input words
 * i and i + 1; the last output word pairs in1[3] with a literal 0.
 *
 * Two implementations are present:
 *   - __byte_perm_S with selector 0x4321
 *   - amd_bytealign_S with offset 1 (compiled when IS_AMD or IS_GENERIC is
 *     defined)
 * NOTE(review): presumably both move the buffer one byte toward index 0,
 * assuming __byte_perm_S / amd_bytealign_S follow the CUDA __byte_perm and
 * AMD amd_bytealign semantics -- confirm. The preprocessor guard of the first
 * variant is not visible in this excerpt.
 */
139 inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
// Variant 1: __byte_perm_S, operands (word i, word i + 1).
142 out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
143 out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
144 out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
145 out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
146 out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
147 out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
148 out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
149 out1[3] = __byte_perm_S (in1[3], 0, 0x4321);
// Variant 2: amd_bytealign_S, operands swapped to (word i + 1, word i).
152 #if defined IS_AMD || defined IS_GENERIC
153 out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
154 out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
155 out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
156 out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
157 out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
158 out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
159 out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
160 out1[3] = amd_bytealign_S ( 0, in1[3], 1);
/*
 * rshift_block: fill out0/out1 from in0/in1, treating in0[0..3], in1[0..3]
 * as eight consecutive words. Every output word i is built from input words
 * i - 1 and i; the first output word pairs a literal 0 with in0[0]. Words are
 * written from out1[3] down to out0[0].
 *
 * Two implementations are present:
 *   - __byte_perm_S with selector 0x6543
 *   - amd_bytealign_S with offset 3 (compiled when IS_AMD or IS_GENERIC is
 *     defined)
 * NOTE(review): presumably both move the buffer one byte toward higher
 * indices, assuming __byte_perm_S / amd_bytealign_S follow the CUDA
 * __byte_perm and AMD amd_bytealign semantics -- confirm. The preprocessor
 * guard of the first variant is not visible in this excerpt.
 */
164 inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
// Variant 1: __byte_perm_S, operands (word i - 1, word i).
167 out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
168 out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
169 out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
170 out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
171 out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
172 out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
173 out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
174 out0[0] = __byte_perm_S ( 0, in0[0], 0x6543);
// Variant 2: amd_bytealign_S, operands swapped to (word i, word i - 1).
177 #if defined IS_AMD || defined IS_GENERIC
178 out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
179 out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
180 out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
181 out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
182 out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
183 out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
184 out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
185 out0[0] = amd_bytealign_S (in0[0], 0, 3);
/*
 * lshift_block_N: fill out0/out1 from in0/in1 according to `num`, with one
 * case label per value 0..31. in0[0..3], in1[0..3] are treated as eight
 * consecutive words.
 *
 * Pattern visible in the cases below:
 *   - num % 4 == 0: output starts with a plain copy of input word num / 4.
 *   - num % 4 == 1, 2, 3: output word i is built from input words
 *     (num / 4 + i) and (num / 4 + i + 1); the last computed word pairs
 *     in1[3] with a literal 0.
 *       variant 1: __byte_perm_S with selector 0x4321 / 0x5432 / 0x6543
 *       variant 2: amd_bytealign_S with offset 1 / 2 / 3 (IS_AMD or
 *                  IS_GENERIC)
 *
 * NOTE(review): the switch statement, the break lines, the guard of the
 * first variant and the assignments to the remaining output words are not
 * visible in this excerpt -- confirm against the full source. Presumably this
 * shifts the buffer by `num` bytes toward index 0, assuming __byte_perm_S /
 * amd_bytealign_S follow the CUDA __byte_perm and AMD amd_bytealign semantics.
 */
189 inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
// ---- Variant 1: __byte_perm_S ----
194 case 0: out0[0] = in0[0];
203 case 1: out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
204 out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
205 out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
206 out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
207 out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
208 out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
209 out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
210 out1[3] = __byte_perm_S (in1[3], 0, 0x4321);
212 case 2: out0[0] = __byte_perm_S (in0[0], in0[1], 0x5432);
213 out0[1] = __byte_perm_S (in0[1], in0[2], 0x5432);
214 out0[2] = __byte_perm_S (in0[2], in0[3], 0x5432);
215 out0[3] = __byte_perm_S (in0[3], in1[0], 0x5432);
216 out1[0] = __byte_perm_S (in1[0], in1[1], 0x5432);
217 out1[1] = __byte_perm_S (in1[1], in1[2], 0x5432);
218 out1[2] = __byte_perm_S (in1[2], in1[3], 0x5432);
219 out1[3] = __byte_perm_S (in1[3], 0, 0x5432);
221 case 3: out0[0] = __byte_perm_S (in0[0], in0[1], 0x6543);
222 out0[1] = __byte_perm_S (in0[1], in0[2], 0x6543);
223 out0[2] = __byte_perm_S (in0[2], in0[3], 0x6543);
224 out0[3] = __byte_perm_S (in0[3], in1[0], 0x6543);
225 out1[0] = __byte_perm_S (in1[0], in1[1], 0x6543);
226 out1[1] = __byte_perm_S (in1[1], in1[2], 0x6543);
227 out1[2] = __byte_perm_S (in1[2], in1[3], 0x6543);
228 out1[3] = __byte_perm_S (in1[3], 0, 0x6543);
230 case 4: out0[0] = in0[1];
239 case 5: out0[0] = __byte_perm_S (in0[1], in0[2], 0x4321);
240 out0[1] = __byte_perm_S (in0[2], in0[3], 0x4321);
241 out0[2] = __byte_perm_S (in0[3], in1[0], 0x4321);
242 out0[3] = __byte_perm_S (in1[0], in1[1], 0x4321);
243 out1[0] = __byte_perm_S (in1[1], in1[2], 0x4321);
244 out1[1] = __byte_perm_S (in1[2], in1[3], 0x4321);
245 out1[2] = __byte_perm_S (in1[3], 0, 0x4321);
248 case 6: out0[0] = __byte_perm_S (in0[1], in0[2], 0x5432);
249 out0[1] = __byte_perm_S (in0[2], in0[3], 0x5432);
250 out0[2] = __byte_perm_S (in0[3], in1[0], 0x5432);
251 out0[3] = __byte_perm_S (in1[0], in1[1], 0x5432);
252 out1[0] = __byte_perm_S (in1[1], in1[2], 0x5432);
253 out1[1] = __byte_perm_S (in1[2], in1[3], 0x5432);
254 out1[2] = __byte_perm_S (in1[3], 0, 0x5432);
257 case 7: out0[0] = __byte_perm_S (in0[1], in0[2], 0x6543);
258 out0[1] = __byte_perm_S (in0[2], in0[3], 0x6543);
259 out0[2] = __byte_perm_S (in0[3], in1[0], 0x6543);
260 out0[3] = __byte_perm_S (in1[0], in1[1], 0x6543);
261 out1[0] = __byte_perm_S (in1[1], in1[2], 0x6543);
262 out1[1] = __byte_perm_S (in1[2], in1[3], 0x6543);
263 out1[2] = __byte_perm_S (in1[3], 0, 0x6543);
266 case 8: out0[0] = in0[2];
275 case 9: out0[0] = __byte_perm_S (in0[2], in0[3], 0x4321);
276 out0[1] = __byte_perm_S (in0[3], in1[0], 0x4321);
277 out0[2] = __byte_perm_S (in1[0], in1[1], 0x4321);
278 out0[3] = __byte_perm_S (in1[1], in1[2], 0x4321);
279 out1[0] = __byte_perm_S (in1[2], in1[3], 0x4321);
280 out1[1] = __byte_perm_S (in1[3], 0, 0x4321);
284 case 10: out0[0] = __byte_perm_S (in0[2], in0[3], 0x5432);
285 out0[1] = __byte_perm_S (in0[3], in1[0], 0x5432);
286 out0[2] = __byte_perm_S (in1[0], in1[1], 0x5432);
287 out0[3] = __byte_perm_S (in1[1], in1[2], 0x5432);
288 out1[0] = __byte_perm_S (in1[2], in1[3], 0x5432);
289 out1[1] = __byte_perm_S (in1[3], 0, 0x5432);
293 case 11: out0[0] = __byte_perm_S (in0[2], in0[3], 0x6543);
294 out0[1] = __byte_perm_S (in0[3], in1[0], 0x6543);
295 out0[2] = __byte_perm_S (in1[0], in1[1], 0x6543);
296 out0[3] = __byte_perm_S (in1[1], in1[2], 0x6543);
297 out1[0] = __byte_perm_S (in1[2], in1[3], 0x6543);
298 out1[1] = __byte_perm_S (in1[3], 0, 0x6543);
302 case 12: out0[0] = in0[3];
// NOTE(review): no `case 13:` label is visible on the next line, although the
// amd_bytealign_S variant has one at the same position -- confirm the label
// sits on its own line in the full source.
312 out0[0] = __byte_perm_S (in0[3], in1[0], 0x4321);
313 out0[1] = __byte_perm_S (in1[0], in1[1], 0x4321);
314 out0[2] = __byte_perm_S (in1[1], in1[2], 0x4321);
315 out0[3] = __byte_perm_S (in1[2], in1[3], 0x4321);
316 out1[0] = __byte_perm_S (in1[3], 0, 0x4321);
321 case 14: out0[0] = __byte_perm_S (in0[3], in1[0], 0x5432);
322 out0[1] = __byte_perm_S (in1[0], in1[1], 0x5432);
323 out0[2] = __byte_perm_S (in1[1], in1[2], 0x5432);
324 out0[3] = __byte_perm_S (in1[2], in1[3], 0x5432);
325 out1[0] = __byte_perm_S (in1[3], 0, 0x5432);
330 case 15: out0[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
331 out0[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
332 out0[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
333 out0[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
334 out1[0] = __byte_perm_S (in1[3], 0, 0x6543);
339 case 16: out0[0] = in1[0];
348 case 17: out0[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
349 out0[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
350 out0[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
351 out0[3] = __byte_perm_S (in1[3], 0, 0x4321);
357 case 18: out0[0] = __byte_perm_S (in1[0], in1[1], 0x5432);
358 out0[1] = __byte_perm_S (in1[1], in1[2], 0x5432);
359 out0[2] = __byte_perm_S (in1[2], in1[3], 0x5432);
360 out0[3] = __byte_perm_S (in1[3], 0, 0x5432);
366 case 19: out0[0] = __byte_perm_S (in1[0], in1[1], 0x6543);
367 out0[1] = __byte_perm_S (in1[1], in1[2], 0x6543);
368 out0[2] = __byte_perm_S (in1[2], in1[3], 0x6543);
369 out0[3] = __byte_perm_S (in1[3], 0, 0x6543);
375 case 20: out0[0] = in1[1];
384 case 21: out0[0] = __byte_perm_S (in1[1], in1[2], 0x4321);
385 out0[1] = __byte_perm_S (in1[2], in1[3], 0x4321);
386 out0[2] = __byte_perm_S (in1[3], 0, 0x4321);
393 case 22: out0[0] = __byte_perm_S (in1[1], in1[2], 0x5432);
394 out0[1] = __byte_perm_S (in1[2], in1[3], 0x5432);
395 out0[2] = __byte_perm_S (in1[3], 0, 0x5432);
402 case 23: out0[0] = __byte_perm_S (in1[1], in1[2], 0x6543);
403 out0[1] = __byte_perm_S (in1[2], in1[3], 0x6543);
404 out0[2] = __byte_perm_S (in1[3], 0, 0x6543);
411 case 24: out0[0] = in1[2];
420 case 25: out0[0] = __byte_perm_S (in1[2], in1[3], 0x4321);
421 out0[1] = __byte_perm_S (in1[3], 0, 0x4321);
429 case 26: out0[0] = __byte_perm_S (in1[2], in1[3], 0x5432);
430 out0[1] = __byte_perm_S (in1[3], 0, 0x5432);
438 case 27: out0[0] = __byte_perm_S (in1[2], in1[3], 0x6543);
439 out0[1] = __byte_perm_S (in1[3], 0, 0x6543);
447 case 28: out0[0] = in1[3];
456 case 29: out0[0] = __byte_perm_S (in1[3], 0, 0x4321);
465 case 30: out0[0] = __byte_perm_S (in1[3], 0, 0x5432);
474 case 31: out0[0] = __byte_perm_S (in1[3], 0, 0x6543);
// ---- Variant 2: amd_bytealign_S; operands are (word i + 1, word i) and the
// third argument equals num % 4 ----
486 #if defined IS_AMD || defined IS_GENERIC
489 case 0: out0[0] = in0[0];
498 case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
499 out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
500 out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
501 out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
502 out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
503 out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
504 out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
505 out1[3] = amd_bytealign_S ( 0, in1[3], 1);
507 case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2);
508 out0[1] = amd_bytealign_S (in0[2], in0[1], 2);
509 out0[2] = amd_bytealign_S (in0[3], in0[2], 2);
510 out0[3] = amd_bytealign_S (in1[0], in0[3], 2);
511 out1[0] = amd_bytealign_S (in1[1], in1[0], 2);
512 out1[1] = amd_bytealign_S (in1[2], in1[1], 2);
513 out1[2] = amd_bytealign_S (in1[3], in1[2], 2);
514 out1[3] = amd_bytealign_S ( 0, in1[3], 2);
516 case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3);
517 out0[1] = amd_bytealign_S (in0[2], in0[1], 3);
518 out0[2] = amd_bytealign_S (in0[3], in0[2], 3);
519 out0[3] = amd_bytealign_S (in1[0], in0[3], 3);
520 out1[0] = amd_bytealign_S (in1[1], in1[0], 3);
521 out1[1] = amd_bytealign_S (in1[2], in1[1], 3);
522 out1[2] = amd_bytealign_S (in1[3], in1[2], 3);
523 out1[3] = amd_bytealign_S ( 0, in1[3], 3);
525 case 4: out0[0] = in0[1];
534 case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1);
535 out0[1] = amd_bytealign_S (in0[3], in0[2], 1);
536 out0[2] = amd_bytealign_S (in1[0], in0[3], 1);
537 out0[3] = amd_bytealign_S (in1[1], in1[0], 1);
538 out1[0] = amd_bytealign_S (in1[2], in1[1], 1);
539 out1[1] = amd_bytealign_S (in1[3], in1[2], 1);
540 out1[2] = amd_bytealign_S ( 0, in1[3], 1);
543 case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2);
544 out0[1] = amd_bytealign_S (in0[3], in0[2], 2);
545 out0[2] = amd_bytealign_S (in1[0], in0[3], 2);
546 out0[3] = amd_bytealign_S (in1[1], in1[0], 2);
547 out1[0] = amd_bytealign_S (in1[2], in1[1], 2);
548 out1[1] = amd_bytealign_S (in1[3], in1[2], 2);
549 out1[2] = amd_bytealign_S ( 0, in1[3], 2);
552 case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3);
553 out0[1] = amd_bytealign_S (in0[3], in0[2], 3);
554 out0[2] = amd_bytealign_S (in1[0], in0[3], 3);
555 out0[3] = amd_bytealign_S (in1[1], in1[0], 3);
556 out1[0] = amd_bytealign_S (in1[2], in1[1], 3);
557 out1[1] = amd_bytealign_S (in1[3], in1[2], 3);
558 out1[2] = amd_bytealign_S ( 0, in1[3], 3);
561 case 8: out0[0] = in0[2];
570 case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1);
571 out0[1] = amd_bytealign_S (in1[0], in0[3], 1);
572 out0[2] = amd_bytealign_S (in1[1], in1[0], 1);
573 out0[3] = amd_bytealign_S (in1[2], in1[1], 1);
574 out1[0] = amd_bytealign_S (in1[3], in1[2], 1);
575 out1[1] = amd_bytealign_S ( 0, in1[3], 1);
579 case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2);
580 out0[1] = amd_bytealign_S (in1[0], in0[3], 2);
581 out0[2] = amd_bytealign_S (in1[1], in1[0], 2);
582 out0[3] = amd_bytealign_S (in1[2], in1[1], 2);
583 out1[0] = amd_bytealign_S (in1[3], in1[2], 2);
584 out1[1] = amd_bytealign_S ( 0, in1[3], 2);
588 case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3);
589 out0[1] = amd_bytealign_S (in1[0], in0[3], 3);
590 out0[2] = amd_bytealign_S (in1[1], in1[0], 3);
591 out0[3] = amd_bytealign_S (in1[2], in1[1], 3);
592 out1[0] = amd_bytealign_S (in1[3], in1[2], 3);
593 out1[1] = amd_bytealign_S ( 0, in1[3], 3);
597 case 12: out0[0] = in0[3];
606 case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1);
607 out0[1] = amd_bytealign_S (in1[1], in1[0], 1);
608 out0[2] = amd_bytealign_S (in1[2], in1[1], 1);
609 out0[3] = amd_bytealign_S (in1[3], in1[2], 1);
610 out1[0] = amd_bytealign_S ( 0, in1[3], 1);
615 case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2);
616 out0[1] = amd_bytealign_S (in1[1], in1[0], 2);
617 out0[2] = amd_bytealign_S (in1[2], in1[1], 2);
618 out0[3] = amd_bytealign_S (in1[3], in1[2], 2);
619 out1[0] = amd_bytealign_S ( 0, in1[3], 2);
624 case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3);
625 out0[1] = amd_bytealign_S (in1[1], in1[0], 3);
626 out0[2] = amd_bytealign_S (in1[2], in1[1], 3);
627 out0[3] = amd_bytealign_S (in1[3], in1[2], 3);
628 out1[0] = amd_bytealign_S ( 0, in1[3], 3);
633 case 16: out0[0] = in1[0];
642 case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1);
643 out0[1] = amd_bytealign_S (in1[2], in1[1], 1);
644 out0[2] = amd_bytealign_S (in1[3], in1[2], 1);
645 out0[3] = amd_bytealign_S ( 0, in1[3], 1);
651 case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2);
652 out0[1] = amd_bytealign_S (in1[2], in1[1], 2);
653 out0[2] = amd_bytealign_S (in1[3], in1[2], 2);
654 out0[3] = amd_bytealign_S ( 0, in1[3], 2);
660 case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3);
661 out0[1] = amd_bytealign_S (in1[2], in1[1], 3);
662 out0[2] = amd_bytealign_S (in1[3], in1[2], 3);
663 out0[3] = amd_bytealign_S ( 0, in1[3], 3);
669 case 20: out0[0] = in1[1];
678 case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1);
679 out0[1] = amd_bytealign_S (in1[3], in1[2], 1);
680 out0[2] = amd_bytealign_S ( 0, in1[3], 1);
687 case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2);
688 out0[1] = amd_bytealign_S (in1[3], in1[2], 2);
689 out0[2] = amd_bytealign_S ( 0, in1[3], 2);
696 case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3);
697 out0[1] = amd_bytealign_S (in1[3], in1[2], 3);
698 out0[2] = amd_bytealign_S ( 0, in1[3], 3);
705 case 24: out0[0] = in1[2];
714 case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1);
715 out0[1] = amd_bytealign_S ( 0, in1[3], 1);
723 case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2);
724 out0[1] = amd_bytealign_S ( 0, in1[3], 2);
732 case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3);
733 out0[1] = amd_bytealign_S ( 0, in1[3], 3);
741 case 28: out0[0] = in1[3];
750 case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1);
759 case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2);
768 case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3);
/*
 * rshift_block_N: fill out0/out1 from in0/in1 according to `num`, with one
 * case label per value 0..31. in0[0..3], in1[0..3] are treated as eight
 * consecutive words; each case writes from out1[3] downwards.
 *
 * Pattern visible in the cases below:
 *   - num % 4 == 0: out1[3] is a plain copy of input word (7 - num / 4).
 *   - num % 4 == 1, 2, 3: output word i is built from input words
 *     (i - num / 4 - 1) and (i - num / 4); the lowest computed word pairs a
 *     literal 0 with in0[0].
 *       variant 1: __byte_perm_S with selector 0x6543 / 0x5432 / 0x4321
 *       variant 2: amd_bytealign_S with offset 3 / 2 / 1 (IS_AMD or
 *                  IS_GENERIC)
 *
 * NOTE(review): the switch statement, the break lines, the guard of the
 * first variant and the assignments to the remaining output words are not
 * visible in this excerpt -- confirm against the full source. Presumably this
 * shifts the buffer by `num` bytes toward higher indices, assuming
 * __byte_perm_S / amd_bytealign_S follow the CUDA __byte_perm and AMD
 * amd_bytealign semantics.
 */
781 inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
// ---- Variant 1: __byte_perm_S ----
786 case 0: out1[3] = in1[3];
795 case 1: out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
796 out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
797 out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
798 out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
799 out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
800 out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
801 out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
802 out0[0] = __byte_perm_S ( 0, in0[0], 0x6543);
804 case 2: out1[3] = __byte_perm_S (in1[2], in1[3], 0x5432);
805 out1[2] = __byte_perm_S (in1[1], in1[2], 0x5432);
806 out1[1] = __byte_perm_S (in1[0], in1[1], 0x5432);
807 out1[0] = __byte_perm_S (in0[3], in1[0], 0x5432);
808 out0[3] = __byte_perm_S (in0[2], in0[3], 0x5432);
809 out0[2] = __byte_perm_S (in0[1], in0[2], 0x5432);
810 out0[1] = __byte_perm_S (in0[0], in0[1], 0x5432);
811 out0[0] = __byte_perm_S ( 0, in0[0], 0x5432);
813 case 3: out1[3] = __byte_perm_S (in1[2], in1[3], 0x4321);
814 out1[2] = __byte_perm_S (in1[1], in1[2], 0x4321);
815 out1[1] = __byte_perm_S (in1[0], in1[1], 0x4321);
816 out1[0] = __byte_perm_S (in0[3], in1[0], 0x4321);
817 out0[3] = __byte_perm_S (in0[2], in0[3], 0x4321);
818 out0[2] = __byte_perm_S (in0[1], in0[2], 0x4321);
819 out0[1] = __byte_perm_S (in0[0], in0[1], 0x4321);
820 out0[0] = __byte_perm_S ( 0, in0[0], 0x4321);
822 case 4: out1[3] = in1[2];
831 case 5: out1[3] = __byte_perm_S (in1[1], in1[2], 0x6543);
832 out1[2] = __byte_perm_S (in1[0], in1[1], 0x6543);
833 out1[1] = __byte_perm_S (in0[3], in1[0], 0x6543);
834 out1[0] = __byte_perm_S (in0[2], in0[3], 0x6543);
835 out0[3] = __byte_perm_S (in0[1], in0[2], 0x6543);
836 out0[2] = __byte_perm_S (in0[0], in0[1], 0x6543);
837 out0[1] = __byte_perm_S ( 0, in0[0], 0x6543);
840 case 6: out1[3] = __byte_perm_S (in1[1], in1[2], 0x5432);
841 out1[2] = __byte_perm_S (in1[0], in1[1], 0x5432);
842 out1[1] = __byte_perm_S (in0[3], in1[0], 0x5432);
843 out1[0] = __byte_perm_S (in0[2], in0[3], 0x5432);
844 out0[3] = __byte_perm_S (in0[1], in0[2], 0x5432);
845 out0[2] = __byte_perm_S (in0[0], in0[1], 0x5432);
846 out0[1] = __byte_perm_S ( 0, in0[0], 0x5432);
849 case 7: out1[3] = __byte_perm_S (in1[1], in1[2], 0x4321);
850 out1[2] = __byte_perm_S (in1[0], in1[1], 0x4321);
851 out1[1] = __byte_perm_S (in0[3], in1[0], 0x4321);
852 out1[0] = __byte_perm_S (in0[2], in0[3], 0x4321);
853 out0[3] = __byte_perm_S (in0[1], in0[2], 0x4321);
854 out0[2] = __byte_perm_S (in0[0], in0[1], 0x4321);
855 out0[1] = __byte_perm_S ( 0, in0[0], 0x4321);
858 case 8: out1[3] = in1[1];
867 case 9: out1[3] = __byte_perm_S (in1[0], in1[1], 0x6543);
868 out1[2] = __byte_perm_S (in0[3], in1[0], 0x6543);
869 out1[1] = __byte_perm_S (in0[2], in0[3], 0x6543);
870 out1[0] = __byte_perm_S (in0[1], in0[2], 0x6543);
871 out0[3] = __byte_perm_S (in0[0], in0[1], 0x6543);
872 out0[2] = __byte_perm_S ( 0, in0[0], 0x6543);
876 case 10: out1[3] = __byte_perm_S (in1[0], in1[1], 0x5432);
877 out1[2] = __byte_perm_S (in0[3], in1[0], 0x5432);
878 out1[1] = __byte_perm_S (in0[2], in0[3], 0x5432);
879 out1[0] = __byte_perm_S (in0[1], in0[2], 0x5432);
880 out0[3] = __byte_perm_S (in0[0], in0[1], 0x5432);
881 out0[2] = __byte_perm_S ( 0, in0[0], 0x5432);
885 case 11: out1[3] = __byte_perm_S (in1[0], in1[1], 0x4321);
886 out1[2] = __byte_perm_S (in0[3], in1[0], 0x4321);
887 out1[1] = __byte_perm_S (in0[2], in0[3], 0x4321);
888 out1[0] = __byte_perm_S (in0[1], in0[2], 0x4321);
889 out0[3] = __byte_perm_S (in0[0], in0[1], 0x4321);
890 out0[2] = __byte_perm_S ( 0, in0[0], 0x4321);
894 case 12: out1[3] = in1[0];
903 case 13: out1[3] = __byte_perm_S (in0[3], in1[0], 0x6543);
904 out1[2] = __byte_perm_S (in0[2], in0[3], 0x6543);
905 out1[1] = __byte_perm_S (in0[1], in0[2], 0x6543);
906 out1[0] = __byte_perm_S (in0[0], in0[1], 0x6543);
907 out0[3] = __byte_perm_S ( 0, in0[0], 0x6543);
912 case 14: out1[3] = __byte_perm_S (in0[3], in1[0], 0x5432);
913 out1[2] = __byte_perm_S (in0[2], in0[3], 0x5432);
914 out1[1] = __byte_perm_S (in0[1], in0[2], 0x5432);
915 out1[0] = __byte_perm_S (in0[0], in0[1], 0x5432);
916 out0[3] = __byte_perm_S ( 0, in0[0], 0x5432);
921 case 15: out1[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
922 out1[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
923 out1[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
924 out1[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
925 out0[3] = __byte_perm_S ( 0, in0[0], 0x4321);
930 case 16: out1[3] = in0[3];
939 case 17: out1[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
940 out1[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
941 out1[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
942 out1[0] = __byte_perm_S ( 0, in0[0], 0x6543);
948 case 18: out1[3] = __byte_perm_S (in0[2], in0[3], 0x5432);
949 out1[2] = __byte_perm_S (in0[1], in0[2], 0x5432);
950 out1[1] = __byte_perm_S (in0[0], in0[1], 0x5432);
951 out1[0] = __byte_perm_S ( 0, in0[0], 0x5432);
957 case 19: out1[3] = __byte_perm_S (in0[2], in0[3], 0x4321);
958 out1[2] = __byte_perm_S (in0[1], in0[2], 0x4321);
959 out1[1] = __byte_perm_S (in0[0], in0[1], 0x4321);
960 out1[0] = __byte_perm_S ( 0, in0[0], 0x4321);
966 case 20: out1[3] = in0[2];
975 case 21: out1[3] = __byte_perm_S (in0[1], in0[2], 0x6543);
976 out1[2] = __byte_perm_S (in0[0], in0[1], 0x6543);
977 out1[1] = __byte_perm_S ( 0, in0[0], 0x6543);
984 case 22: out1[3] = __byte_perm_S (in0[1], in0[2], 0x5432);
985 out1[2] = __byte_perm_S (in0[0], in0[1], 0x5432);
986 out1[1] = __byte_perm_S ( 0, in0[0], 0x5432);
993 case 23: out1[3] = __byte_perm_S (in0[1], in0[2], 0x4321);
994 out1[2] = __byte_perm_S (in0[0], in0[1], 0x4321);
995 out1[1] = __byte_perm_S ( 0, in0[0], 0x4321);
1002 case 24: out1[3] = in0[1];
1011 case 25: out1[3] = __byte_perm_S (in0[0], in0[1], 0x6543);
1012 out1[2] = __byte_perm_S ( 0, in0[0], 0x6543);
1020 case 26: out1[3] = __byte_perm_S (in0[0], in0[1], 0x5432);
1021 out1[2] = __byte_perm_S ( 0, in0[0], 0x5432);
1029 case 27: out1[3] = __byte_perm_S (in0[0], in0[1], 0x4321);
1030 out1[2] = __byte_perm_S ( 0, in0[0], 0x4321);
1038 case 28: out1[3] = in0[0];
1047 case 29: out1[3] = __byte_perm_S ( 0, in0[0], 0x6543);
1056 case 30: out1[3] = __byte_perm_S ( 0, in0[0], 0x5432);
1065 case 31: out1[3] = __byte_perm_S ( 0, in0[0], 0x4321);
// ---- Variant 2: amd_bytealign_S; operands are (word i, word i - 1) and the
// third argument equals 4 - (num % 4) ----
1077 #if defined IS_AMD || defined IS_GENERIC
1080 case 0: out1[3] = in1[3];
1089 case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
1090 out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
1091 out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
1092 out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
1093 out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
1094 out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
1095 out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
1096 out0[0] = amd_bytealign_S (in0[0], 0, 3);
1098 case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2);
1099 out1[2] = amd_bytealign_S (in1[2], in1[1], 2);
1100 out1[1] = amd_bytealign_S (in1[1], in1[0], 2);
1101 out1[0] = amd_bytealign_S (in1[0], in0[3], 2);
1102 out0[3] = amd_bytealign_S (in0[3], in0[2], 2);
1103 out0[2] = amd_bytealign_S (in0[2], in0[1], 2);
1104 out0[1] = amd_bytealign_S (in0[1], in0[0], 2);
1105 out0[0] = amd_bytealign_S (in0[0], 0, 2);
1107 case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1);
1108 out1[2] = amd_bytealign_S (in1[2], in1[1], 1);
1109 out1[1] = amd_bytealign_S (in1[1], in1[0], 1);
1110 out1[0] = amd_bytealign_S (in1[0], in0[3], 1);
1111 out0[3] = amd_bytealign_S (in0[3], in0[2], 1);
1112 out0[2] = amd_bytealign_S (in0[2], in0[1], 1);
1113 out0[1] = amd_bytealign_S (in0[1], in0[0], 1);
1114 out0[0] = amd_bytealign_S (in0[0], 0, 1);
1116 case 4: out1[3] = in1[2];
1125 case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3);
1126 out1[2] = amd_bytealign_S (in1[1], in1[0], 3);
1127 out1[1] = amd_bytealign_S (in1[0], in0[3], 3);
1128 out1[0] = amd_bytealign_S (in0[3], in0[2], 3);
1129 out0[3] = amd_bytealign_S (in0[2], in0[1], 3);
1130 out0[2] = amd_bytealign_S (in0[1], in0[0], 3);
1131 out0[1] = amd_bytealign_S (in0[0], 0, 3);
1134 case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2);
1135 out1[2] = amd_bytealign_S (in1[1], in1[0], 2);
1136 out1[1] = amd_bytealign_S (in1[0], in0[3], 2);
1137 out1[0] = amd_bytealign_S (in0[3], in0[2], 2);
1138 out0[3] = amd_bytealign_S (in0[2], in0[1], 2);
1139 out0[2] = amd_bytealign_S (in0[1], in0[0], 2);
1140 out0[1] = amd_bytealign_S (in0[0], 0, 2);
1143 case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1);
1144 out1[2] = amd_bytealign_S (in1[1], in1[0], 1);
1145 out1[1] = amd_bytealign_S (in1[0], in0[3], 1);
1146 out1[0] = amd_bytealign_S (in0[3], in0[2], 1);
1147 out0[3] = amd_bytealign_S (in0[2], in0[1], 1);
1148 out0[2] = amd_bytealign_S (in0[1], in0[0], 1);
1149 out0[1] = amd_bytealign_S (in0[0], 0, 1);
1152 case 8: out1[3] = in1[1];
1161 case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3);
1162 out1[2] = amd_bytealign_S (in1[0], in0[3], 3);
1163 out1[1] = amd_bytealign_S (in0[3], in0[2], 3);
1164 out1[0] = amd_bytealign_S (in0[2], in0[1], 3);
1165 out0[3] = amd_bytealign_S (in0[1], in0[0], 3);
1166 out0[2] = amd_bytealign_S (in0[0], 0, 3);
1170 case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2);
1171 out1[2] = amd_bytealign_S (in1[0], in0[3], 2);
1172 out1[1] = amd_bytealign_S (in0[3], in0[2], 2);
1173 out1[0] = amd_bytealign_S (in0[2], in0[1], 2);
1174 out0[3] = amd_bytealign_S (in0[1], in0[0], 2);
1175 out0[2] = amd_bytealign_S (in0[0], 0, 2);
1179 case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1);
1180 out1[2] = amd_bytealign_S (in1[0], in0[3], 1);
1181 out1[1] = amd_bytealign_S (in0[3], in0[2], 1);
1182 out1[0] = amd_bytealign_S (in0[2], in0[1], 1);
1183 out0[3] = amd_bytealign_S (in0[1], in0[0], 1);
1184 out0[2] = amd_bytealign_S (in0[0], 0, 1);
1188 case 12: out1[3] = in1[0];
1197 case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3);
1198 out1[2] = amd_bytealign_S (in0[3], in0[2], 3);
1199 out1[1] = amd_bytealign_S (in0[2], in0[1], 3);
1200 out1[0] = amd_bytealign_S (in0[1], in0[0], 3);
1201 out0[3] = amd_bytealign_S (in0[0], 0, 3);
1206 case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2);
1207 out1[2] = amd_bytealign_S (in0[3], in0[2], 2);
1208 out1[1] = amd_bytealign_S (in0[2], in0[1], 2);
1209 out1[0] = amd_bytealign_S (in0[1], in0[0], 2);
1210 out0[3] = amd_bytealign_S (in0[0], 0, 2);
1215 case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1);
1216 out1[2] = amd_bytealign_S (in0[3], in0[2], 1);
1217 out1[1] = amd_bytealign_S (in0[2], in0[1], 1);
1218 out1[0] = amd_bytealign_S (in0[1], in0[0], 1);
1219 out0[3] = amd_bytealign_S (in0[0], 0, 1);
1224 case 16: out1[3] = in0[3];
1233 case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3);
1234 out1[2] = amd_bytealign_S (in0[2], in0[1], 3);
1235 out1[1] = amd_bytealign_S (in0[1], in0[0], 3);
1236 out1[0] = amd_bytealign_S (in0[0], 0, 3);
1242 case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2);
1243 out1[2] = amd_bytealign_S (in0[2], in0[1], 2);
1244 out1[1] = amd_bytealign_S (in0[1], in0[0], 2);
1245 out1[0] = amd_bytealign_S (in0[0], 0, 2);
1251 case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1);
1252 out1[2] = amd_bytealign_S (in0[2], in0[1], 1);
1253 out1[1] = amd_bytealign_S (in0[1], in0[0], 1);
1254 out1[0] = amd_bytealign_S (in0[0], 0, 1);
1260 case 20: out1[3] = in0[2];
1269 case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3);
1270 out1[2] = amd_bytealign_S (in0[1], in0[0], 3);
1271 out1[1] = amd_bytealign_S (in0[0], 0, 3);
1278 case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2);
1279 out1[2] = amd_bytealign_S (in0[1], in0[0], 2);
1280 out1[1] = amd_bytealign_S (in0[0], 0, 2);
1287 case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1);
1288 out1[2] = amd_bytealign_S (in0[1], in0[0], 1);
1289 out1[1] = amd_bytealign_S (in0[0], 0, 1);
1296 case 24: out1[3] = in0[1];
1305 case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3);
1306 out1[2] = amd_bytealign_S (in0[0], 0, 3);
1314 case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2);
1315 out1[2] = amd_bytealign_S (in0[0], 0, 2);
1323 case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1);
1324 out1[2] = amd_bytealign_S (in0[0], 0, 1);
1332 case 28: out1[3] = in0[0];
1341 case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3);
1350 case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2);
1359 case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1);
/*
 * append_block1: OR the single word `src_r0` into the eight-word buffer
 * dst0[0..3], dst1[0..3].
 *
 * Two groups of case labels are visible:
 *   1. cases 0..3 split src_r0 into tmp[0] (shifted left by 0/8/16/24 bits)
 *      and tmp[1] (the bits shifted out of the top, moved to the bottom of
 *      the next word);
 *   2. cases 0..7 OR tmp[0] into one destination word, dst0[0] .. dst1[3].
 *
 * NOTE(review): both switch expressions, the declaration of `tmp`, and the
 * statements that consume tmp[1] are not visible in this excerpt. Presumably
 * the first switch is keyed on the byte position of `offset` within a word and
 * the second on the word index -- confirm against the full source.
 */
1372 inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
// Group 1: byte-align src_r0; the left and right shift counts always sum to 32.
1378 case 0: tmp[0] = src_r0;
1381 case 1: tmp[0] = src_r0 << 8;
1382 tmp[1] = src_r0 >> 24;
1384 case 2: tmp[0] = src_r0 << 16;
1385 tmp[1] = src_r0 >> 16;
1387 case 3: tmp[0] = src_r0 << 24;
1388 tmp[1] = src_r0 >> 8;
// Group 2: merge the aligned low part into the selected destination word.
// OR (not assignment) keeps whatever bits the word already holds.
1394 case 0: dst0[0] |= tmp[0];
1397 case 1: dst0[1] |= tmp[0];
1400 case 2: dst0[2] |= tmp[0];
1403 case 3: dst0[3] |= tmp[0];
1406 case 4: dst1[0] |= tmp[0];
1409 case 5: dst1[1] |= tmp[0];
1412 case 6: dst1[2] |= tmp[0];
1415 case 7: dst1[3] |= tmp[0];
1420 inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
1427 dst0[0] = src_r0[0];
1428 dst0[1] = src_r0[1];
1429 dst0[2] = src_r0[2];
1430 dst0[3] = src_r0[3];
1431 dst1[0] = src_r1[0];
1432 dst1[1] = src_r1[1];
1433 dst1[2] = src_r1[2];
1434 dst1[3] = src_r1[3];
1438 dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x6540);
1439 dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1440 dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1441 dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1442 dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1443 dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
1444 dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543);
1445 dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x6543);
1449 dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x5410);
1450 dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1451 dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1452 dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1453 dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1454 dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
1455 dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432);
1456 dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x5432);
1460 dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x4210);
1461 dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1462 dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1463 dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1464 dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1465 dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
1466 dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321);
1467 dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x4321);
1471 dst0[1] = src_r0[0];
1472 dst0[2] = src_r0[1];
1473 dst0[3] = src_r0[2];
1474 dst1[0] = src_r0[3];
1475 dst1[1] = src_r1[0];
1476 dst1[2] = src_r1[1];
1477 dst1[3] = src_r1[2];
1481 dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x6540);
1482 dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1483 dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1484 dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1485 dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1486 dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
1487 dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543);
1491 dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x5410);
1492 dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1493 dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1494 dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1495 dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1496 dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
1497 dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432);
1501 dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x4210);
1502 dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1503 dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1504 dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1505 dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1506 dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
1507 dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321);
1511 dst0[2] = src_r0[0];
1512 dst0[3] = src_r0[1];
1513 dst1[0] = src_r0[2];
1514 dst1[1] = src_r0[3];
1515 dst1[2] = src_r1[0];
1516 dst1[3] = src_r1[1];
1520 dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x6540);
1521 dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1522 dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1523 dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1524 dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1525 dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
1529 dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x5410);
1530 dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1531 dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1532 dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1533 dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1534 dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
1538 dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x4210);
1539 dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1540 dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1541 dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1542 dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1543 dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
1547 dst0[3] = src_r0[0];
1548 dst1[0] = src_r0[1];
1549 dst1[1] = src_r0[2];
1550 dst1[2] = src_r0[3];
1551 dst1[3] = src_r1[0];
1555 dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x6540);
1556 dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1557 dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1558 dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1559 dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
1563 dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x5410);
1564 dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1565 dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1566 dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1567 dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
1571 dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x4210);
1572 dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1573 dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1574 dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1575 dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
1579 dst1[0] = src_r0[0];
1580 dst1[1] = src_r0[1];
1581 dst1[2] = src_r0[2];
1582 dst1[3] = src_r0[3];
1586 dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x6540);
1587 dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1588 dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1589 dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
1593 dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x5410);
1594 dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1595 dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1596 dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
1600 dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x4210);
1601 dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1602 dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1603 dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
1607 dst1[1] = src_r0[0];
1608 dst1[2] = src_r0[1];
1609 dst1[3] = src_r0[2];
1613 dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x6540);
1614 dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1615 dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
1619 dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x5410);
1620 dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1621 dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
1625 dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x4210);
1626 dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1627 dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
1631 dst1[2] = src_r0[0];
1632 dst1[3] = src_r0[1];
1636 dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x6540);
1637 dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
1641 dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x5410);
1642 dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
1646 dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x4210);
1647 dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
1651 dst1[3] = src_r0[0];
1655 dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x6540);
1659 dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x5410);
1663 dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x4210);
1668 #if defined IS_AMD || defined IS_GENERIC
1673 dst1[3] = src_l1[3] | src_r0[0] << 24;
1676 dst1[3] = src_l1[3] | src_r0[0] << 16;
1679 dst1[3] = src_l1[3] | src_r0[0] << 8;
1682 dst1[3] = src_r0[0];
1685 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1686 dst1[2] = src_l1[2] | src_r0[0] << 24;
1689 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1690 dst1[2] = src_l1[2] | src_r0[0] << 16;
1693 dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1694 dst1[2] = src_l1[2] | src_r0[0] << 8;
1697 dst1[3] = src_r0[1];
1698 dst1[2] = src_r0[0];
1701 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1702 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1703 dst1[1] = src_l1[1] | src_r0[0] << 24;
1706 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1707 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1708 dst1[1] = src_l1[1] | src_r0[0] << 16;
1711 dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1712 dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1713 dst1[1] = src_l1[1] | src_r0[0] << 8;
1716 dst1[3] = src_r0[2];
1717 dst1[2] = src_r0[1];
1718 dst1[1] = src_r0[0];
1721 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1722 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1723 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1724 dst1[0] = src_l1[0] | src_r0[0] << 24;
1727 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1728 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1729 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1730 dst1[0] = src_l1[0] | src_r0[0] << 16;
1733 dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1734 dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1735 dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1736 dst1[0] = src_l1[0] | src_r0[0] << 8;
1739 dst1[3] = src_r0[3];
1740 dst1[2] = src_r0[2];
1741 dst1[1] = src_r0[1];
1742 dst1[0] = src_r0[0];
1745 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1746 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1747 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1748 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1749 dst0[3] = src_l0[3] | src_r0[0] << 24;
1752 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1753 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1754 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1755 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1756 dst0[3] = src_l0[3] | src_r0[0] << 16;
1759 dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1760 dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1761 dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1762 dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1763 dst0[3] = src_l0[3] | src_r0[0] << 8;
1766 dst1[3] = src_r1[0];
1767 dst1[2] = src_r0[3];
1768 dst1[1] = src_r0[2];
1769 dst1[0] = src_r0[1];
1770 dst0[3] = src_r0[0];
1773 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
1774 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1775 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1776 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1777 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1778 dst0[2] = src_l0[2] | src_r0[0] << 24;
1781 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
1782 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1783 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1784 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1785 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1786 dst0[2] = src_l0[2] | src_r0[0] << 16;
1789 dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
1790 dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1791 dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1792 dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1793 dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1794 dst0[2] = src_l0[2] | src_r0[0] << 8;
1797 dst1[3] = src_r1[1];
1798 dst1[2] = src_r1[0];
1799 dst1[1] = src_r0[3];
1800 dst1[0] = src_r0[2];
1801 dst0[3] = src_r0[1];
1802 dst0[2] = src_r0[0];
1805 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
1806 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
1807 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1808 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1809 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1810 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1811 dst0[1] = src_l0[1] | src_r0[0] << 24;
1814 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
1815 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
1816 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1817 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1818 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1819 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1820 dst0[1] = src_l0[1] | src_r0[0] << 16;
1823 dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
1824 dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
1825 dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1826 dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1827 dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1828 dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1829 dst0[1] = src_l0[1] | src_r0[0] << 8;
1832 dst1[3] = src_r1[2];
1833 dst1[2] = src_r1[1];
1834 dst1[1] = src_r1[0];
1835 dst1[0] = src_r0[3];
1836 dst0[3] = src_r0[2];
1837 dst0[2] = src_r0[1];
1838 dst0[1] = src_r0[0];
1841 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 1);
1842 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
1843 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
1844 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
1845 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
1846 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
1847 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
1848 dst0[0] = src_l0[0] | src_r0[0] << 24;
1851 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 2);
1852 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
1853 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
1854 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
1855 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
1856 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
1857 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
1858 dst0[0] = src_l0[0] | src_r0[0] << 16;
1861 dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 3);
1862 dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
1863 dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
1864 dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
1865 dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
1866 dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
1867 dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
1868 dst0[0] = src_l0[0] | src_r0[0] << 8;
1871 dst1[3] = src_r1[3];
1872 dst1[2] = src_r1[2];
1873 dst1[1] = src_r1[1];
1874 dst1[0] = src_r1[0];
1875 dst0[3] = src_r0[3];
1876 dst0[2] = src_r0[2];
1877 dst0[1] = src_r0[1];
1878 dst0[0] = src_r0[0];
/**
 * reverse_block: helper used by the reverse/reflect rule ops.
 *
 * in0/in1   - source block (two groups of four u32 words)
 * out0/out1 - destination block; may be the same arrays as in0/in1
 *             (rule_op_mangle_reverse calls it that way)
 * len       - length value supplied by the caller
 *
 * Visible steps: rshift_block_N () is called with a count of (32 - len),
 * writing into out0/out1; afterwards every output word is set to the
 * swap32_S () of the matching word of the temporaries tib40/tib41.
 *
 * NOTE(review): the lines that declare and fill tib40/tib41 are not part of
 * this listing - presumably they hold the shifted words in reverse word
 * order; confirm against the full source.
 */
1884 inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
// Shift count is the complement of len with respect to the 32-byte block.
1886 rshift_block_N (in0, in1, out0, out1, 32 - len);
// Word-wise swap32_S () of the temporaries into the output block.
1900 out0[0] = swap32_S (tib40[0]);
1901 out0[1] = swap32_S (tib40[1]);
1902 out0[2] = swap32_S (tib40[2]);
1903 out0[3] = swap32_S (tib40[3]);
1904 out1[0] = swap32_S (tib41[0]);
1905 out1[1] = swap32_S (tib41[1]);
1906 out1[2] = swap32_S (tib41[2]);
1907 out1[3] = swap32_S (tib41[3]);
/**
 * Rule op: lower-case every ASCII letter in the block.
 *
 * Each of the eight words of buf0/buf1 is OR-ed with generate_cmask () of
 * itself. generate_cmask () (defined near the top of this file) can only set
 * bit 0x20 of a byte lane, and only for bytes 0x41-0x5a and 0x61-0x7a, so the
 * OR sets the ASCII "lower case" bit on letters and leaves other bytes alone.
 *
 * p0 and p1 are not referenced by the statements shown here.
 */
1910 inline u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1912 buf0[0] |= (generate_cmask (buf0[0]));
1913 buf0[1] |= (generate_cmask (buf0[1]));
1914 buf0[2] |= (generate_cmask (buf0[2]));
1915 buf0[3] |= (generate_cmask (buf0[3]));
1916 buf1[0] |= (generate_cmask (buf1[0]));
1917 buf1[1] |= (generate_cmask (buf1[1]));
1918 buf1[2] |= (generate_cmask (buf1[2]));
1919 buf1[3] |= (generate_cmask (buf1[3]));
/**
 * Rule op: upper-case every ASCII letter in the block.
 *
 * Each of the eight words of buf0/buf1 is AND-ed with the complement of its
 * generate_cmask () result. That mask has bit 0x20 set only in byte lanes
 * holding 0x41-0x5a or 0x61-0x7a, so the AND clears the ASCII "lower case"
 * bit on letters and leaves every other byte unchanged.
 *
 * p0 and p1 are not referenced by the statements shown here.
 */
1924 inline u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1926 buf0[0] &= ~(generate_cmask (buf0[0]));
1927 buf0[1] &= ~(generate_cmask (buf0[1]));
1928 buf0[2] &= ~(generate_cmask (buf0[2]));
1929 buf0[3] &= ~(generate_cmask (buf0[3]));
1930 buf1[0] &= ~(generate_cmask (buf1[0]));
1931 buf1[1] &= ~(generate_cmask (buf1[1]));
1932 buf1[2] &= ~(generate_cmask (buf1[2]));
1933 buf1[3] &= ~(generate_cmask (buf1[3]));
/**
 * Rule op: lower-case the whole block, then upper-case the byte in the
 * lowest lane of buf0[0].
 *
 * Implemented as rule_op_mangle_lrest () followed by clearing bit 0x20 of
 * the low byte of buf0[0], but only when generate_cmask () flags that byte
 * as a letter.
 */
1938 inline u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1940 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
// 0x00000020 restricts the case mask to the lowest byte lane of buf0[0].
1942 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
/**
 * Rule op: upper-case the whole block, then lower-case the byte in the
 * lowest lane of buf0[0].
 *
 * Implemented as rule_op_mangle_urest () followed by setting bit 0x20 of the
 * low byte of buf0[0], but only when generate_cmask () flags that byte as a
 * letter.
 */
1947 inline u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1949 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
// 0x00000020 restricts the case mask to the lowest byte lane of buf0[0].
1951 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
/**
 * Rule op: toggle the case of every ASCII letter in the block.
 *
 * Each of the eight words of buf0/buf1 is XOR-ed with its generate_cmask ()
 * result, which flips bit 0x20 in exactly those byte lanes that hold
 * 0x41-0x5a or 0x61-0x7a.
 *
 * p0 and p1 are not referenced by the statements shown here.
 */
1956 inline u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1958 buf0[0] ^= (generate_cmask (buf0[0]));
1959 buf0[1] ^= (generate_cmask (buf0[1]));
1960 buf0[2] ^= (generate_cmask (buf0[2]));
1961 buf0[3] ^= (generate_cmask (buf0[3]));
1962 buf1[0] ^= (generate_cmask (buf1[0]));
1963 buf1[1] ^= (generate_cmask (buf1[1]));
1964 buf1[2] ^= (generate_cmask (buf1[2]));
1965 buf1[3] ^= (generate_cmask (buf1[3]));
/**
 * Rule op: toggle the case of the single byte at position p0.
 *
 * p0     - byte position to toggle
 * in_len - current length; when p0 >= in_len the block is left untouched
 *          and in_len is returned
 *
 * tmp is the 0x20 case bit moved into the byte lane (p0 & 3) of a word. The
 * selected word is XOR-ed with (tmp & generate_cmask (word)), so the bit is
 * flipped only when that byte is an ASCII letter.
 *
 * NOTE(review): the switch header is not part of this listing; the case
 * labels 0..7 presumably select on p0 / 4 - confirm.
 */
1970 inline u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1972 if (p0 >= in_len) return (in_len);
// Case bit positioned in the byte lane addressed by the low two bits of p0.
1974 const u32 tmp = 0x20u << ((p0 & 3) * 8);
1978 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
1979 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
1980 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
1981 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
1982 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
1983 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
1984 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
1985 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
/**
 * Rule op: reverse the block in place.
 *
 * Delegates to reverse_block () with buf0/buf1 as both source and
 * destination and in_len as the length. p0 and p1 are not referenced by the
 * statements shown here.
 */
1991 inline u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1993 reverse_block (buf0, buf1, buf0, buf1, in_len);
/**
 * Rule op: append the block to itself.
 *
 * If doubling the length would reach 32 or more, nothing is changed and
 * in_len is returned. Otherwise append_block8 () is called with offset
 * out_len (== in_len) and buf0/buf1 passed as destination and as both source
 * operands.
 *
 * NOTE(review): the length update and final return are not part of this
 * listing - presumably the doubled length is returned; confirm.
 */
1998 inline u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Guard: the result must stay below the 32-byte capacity of buf0 + buf1.
2000 if ((in_len + in_len) >= 32) return (in_len);
2002 u32 out_len = in_len;
2004 append_block8 (out_len, buf0, buf1, buf0, buf1, buf0, buf1);
/**
 * Rule op: append a saved copy of the block p0 times.
 *
 * p0 - number of append iterations
 *
 * If in_len * (p0 + 1) would reach 32 or more, nothing is changed and in_len
 * is returned. Otherwise the loop calls append_block8 () p0 times, each time
 * appending tib40/tib41 to buf0/buf1 at offset out_len.
 *
 * NOTE(review): the lines that declare and fill tib40/tib41 and that advance
 * out_len inside the loop are not part of this listing - presumably
 * tib40/tib41 hold a copy of the original block; confirm.
 */
2011 inline u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Guard: the result must stay below the 32-byte capacity of buf0 + buf1.
2013 if (((in_len * p0) + in_len) >= 32) return (in_len);
2015 u32 out_len = in_len;
2029 for (u32 i = 0; i < p0; i++)
2031 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
/**
 * Rule op: append the reversed block to the block.
 *
 * If doubling the length would reach 32 or more, nothing is changed and
 * in_len is returned. Otherwise reverse_block () writes its result for
 * buf0/buf1 into the temporaries tib40/tib41, and append_block8 () appends
 * those temporaries to buf0/buf1 at offset out_len (== in_len).
 *
 * NOTE(review): the tib40/tib41 declarations, the length update and the
 * final return are not part of this listing - confirm against the full
 * source.
 */
2039 inline u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Guard: the result must stay below the 32-byte capacity of buf0 + buf1.
2041 if ((in_len + in_len) >= 32) return (in_len);
2043 u32 out_len = in_len;
2048 reverse_block (buf0, buf1, tib40, tib41, out_len);
2050 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
/**
 * Rule op: append the value p0 at the end of the block.
 *
 * p0 - value handed to append_block1 ()
 *
 * If in_len + 1 would reach 32 or more, nothing is changed and in_len is
 * returned. Otherwise append_block1 () is called with offset out_len
 * (== in_len).
 *
 * NOTE(review): the length update and final return are not part of this
 * listing - presumably in_len + 1 is returned; confirm.
 */
2057 inline u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2059 if ((in_len + 1) >= 32) return (in_len);
2061 u32 out_len = in_len;
2063 append_block1 (out_len, buf0, buf1, p0);
/**
 * Rule op: insert the value p0 at the front of the block.
 *
 * p0 - value OR-ed into buf0[0] after the shift
 *
 * If in_len + 1 would reach 32 or more, nothing is changed and in_len is
 * returned. Otherwise rshift_block () is applied in place and p0 is OR-ed
 * into buf0[0].
 *
 * NOTE(review): this relies on rshift_block () leaving the lowest byte lane
 * of buf0[0] zero and on p0 fitting in one byte - neither is visible here;
 * confirm. The length update and final return are also not part of this
 * listing.
 */
2070 inline u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2072 if ((in_len + 1) >= 32) return (in_len);
2074 u32 out_len = in_len;
2076 rshift_block (buf0, buf1, buf0, buf1);
2078 buf0[0] = buf0[0] | p0;
/**
 * Rule op: rotate the block left by one byte (first byte moves to the end).
 *
 * An empty block (in_len == 0) is returned unchanged. Otherwise:
 *   1. the lowest byte of buf0[0] is saved, already shifted into the byte
 *      lane of the last position (in_len - 1);
 *   2. lshift_block () (top of file) moves the whole block one byte towards
 *      index 0, in place;
 *   3. the saved byte is OR-ed into the word that holds position in_len - 1.
 *
 * Byte position k lives in word k / 4 at bit offset (k & 3) * 8.
 */
2085 inline u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2087 if (in_len == 0) return (in_len);
2089 const u32 in_len1 = in_len - 1;
// Bit offset of the last position inside its word.
2091 const u32 sh = (in_len1 & 3) * 8;
// First byte, pre-positioned for the lane it will occupy after the shift.
2093 const u32 tmp = (buf0[0] & 0xff) << sh;
2095 lshift_block (buf0, buf1, buf0, buf1);
2097 switch (in_len1 / 4)
2099 case 0: buf0[0] |= tmp; break;
2100 case 1: buf0[1] |= tmp; break;
2101 case 2: buf0[2] |= tmp; break;
2102 case 3: buf0[3] |= tmp; break;
2103 case 4: buf1[0] |= tmp; break;
2104 case 5: buf1[1] |= tmp; break;
2105 case 6: buf1[2] |= tmp; break;
2106 case 7: buf1[3] |= tmp; break;
/**
 * Rule op: rotate the block right by one byte (last byte moves to the
 * front).
 *
 * An empty block (in_len == 0) is returned unchanged. Otherwise the byte at
 * position in_len - 1 is extracted into tmp, rshift_block () is applied in
 * place, and truncate_right () is called with in_len.
 *
 * NOTE(review): the declaration of tmp and the statement that stores it back
 * after the shift are not part of this listing - presumably it is OR-ed into
 * buf0[0]; confirm against the full source.
 */
2112 inline u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2114 if (in_len == 0) return (in_len);
2116 const u32 in_len1 = in_len - 1;
// Bit offset of the last position inside its word.
2118 const u32 sh = (in_len1 & 3) * 8;
// Pick the word holding position in_len - 1 and isolate that byte.
2122 switch (in_len1 / 4)
2124 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2125 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2126 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2127 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2128 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2129 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2130 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2131 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2134 rshift_block (buf0, buf1, buf0, buf1);
2138 truncate_right (buf0, buf1, in_len);
/**
 * Rule op: drop the first byte of the block.
 *
 * An empty block (in_len == 0) is returned unchanged. Otherwise
 * lshift_block () (top of file) moves the whole block one byte towards
 * index 0, in place.
 *
 * NOTE(review): the final return is not part of this listing - presumably
 * in_len1 (in_len - 1) is returned; confirm.
 */
2143 inline u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2145 if (in_len == 0) return (in_len);
2147 const u32 in_len1 = in_len - 1;
2149 lshift_block (buf0, buf1, buf0, buf1);
/**
 * Rule op: drop the last byte of the block.
 *
 * An empty block (in_len == 0) is returned unchanged. Otherwise the word
 * that holds position in_len - 1 is AND-ed with a mask that keeps only the
 * byte lanes below that position. Only that one word is modified.
 *
 * NOTE(review): the final return is not part of this listing - presumably
 * in_len1 (in_len - 1) is returned; confirm.
 */
2154 inline u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2156 if (in_len == 0) return (in_len);
2158 const u32 in_len1 = in_len - 1;
// Keep-mask: all bits below the lane of the last byte (0 when it is lane 0).
2160 const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1;
2162 switch (in_len1 / 4)
2164 case 0: buf0[0] &= tmp; break;
2165 case 1: buf0[1] &= tmp; break;
2166 case 2: buf0[2] &= tmp; break;
2167 case 3: buf0[3] &= tmp; break;
2168 case 4: buf1[0] &= tmp; break;
2169 case 5: buf1[1] &= tmp; break;
2170 case 6: buf1[2] &= tmp; break;
2171 case 7: buf1[3] &= tmp; break;
/**
 * Rule op: delete the byte at position p0.
 *
 * p0 - byte position to remove; when p0 >= in_len the block is left
 *      untouched and in_len is returned
 *
 * lshift_block () writes a copy of buf0/buf1 moved one byte towards index 0
 * into the temporaries tib40/tib41. ml keeps the byte lanes below p0 inside
 * its word. Each case starts by combining the kept low lanes of the affected
 * word with further terms.
 *
 * NOTE(review): the switch header, the tib40/tib41 declarations, the
 * continuation of every case expression and the final return are not part of
 * this listing - presumably the upper lanes and all following words are
 * taken from tib40/tib41; confirm against the full source.
 */
2177 inline u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2179 if (p0 >= in_len) return (in_len);
2181 u32 out_len = in_len;
2186 lshift_block (buf0, buf1, tib40, tib41);
// Keep-mask for the byte lanes below position p0 within its word.
2188 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2193 case 0: buf0[0] = (buf0[0] & ml)
2203 case 1: buf0[1] = (buf0[1] & ml)
2212 case 2: buf0[2] = (buf0[2] & ml)
2220 case 3: buf0[3] = (buf0[3] & ml)
2227 case 4: buf1[0] = (buf1[0] & ml)
2233 case 5: buf1[1] = (buf1[1] & ml)
2238 case 6: buf1[2] = (buf1[2] & ml)
2242 case 7: buf1[3] = (buf1[3] & ml)
/**
 * Rule op: extract a sub-range of the block.
 *
 * p0 - start position; when p0 >= in_len nothing is changed
 * p1 - range length; when p0 + p1 > in_len nothing is changed
 *
 * In both rejected cases in_len is returned. Otherwise lshift_block_N () is
 * applied in place with count p0 and truncate_right () is then called with
 * out_len.
 *
 * NOTE(review): the declaration of out_len and the final return are not part
 * of this listing - presumably out_len is p1 and is returned; confirm.
 */
2252 inline u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2254 if (p0 >= in_len) return (in_len);
2256 if ((p0 + p1) > in_len) return (in_len);
2260 lshift_block_N (buf0, buf1, buf0, buf1, p0);
2262 truncate_right (buf0, buf1, out_len);
/**
 * Rule op: remove a sub-range of the block.
 *
 * p0 - start position; when p0 >= in_len nothing is changed
 * p1 - range length; when p0 + p1 > in_len nothing is changed
 *
 * In both rejected cases in_len is returned. Otherwise lshift_block_N ()
 * writes its result for buf0/buf1 with count p1 into the temporaries
 * tib40/tib41. ml keeps the byte lanes below p0 inside its word; each case
 * starts by combining those kept lanes of the affected word with further
 * terms.
 *
 * NOTE(review): the switch header, the tib40/tib41 declarations (and any
 * initialisation between them and the shift), the continuation of every case
 * expression and the final return are not part of this listing - confirm
 * against the full source.
 */
2267 inline u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2269 if (p0 >= in_len) return (in_len);
2271 if ((p0 + p1) > in_len) return (in_len);
2273 u32 out_len = in_len;
2287 lshift_block_N (buf0, buf1, tib40, tib41, p1);
// Keep-mask for the byte lanes below position p0 within its word.
2289 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2294 case 0: buf0[0] = (buf0[0] & ml)
2304 case 1: buf0[1] = (buf0[1] & ml)
2313 case 2: buf0[2] = (buf0[2] & ml)
2321 case 3: buf0[3] = (buf0[3] & ml)
2328 case 4: buf1[0] = (buf1[0] & ml)
2334 case 5: buf1[1] = (buf1[1] & ml)
2339 case 6: buf1[2] = (buf1[2] & ml)
2343 case 7: buf1[3] = (buf1[3] & ml)
/**
 * Rule op: insert the value p1 at position p0.
 *
 * p0 - insert position; p0 == in_len is accepted, p0 > in_len is rejected
 * p1 - value to insert (shifted into the target byte lane as p1n)
 *
 * Rejected calls (p0 > in_len, or in_len + 1 reaching 32) return in_len
 * without touching the block. Otherwise rshift_block () writes its result
 * for buf0/buf1 into the temporaries tib40/tib41, and the word holding
 * position p0 is rebuilt from three parts:
 *   - (word & ml)    the original lanes below p0,
 *   - p1n            the new value in lane p0,
 *   - (tib & mr)     the lanes above p0 taken from the shifted copy.
 *
 * NOTE(review): the switch header, the tib40/tib41 declarations, any further
 * statements inside each case and the final return are not part of this
 * listing - presumably the following words are copied from tib40/tib41 and
 * in_len + 1 is returned; confirm.
 */
2353 inline u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2355 if (p0 > in_len) return (in_len);
2357 if ((in_len + 1) >= 32) return (in_len);
2359 u32 out_len = in_len;
2364 rshift_block (buf0, buf1, tib40, tib41);
// New value positioned in the byte lane of p0.
2366 const u32 p1n = p1 << ((p0 & 3) * 8);
// ml: lanes below p0; mr: lanes above p0.
2368 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2370 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
2374 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
2383 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
2391 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
2398 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
2404 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
2409 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
2413 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
2416 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
/**
 * Rule op: overwrite the byte at position p0 with the value p1.
 *
 * p0 - byte position; when p0 >= in_len the block is left untouched and
 *      in_len is returned
 * p1 - replacement value (shifted into the target byte lane as p1n)
 *
 * m clears the byte lane of p0 inside its word; the selected word becomes
 * (word & m) | p1n.
 *
 * NOTE(review): the switch header is not part of this listing; the case
 * labels 0..7 presumably select on p0 / 4. p1 is not masked to 8 bits here,
 * so callers are presumably expected to pass a single byte value - confirm.
 */
2425 inline u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2427 if (p0 >= in_len) return (in_len);
2429 const u32 p1n = p1 << ((p0 & 3) * 8);
// Clear-mask: every bit except the byte lane of p0.
2431 const u32 m = ~(0xffu << ((p0 & 3) * 8));
2435 case 0: buf0[0] = (buf0[0] & m) | p1n; break;
2436 case 1: buf0[1] = (buf0[1] & m) | p1n; break;
2437 case 2: buf0[2] = (buf0[2] & m) | p1n; break;
2438 case 3: buf0[3] = (buf0[3] & m) | p1n; break;
2439 case 4: buf1[0] = (buf1[0] & m) | p1n; break;
2440 case 5: buf1[1] = (buf1[1] & m) | p1n; break;
2441 case 6: buf1[2] = (buf1[2] & m) | p1n; break;
2442 case 7: buf1[3] = (buf1[3] & m) | p1n; break;
/**
 * Rule op: truncate the block at position p0.
 *
 * p0 - new length; when p0 >= in_len the block is left untouched and in_len
 *      is returned
 *
 * Otherwise truncate_right () (top of file) is called with p0.
 *
 * NOTE(review): the final return is not part of this listing - presumably
 * p0 is returned; confirm.
 */
2448 inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2450 if (p0 >= in_len) return (in_len);
2452 truncate_right (buf0, buf1, p0);
/**
 * Rule op: replace every byte equal to p0 with p1.
 *
 * p0 - byte value to search for
 * p1 - byte value to substitute
 *
 * Two implementations are present.
 *
 * Byte-permute variant (first part): loops over positions 0 .. in_len - 1.
 * For position i the matching case isolates byte (i & 3) of word i / 4 with
 * __byte_perm_S () and compares it with p0; on a match a second
 * __byte_perm_S () rebuilds the word with p1 in that lane and the other
 * three lanes taken from the original word.
 * NOTE(review): this reading assumes __byte_perm_S () follows the CUDA
 * __byte_perm selector convention (nibbles 0-3 pick bytes of the first
 * operand, 4-7 of the second) - confirm. The preprocessor guard and the
 * switch header for this part are not part of this listing.
 *
 * Vector variant (under IS_AMD || IS_GENERIC): p0 and p1 are converted to
 * uchar4 vectors, each word is reinterpreted as uchar4, and select () takes
 * the lane from tmp1 wherever the lane equals tmp0. All eight words are
 * processed; in_len is not consulted by the statements shown for this
 * variant.
 */
2457 inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2460 for (u32 i = 0; i < in_len; i++)
// Selector 0x654N extracts lane N; 0x7650 / 0x7604 / 0x7054 / 0x0654 put
// p1 into lane 0 / 1 / 2 / 3 while keeping the other lanes of the word.
2464 case 0: if ((__byte_perm_S (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7650);
2466 case 1: if ((__byte_perm_S (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7604);
2468 case 2: if ((__byte_perm_S (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7054);
2470 case 3: if ((__byte_perm_S (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x0654);
2472 case 4: if ((__byte_perm_S (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7650);
2474 case 5: if ((__byte_perm_S (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7604);
2476 case 6: if ((__byte_perm_S (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7054);
2478 case 7: if ((__byte_perm_S (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x0654);
2480 case 8: if ((__byte_perm_S (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7650);
2482 case 9: if ((__byte_perm_S (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7604);
2484 case 10: if ((__byte_perm_S (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7054);
2486 case 11: if ((__byte_perm_S (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x0654);
2488 case 12: if ((__byte_perm_S (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7650);
2490 case 13: if ((__byte_perm_S (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7604);
2492 case 14: if ((__byte_perm_S (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7054);
2494 case 15: if ((__byte_perm_S (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x0654);
2496 case 16: if ((__byte_perm_S (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7650);
2498 case 17: if ((__byte_perm_S (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7604);
2500 case 18: if ((__byte_perm_S (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7054);
2502 case 19: if ((__byte_perm_S (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x0654);
2504 case 20: if ((__byte_perm_S (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7650);
2506 case 21: if ((__byte_perm_S (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7604);
2508 case 22: if ((__byte_perm_S (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7054);
2510 case 23: if ((__byte_perm_S (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x0654);
2512 case 24: if ((__byte_perm_S (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7650);
2514 case 25: if ((__byte_perm_S (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7604);
2516 case 26: if ((__byte_perm_S (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7054);
2518 case 27: if ((__byte_perm_S (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x0654);
2520 case 28: if ((__byte_perm_S (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7650);
2522 case 29: if ((__byte_perm_S (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7604);
2524 case 30: if ((__byte_perm_S (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7054);
2526 case 31: if ((__byte_perm_S (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x0654);
2532 #if defined IS_AMD || defined IS_GENERIC
// Search and replacement values as four-lane byte vectors.
2533 const uchar4 tmp0 = (uchar4) (p0);
2534 const uchar4 tmp1 = (uchar4) (p1);
// Per word: view as uchar4, swap matching lanes for tmp1, store back.
2538 tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp);
2539 tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp);
2540 tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp);
2541 tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp);
2542 tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp);
2543 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp);
2544 tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp);
2545 tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp);
/**
 * Rule op "purgechar": same parameter list as the other rule_op_mangle_*
 * functions (p0, p1, the two four-word buffers and the current length).
 *
 * NOTE(review): only the signature is present in this listing, so the
 * behaviour cannot be described from here - confirm against the full body.
 */
2551 inline u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
/**
 * Rule op "togglecase_rec": same parameter list as the other
 * rule_op_mangle_* functions (p0, p1, the two four-word buffers and the
 * current length).
 *
 * NOTE(review): only the signature is present in this listing, so the
 * behaviour cannot be described from here - confirm against the full body.
 */
2557 inline u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2563 inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2565 if ( in_len == 0) return (in_len);
2566 if ((in_len + p0) >= 32) return (in_len);
2568 u32 out_len = in_len;
2570 const u32 tmp = buf0[0] & 0xFF;
2572 rshift_block_N (buf0, buf1, buf0, buf1, p0);
2577 case 1: buf0[0] |= tmp;
2579 case 2: buf0[0] |= __byte_perm_S (tmp, 0, 0x5400);
2581 case 3: buf0[0] |= __byte_perm_S (tmp, 0, 0x4000);
2583 case 4: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2585 case 5: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2588 case 6: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2589 buf0[1] |= __byte_perm_S (tmp, 0, 0x5400);
2591 case 7: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2592 buf0[1] |= __byte_perm_S (tmp, 0, 0x4000);
2594 case 8: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2595 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2597 case 9: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2598 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2601 case 10: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2602 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2603 buf0[2] |= __byte_perm_S (tmp, 0, 0x5400);
2605 case 11: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2606 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2607 buf0[2] |= __byte_perm_S (tmp, 0, 0x4000);
2609 case 12: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2610 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2611 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2613 case 13: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2614 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2615 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2618 case 14: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2619 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2620 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2621 buf0[3] |= __byte_perm_S (tmp, 0, 0x5400);
2623 case 15: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2624 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2625 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2626 buf0[3] |= __byte_perm_S (tmp, 0, 0x4000);
2628 case 16: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2629 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2630 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2631 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2633 case 17: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2634 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2635 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2636 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2639 case 18: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2640 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2641 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2642 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2643 buf1[0] |= __byte_perm_S (tmp, 0, 0x5400);
2645 case 19: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2646 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2647 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2648 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2649 buf1[0] |= __byte_perm_S (tmp, 0, 0x4000);
2651 case 20: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2652 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2653 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2654 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2655 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2657 case 21: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2658 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2659 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2660 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2661 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2664 case 22: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2665 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2666 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2667 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2668 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2669 buf1[1] |= __byte_perm_S (tmp, 0, 0x5400);
2671 case 23: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2672 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2673 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2674 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2675 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2676 buf1[1] |= __byte_perm_S (tmp, 0, 0x4000);
2678 case 24: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2679 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2680 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2681 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2682 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2683 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2685 case 25: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2686 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2687 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2688 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2689 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2690 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2693 case 26: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2694 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2695 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2696 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2697 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2698 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2699 buf1[2] |= __byte_perm_S (tmp, 0, 0x5400);
2701 case 27: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2702 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2703 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2704 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2705 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2706 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2707 buf1[2] |= __byte_perm_S (tmp, 0, 0x4000);
2709 case 28: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2710 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2711 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2712 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2713 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2714 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2715 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2717 case 29: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2718 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2719 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2720 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2721 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2722 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2723 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2726 case 30: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2727 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2728 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2729 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2730 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2731 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2732 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2733 buf1[3] |= __byte_perm_S (tmp, 0, 0x5400);
2735 case 31: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
2736 buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
2737 buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
2738 buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
2739 buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
2740 buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
2741 buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
2742 buf1[3] |= __byte_perm_S (tmp, 0, 0x4000);
2747 #if defined IS_AMD || defined IS_GENERIC
2750 case 1: buf0[0] |= tmp << 0;
2752 case 2: buf0[0] |= tmp << 0 | tmp << 8;
2754 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2756 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2758 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2759 buf0[1] |= tmp << 0;
2761 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2762 buf0[1] |= tmp << 0 | tmp << 8;
2764 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2765 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2767 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2768 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2770 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2771 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2772 buf0[2] |= tmp << 0;
2774 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2775 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2776 buf0[2] |= tmp << 0 | tmp << 8;
2778 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2779 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2780 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2782 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2783 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2784 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2786 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2787 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2788 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2789 buf0[3] |= tmp << 0;
2791 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2792 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2793 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2794 buf0[3] |= tmp << 0 | tmp << 8;
2796 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2797 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2798 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2799 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2801 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2802 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2803 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2804 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2806 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2807 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2808 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2809 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2810 buf1[0] |= tmp << 0;
2812 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2813 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2814 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2815 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2816 buf1[0] |= tmp << 0 | tmp << 8;
2818 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2819 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2820 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2821 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2822 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2824 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2825 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2826 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2827 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2828 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2830 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2831 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2832 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2833 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2834 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2835 buf1[1] |= tmp << 0;
2837 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2838 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2839 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2840 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2841 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2842 buf1[1] |= tmp << 0 | tmp << 8;
2844 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2845 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2846 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2847 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2848 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2849 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2851 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2852 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2853 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2854 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2855 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2856 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2858 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2859 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2860 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2861 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2862 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2863 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2864 buf1[2] |= tmp << 0;
2866 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2867 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2868 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2869 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2870 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2871 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2872 buf1[2] |= tmp << 0 | tmp << 8;
2874 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2875 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2876 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2877 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2878 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2879 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2880 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2882 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2883 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2884 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2885 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2886 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2887 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2888 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2890 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2891 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2892 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2893 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2894 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2895 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2896 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2897 buf1[3] |= tmp << 0;
2899 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2900 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2901 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2902 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2903 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2904 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2905 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2906 buf1[3] |= tmp << 0 | tmp << 8;
2908 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2909 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2910 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2911 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2912 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2913 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2914 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2915 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
// Append p0 extra copies of the word's last character.
//   p0     - number of copies to append
//   p1     - not referenced in the visible lines
//   buf0   - characters 0..15 of the word, four per u32, character i at bits (i & 3) * 8
//   buf1   - characters 16..31, same layout
//   in_len - current length of the word in bytes
// Returns in_len unchanged when one of the guards below rejects the input.
// NOTE(review): this excerpt omits short lines (braces, the declaration of tmp,
// whatever advances out_len inside the loop, and the final return), so the value
// returned on the success path is not visible here - presumably out_len; confirm.
2925 inline u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// An empty word has no last character to repeat.
2927 if ( in_len == 0) return (in_len);
// buf0 + buf1 hold 8 * 4 = 32 bytes; reject results that would reach that size.
2928 if ((in_len + p0) >= 32) return (in_len);
// Index of the last character.
2930 const u32 in_len1 = in_len - 1;
// Bit offset of that character inside its 32-bit word.
2932 const u32 sh = (in_len1 & 3) * 8;
// Select the word that holds the last character and isolate its byte in tmp.
// Words 0..3 are buf0[0..3], words 4..7 are buf1[0..3].
2936 switch (in_len1 / 4)
2938 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2939 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2940 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2941 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2942 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2943 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2944 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2945 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
// Running length of the word while the copies are appended.
2948 u32 out_len = in_len;
// One append_block1 call per requested copy.
2950 for (u32 i = 0; i < p0; i++)
// append_block1 is defined elsewhere in this file; presumably it stores the
// byte tmp at offset out_len of buf0/buf1 - TODO confirm against its definition.
2952 append_block1 (out_len, buf0, buf1, tmp);
// Duplicate every character of the word in place ("abc" -> "aabbcc").
//   p0, p1 - not referenced in the visible lines
//   buf0   - characters 0..15 of the word, four per u32, lowest byte first
//   buf1   - characters 16..31; only written here, never read
//   in_len - current length of the word in bytes
// Returns in_len unchanged when one of the guards below rejects the input.
// NOTE(review): this excerpt omits short lines (braces, the tib40/tib41
// declarations, the opening of the first preprocessor branch and the statements
// that close it, the #endif lines and the final return). The first branch is
// presumably the NVIDIA one and the return presumably yields out_len - confirm.
2960 inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Nothing to duplicate in an empty word.
2962 if ( in_len == 0) return (in_len);
// The doubled word must stay below 32 bytes. This caps in_len at 15, which is
// why only buf0 is read as input below.
2963 if ((in_len + in_len) >= 32) return (in_len);
2965 u32 out_len = in_len;
// __byte_perm_S variant. Assuming __byte_perm_S follows CUDA __byte_perm
// selector semantics (TODO confirm), 0x1100 yields bytes (b0, b0, b1, b1) and
// 0x3322 yields (b2, b2, b3, b3), i.e. each source word expands into two
// words with every character already doubled.
2971 tib40[0] = __byte_perm_S (buf0[0], 0, 0x1100);
2972 tib40[1] = __byte_perm_S (buf0[0], 0, 0x3322);
2973 tib40[2] = __byte_perm_S (buf0[1], 0, 0x1100);
2974 tib40[3] = __byte_perm_S (buf0[1], 0, 0x3322);
2975 tib41[0] = __byte_perm_S (buf0[2], 0, 0x1100);
2976 tib41[1] = __byte_perm_S (buf0[2], 0, 0x3322);
2977 tib41[2] = __byte_perm_S (buf0[3], 0, 0x1100);
2978 tib41[3] = __byte_perm_S (buf0[3], 0, 0x3322);
// Shift/mask variant for AMD and generic devices.
2990 #if defined IS_AMD || defined IS_GENERIC
// Step 1: spread two source characters per output word into byte lanes 0 and 2,
// leaving lanes 1 and 3 empty. Even tib entries take the low half of a source
// word, odd entries take the high half.
2991 tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8);
2992 tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8);
2993 tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8);
2994 tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8);
2995 tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8);
2996 tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8);
2997 tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8);
2998 tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8);
// Step 2: OR each spread word with itself shifted up one byte, which fills the
// empty lanes 1 and 3 with copies of lanes 0 and 2.
3000 buf0[0] = tib40[0] | (tib40[0] << 8);
3001 buf0[1] = tib40[1] | (tib40[1] << 8);
3002 buf0[2] = tib40[2] | (tib40[2] << 8);
3003 buf0[3] = tib40[3] | (tib40[3] << 8);
3004 buf1[0] = tib41[0] | (tib41[0] << 8);
3005 buf1[1] = tib41[1] | (tib41[1] << 8);
3006 buf1[2] = tib41[2] | (tib41[2] << 8);
3007 buf1[3] = tib41[3] | (tib41[3] << 8);
// Every character now occurs twice, so the length doubles.
3010 out_len = out_len + out_len;
// Swap the first two characters of the word (bytes 0 and 1 of buf0[0]).
//   p0, p1 - not referenced in the visible lines
//   buf0   - characters 0..15, four per u32, lowest byte first; buf0[0] is modified
//   buf1   - not touched in the visible lines
//   in_len - current length of the word in bytes
// Returns in_len unchanged when the word is shorter than two characters.
// NOTE(review): braces, the opening of the first preprocessor branch, the
// #endif lines and the final return are not visible in this excerpt.
3015 inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Fewer than two characters: nothing to swap.
3017 if (in_len < 2) return (in_len);
// __byte_perm_S variant. Assuming CUDA __byte_perm selector semantics (TODO
// confirm), 0x3201 keeps bytes 2 and 3 and exchanges bytes 0 and 1.
3020 buf0[0] = __byte_perm_S (buf0[0], 0, 0x3201);
// Shift/mask variant: keep the upper 16 bits, move byte 0 up into lane 1 and
// byte 1 down into lane 0.
3023 #if defined IS_AMD || defined IS_GENERIC
3024 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
// Swap the last two characters of the word.
//   p0, p1 - not referenced in the visible lines
//   buf0   - characters 0..15, four per u32, lowest byte first
//   buf1   - characters 16..31, same layout
//   in_len - current length of the word in bytes
// Returns in_len unchanged when the word is shorter than two characters.
// NOTE(review): this excerpt omits short lines (braces, the switch heads, the
// break statements, the opening of the first preprocessor branch, the #endif
// lines and the final return). The case labels line up with in_len: case 2
// touches bytes 0 and 1 of buf0[0], case 31 touches bytes 1 and 2 of buf1[3].
// There is no case for in_len == 32 in either variant.
3030 inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Fewer than two characters: nothing to swap.
3032 if (in_len < 2) return (in_len);
// __byte_perm_S variant. The notes below assume CUDA __byte_perm selector
// semantics (selectors 0-3 pick bytes of the first operand, 4-7 bytes of the
// second) - TODO confirm against the definition of __byte_perm_S.
//   0x5401 / 0x4120 / 0x2310: both characters live in the same word; exchange
//     bytes (0,1) / (1,2) / (2,3). Lanes fed from the zero operand come out as 0.
//   three-statement cases (5, 9, ..., 29): the pair straddles two words.
//     1) copy the top byte of the lower word into lane 3 of the upper word,
//     2) put the upper word's low byte (the last character) into lane 3 of the
//        lower word,
//     3) move the saved byte down to lane 0 of the upper word, zeroing the rest.
3037 case 2: buf0[0] = __byte_perm_S (buf0[0], 0, 0x5401);
3039 case 3: buf0[0] = __byte_perm_S (buf0[0], 0, 0x4120);
3041 case 4: buf0[0] = __byte_perm_S (buf0[0], 0, 0x2310);
3043 case 5: buf0[1] = __byte_perm_S (buf0[1], buf0[0], 0x7210);
3044 buf0[0] = __byte_perm_S (buf0[0], buf0[1], 0x4210);
3045 buf0[1] = __byte_perm_S (buf0[1], 0, 0x6543);
3047 case 6: buf0[1] = __byte_perm_S (buf0[1], 0, 0x5401);
3049 case 7: buf0[1] = __byte_perm_S (buf0[1], 0, 0x4120);
3051 case 8: buf0[1] = __byte_perm_S (buf0[1], 0, 0x2310);
3053 case 9: buf0[2] = __byte_perm_S (buf0[2], buf0[1], 0x7210);
3054 buf0[1] = __byte_perm_S (buf0[1], buf0[2], 0x4210);
3055 buf0[2] = __byte_perm_S (buf0[2], 0, 0x6543);
3057 case 10: buf0[2] = __byte_perm_S (buf0[2], 0, 0x5401);
3059 case 11: buf0[2] = __byte_perm_S (buf0[2], 0, 0x4120);
3061 case 12: buf0[2] = __byte_perm_S (buf0[2], 0, 0x2310);
3063 case 13: buf0[3] = __byte_perm_S (buf0[3], buf0[2], 0x7210);
3064 buf0[2] = __byte_perm_S (buf0[2], buf0[3], 0x4210);
3065 buf0[3] = __byte_perm_S (buf0[3], 0, 0x6543);
3067 case 14: buf0[3] = __byte_perm_S (buf0[3], 0, 0x5401);
3069 case 15: buf0[3] = __byte_perm_S (buf0[3], 0, 0x4120);
3071 case 16: buf0[3] = __byte_perm_S (buf0[3], 0, 0x2310);
3073 case 17: buf1[0] = __byte_perm_S (buf1[0], buf0[3], 0x7210);
3074 buf0[3] = __byte_perm_S (buf0[3], buf1[0], 0x4210);
3075 buf1[0] = __byte_perm_S (buf1[0], 0, 0x6543);
3077 case 18: buf1[0] = __byte_perm_S (buf1[0], 0, 0x5401);
3079 case 19: buf1[0] = __byte_perm_S (buf1[0], 0, 0x4120);
3081 case 20: buf1[0] = __byte_perm_S (buf1[0], 0, 0x2310);
3083 case 21: buf1[1] = __byte_perm_S (buf1[1], buf1[0], 0x7210);
3084 buf1[0] = __byte_perm_S (buf1[0], buf1[1], 0x4210);
3085 buf1[1] = __byte_perm_S (buf1[1], 0, 0x6543);
3087 case 22: buf1[1] = __byte_perm_S (buf1[1], 0, 0x5401);
3089 case 23: buf1[1] = __byte_perm_S (buf1[1], 0, 0x4120);
3091 case 24: buf1[1] = __byte_perm_S (buf1[1], 0, 0x2310);
3093 case 25: buf1[2] = __byte_perm_S (buf1[2], buf1[1], 0x7210);
3094 buf1[1] = __byte_perm_S (buf1[1], buf1[2], 0x4210);
3095 buf1[2] = __byte_perm_S (buf1[2], 0, 0x6543);
3097 case 26: buf1[2] = __byte_perm_S (buf1[2], 0, 0x5401);
3099 case 27: buf1[2] = __byte_perm_S (buf1[2], 0, 0x4120);
3101 case 28: buf1[2] = __byte_perm_S (buf1[2], 0, 0x2310);
3103 case 29: buf1[3] = __byte_perm_S (buf1[3], buf1[2], 0x7210);
3104 buf1[2] = __byte_perm_S (buf1[2], buf1[3], 0x4210);
3105 buf1[3] = __byte_perm_S (buf1[3], 0, 0x6543);
3107 case 30: buf1[3] = __byte_perm_S (buf1[3], 0, 0x5401);
3109 case 31: buf1[3] = __byte_perm_S (buf1[3], 0, 0x4120);
// Shift/mask variant for AMD and generic devices; same case layout as above.
//   Same-word cases keep the bytes below the pair, shift the lower character up
//   one lane and the upper character down one lane. Lanes above the pair are
//   not carried over, so they end up zero.
//   Straddling cases (5, 9, ..., 29) mirror the three steps of the variant
//   above. Step 1 ORs without masking the upper word, so it is only a clean
//   move if the top byte of the upper word is zero - presumably true because
//   that byte lies past in_len; confirm.
3114 #if defined IS_AMD || defined IS_GENERIC
3117 case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
3119 case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00);
3121 case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000);
3123 case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1];
3124 buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24);
3125 buf0[1] = (buf0[1] >> 24);
3127 case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF);
3129 case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00);
3131 case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000);
3133 case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2];
3134 buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24);
3135 buf0[2] = (buf0[2] >> 24);
3137 case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF);
3139 case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00);
3141 case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000);
3143 case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3];
3144 buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24);
3145 buf0[3] = (buf0[3] >> 24);
3147 case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF);
3149 case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00);
3151 case 16: buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000);
3153 case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0];
3154 buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24);
3155 buf1[0] = (buf1[0] >> 24);
3157 case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF);
3159 case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00);
3161 case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000);
3163 case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1];
3164 buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24);
3165 buf1[1] = (buf1[1] >> 24);
3167 case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF);
3169 case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00);
3171 case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000);
3173 case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2];
3174 buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24);
3175 buf1[2] = (buf1[2] >> 24);
3177 case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF);
3179 case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00);
3181 case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000);
3183 case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3];
3184 buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24);
3185 buf1[3] = (buf1[3] >> 24);
3187 case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF);
3189 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00);
// Exchange the characters at two positions of the word.
//   p0, p1 - the two character positions; both must be below in_len
//   buf0   - characters 0..15, four per u32, lowest byte first
//   buf1   - characters 16..31, same layout
//   in_len - current length of the word in bytes
// Returns in_len unchanged when either position is outside the word.
// Each platform variant consists of three 32-way case tables, one case per
// character position (case k addresses byte k & 3 of word k / 4):
//   table 1 reads one character into tmp0,
//   table 2 reads the other character into tmp1 and overwrites it with tmp0,
//   table 3 stores tmp1 at a position.
// NOTE(review): this excerpt omits short lines (braces, the tmp0/tmp1
// declarations, the three switch heads per variant, the break statements, the
// opening of the first preprocessor branch, the #endif lines and the final
// return). Presumably tables 1 and 3 switch on p0 and table 2 on p1, and the
// function returns in_len - confirm against the full file.
3197 inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Both positions must address an existing character.
3199 if (p0 >= in_len) return (in_len);
3200 if (p1 >= in_len) return (in_len);
// ---- __byte_perm_S variant ----
// The selector notes assume CUDA __byte_perm semantics (selectors 0-3 pick
// bytes of the first operand, 4-7 bytes of the second) - TODO confirm.
// Table 1: 0x6540..0x6543 move byte 0..3 of the word into lane 0 of tmp0; the
// remaining lanes are fed from the zero operand.
3208 case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
3210 case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
3212 case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
3214 case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
3216 case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
3218 case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
3220 case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
3222 case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
3224 case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
3226 case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
3228 case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
3230 case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
3232 case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
3234 case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
3236 case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
3238 case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
3240 case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
3242 case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
3244 case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
3246 case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
3248 case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
3250 case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
3252 case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
3254 case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
3256 case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
3258 case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
3260 case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
3262 case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
3264 case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
3266 case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
3268 case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
3270 case 31: tmp0 = __byte_perm_S (buf1[3], 0, 0x6543);
// Table 2: save the character at this position in tmp1, then rebuild the word
// with lane 0 of tmp0 in its place. In 0x7650 / 0x7604 / 0x7054 / 0x0654 the
// nibble 0 marks the replaced lane; 4-7 keep the word's own bytes.
3276 case 0: tmp1 = __byte_perm_S (buf0[0], 0, 0x6540);
3277 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7650);
3279 case 1: tmp1 = __byte_perm_S (buf0[0], 0, 0x6541);
3280 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7604);
3282 case 2: tmp1 = __byte_perm_S (buf0[0], 0, 0x6542);
3283 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7054);
3285 case 3: tmp1 = __byte_perm_S (buf0[0], 0, 0x6543);
3286 buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x0654);
3288 case 4: tmp1 = __byte_perm_S (buf0[1], 0, 0x6540);
3289 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7650);
3291 case 5: tmp1 = __byte_perm_S (buf0[1], 0, 0x6541);
3292 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7604);
3294 case 6: tmp1 = __byte_perm_S (buf0[1], 0, 0x6542);
3295 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7054);
3297 case 7: tmp1 = __byte_perm_S (buf0[1], 0, 0x6543);
3298 buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x0654);
3300 case 8: tmp1 = __byte_perm_S (buf0[2], 0, 0x6540);
3301 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7650);
3303 case 9: tmp1 = __byte_perm_S (buf0[2], 0, 0x6541);
3304 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7604);
3306 case 10: tmp1 = __byte_perm_S (buf0[2], 0, 0x6542);
3307 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7054);
3309 case 11: tmp1 = __byte_perm_S (buf0[2], 0, 0x6543);
3310 buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x0654);
3312 case 12: tmp1 = __byte_perm_S (buf0[3], 0, 0x6540);
3313 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7650);
3315 case 13: tmp1 = __byte_perm_S (buf0[3], 0, 0x6541);
3316 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7604);
3318 case 14: tmp1 = __byte_perm_S (buf0[3], 0, 0x6542);
3319 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7054);
3321 case 15: tmp1 = __byte_perm_S (buf0[3], 0, 0x6543);
3322 buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x0654);
3324 case 16: tmp1 = __byte_perm_S (buf1[0], 0, 0x6540);
3325 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7650);
3327 case 17: tmp1 = __byte_perm_S (buf1[0], 0, 0x6541);
3328 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7604);
3330 case 18: tmp1 = __byte_perm_S (buf1[0], 0, 0x6542);
3331 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7054);
3333 case 19: tmp1 = __byte_perm_S (buf1[0], 0, 0x6543);
3334 buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x0654);
3336 case 20: tmp1 = __byte_perm_S (buf1[1], 0, 0x6540);
3337 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7650);
3339 case 21: tmp1 = __byte_perm_S (buf1[1], 0, 0x6541);
3340 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7604);
3342 case 22: tmp1 = __byte_perm_S (buf1[1], 0, 0x6542);
3343 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7054);
3345 case 23: tmp1 = __byte_perm_S (buf1[1], 0, 0x6543);
3346 buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x0654);
3348 case 24: tmp1 = __byte_perm_S (buf1[2], 0, 0x6540);
3349 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7650);
3351 case 25: tmp1 = __byte_perm_S (buf1[2], 0, 0x6541);
3352 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7604);
3354 case 26: tmp1 = __byte_perm_S (buf1[2], 0, 0x6542);
3355 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7054);
3357 case 27: tmp1 = __byte_perm_S (buf1[2], 0, 0x6543);
3358 buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x0654);
3360 case 28: tmp1 = __byte_perm_S (buf1[3], 0, 0x6540);
3361 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7650);
3363 case 29: tmp1 = __byte_perm_S (buf1[3], 0, 0x6541);
3364 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7604);
3366 case 30: tmp1 = __byte_perm_S (buf1[3], 0, 0x6542);
3367 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7054);
3369 case 31: tmp1 = __byte_perm_S (buf1[3], 0, 0x6543);
3370 buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x0654);
// Table 3: store lane 0 of tmp1 at this position, using the same four
// replace-one-lane selectors as table 2.
3376 case 0: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7650);
3378 case 1: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7604);
3380 case 2: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7054);
3382 case 3: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x0654);
3384 case 4: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7650);
3386 case 5: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7604);
3388 case 6: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7054);
3390 case 7: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x0654);
3392 case 8: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7650);
3394 case 9: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7604);
3396 case 10: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7054);
3398 case 11: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x0654);
3400 case 12: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7650);
3402 case 13: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7604);
3404 case 14: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7054);
3406 case 15: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x0654);
3408 case 16: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7650);
3410 case 17: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7604);
3412 case 18: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7054);
3414 case 19: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x0654);
3416 case 20: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7650);
3418 case 21: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7604);
3420 case 22: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7054);
3422 case 23: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x0654);
3424 case 24: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7650);
3426 case 25: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7604);
3428 case 26: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7054);
3430 case 27: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x0654);
3432 case 28: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7650);
3434 case 29: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7604);
3436 case 30: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7054);
3438 case 31: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x0654);
// ---- Shift/mask variant for AMD and generic devices ----
3443 #if defined IS_AMD || defined IS_GENERIC
// Table 1: shift the addressed byte down to bit 0 and mask it into tmp0.
3446 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
3448 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
3450 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
3452 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
3454 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
3456 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
3458 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
3460 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
3462 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
3464 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
3466 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
3468 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
3470 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
3472 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
3474 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
3476 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
3478 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
3480 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
3482 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
3484 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
3486 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
3488 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
3490 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
3492 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
3494 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
3496 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
3498 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
3500 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
3502 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
3504 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
3506 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
3508 case 31: tmp0 = (buf1[3] >> 24) & 0xFF;
// Table 2: save the addressed byte in tmp1, clear its lane and insert tmp0.
// '<<' binds tighter than '|', so each store is (word & mask) | (tmp0 << shift).
3514 case 0: tmp1 = (buf0[0] >> 0) & 0xff;
3515 buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0;
3517 case 1: tmp1 = (buf0[0] >> 8) & 0xff;
3518 buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8;
3520 case 2: tmp1 = (buf0[0] >> 16) & 0xff;
3521 buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16;
3523 case 3: tmp1 = (buf0[0] >> 24) & 0xff;
3524 buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24;
3526 case 4: tmp1 = (buf0[1] >> 0) & 0xff;
3527 buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0;
3529 case 5: tmp1 = (buf0[1] >> 8) & 0xff;
3530 buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8;
3532 case 6: tmp1 = (buf0[1] >> 16) & 0xff;
3533 buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16;
3535 case 7: tmp1 = (buf0[1] >> 24) & 0xff;
3536 buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24;
3538 case 8: tmp1 = (buf0[2] >> 0) & 0xff;
3539 buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0;
3541 case 9: tmp1 = (buf0[2] >> 8) & 0xff;
3542 buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8;
3544 case 10: tmp1 = (buf0[2] >> 16) & 0xff;
3545 buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16;
3547 case 11: tmp1 = (buf0[2] >> 24) & 0xff;
3548 buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24;
3550 case 12: tmp1 = (buf0[3] >> 0) & 0xff;
3551 buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0;
3553 case 13: tmp1 = (buf0[3] >> 8) & 0xff;
3554 buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8;
3556 case 14: tmp1 = (buf0[3] >> 16) & 0xff;
3557 buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16;
3559 case 15: tmp1 = (buf0[3] >> 24) & 0xff;
3560 buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24;
3562 case 16: tmp1 = (buf1[0] >> 0) & 0xff;
3563 buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0;
3565 case 17: tmp1 = (buf1[0] >> 8) & 0xff;
3566 buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8;
3568 case 18: tmp1 = (buf1[0] >> 16) & 0xff;
3569 buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16;
3571 case 19: tmp1 = (buf1[0] >> 24) & 0xff;
3572 buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24;
3574 case 20: tmp1 = (buf1[1] >> 0) & 0xff;
3575 buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0;
3577 case 21: tmp1 = (buf1[1] >> 8) & 0xff;
3578 buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8;
3580 case 22: tmp1 = (buf1[1] >> 16) & 0xff;
3581 buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16;
3583 case 23: tmp1 = (buf1[1] >> 24) & 0xff;
3584 buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24;
3586 case 24: tmp1 = (buf1[2] >> 0) & 0xff;
3587 buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0;
3589 case 25: tmp1 = (buf1[2] >> 8) & 0xff;
3590 buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8;
3592 case 26: tmp1 = (buf1[2] >> 16) & 0xff;
3593 buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16;
3595 case 27: tmp1 = (buf1[2] >> 24) & 0xff;
3596 buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24;
3598 case 28: tmp1 = (buf1[3] >> 0) & 0xff;
3599 buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0;
3601 case 29: tmp1 = (buf1[3] >> 8) & 0xff;
3602 buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8;
3604 case 30: tmp1 = (buf1[3] >> 16) & 0xff;
3605 buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16;
3607 case 31: tmp1 = (buf1[3] >> 24) & 0xff;
3608 buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24;
// Table 3: clear the addressed lane and insert tmp1, completing the exchange.
3614 case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0;
3616 case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8;
3618 case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16;
3620 case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24;
3622 case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0;
3624 case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8;
3626 case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16;
3628 case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24;
3630 case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0;
3632 case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8;
3634 case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16;
3636 case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24;
3638 case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0;
3640 case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8;
3642 case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16;
3644 case 15: buf0[3] = (buf0[3] & 0x00ffffff) | tmp1 << 24;
3646 case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0;
3648 case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8;
3650 case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16;
3652 case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24;
3654 case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0;
3656 case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8;
3658 case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16;
3660 case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24;
3662 case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0;
3664 case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8;
3666 case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16;
3668 case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24;
3670 case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0;
3672 case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8;
3674 case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16;
3676 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24;
// Shift the character at position p0 left by one bit, in place.
//   p0     - position of the character to modify; must be below in_len
//   p1     - not referenced in the visible lines
//   buf0   - characters 0..15, four per u32, lowest byte first
//   buf1   - characters 16..31, same layout
//   in_len - current length of the word in bytes
// Returns in_len unchanged when p0 lies outside the word.
// NOTE(review): this excerpt omits short lines (braces, the definition of ml,
// the switch head and the final return). ml is presumably ~mr, the switch
// presumably selects the word p0 / 4, and the return presumably yields in_len
// - confirm against the full file.
3684 inline u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3686 if (p0 >= in_len) return (in_len);
// Mask covering the byte lane of character p0 inside its 32-bit word.
3688 const u32 mr = 0xffu << ((p0 & 3) * 8);
// Per word: keep the bits selected by ml, and replace the target lane with its
// own contents shifted left by one. The trailing '& mr' discards the bit that
// the shift pushes out of the lane, so the neighbouring character is untouched.
3693 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
3694 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
3695 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
3696 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
3697 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
3698 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
3699 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
3700 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
// Shift the character at position p0 right by one bit, in place.
//   p0     - position of the character to modify; must be below in_len
//   p1     - not referenced in the visible lines
//   buf0   - characters 0..15, four per u32, lowest byte first
//   buf1   - characters 16..31, same layout
//   in_len - current length of the word in bytes
// Returns in_len unchanged when p0 lies outside the word.
// NOTE(review): this excerpt omits short lines (braces, the definition of ml,
// the switch head and the final return). ml is presumably ~mr, the switch
// presumably selects the word p0 / 4, and the return presumably yields in_len
// - confirm against the full file.
3706 inline u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3708 if (p0 >= in_len) return (in_len);
// Mask covering the byte lane of character p0 inside its 32-bit word.
3710 const u32 mr = 0xffu << ((p0 & 3) * 8);
// Per word: keep the bits selected by ml, and replace the target lane with its
// own contents shifted right by one. The trailing '& mr' discards the bit that
// the shift pushes into the lane below, so the neighbouring character is
// untouched.
3715 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
3716 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
3717 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
3718 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
3719 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
3720 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
3721 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
3722 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
// Rule: add one to the character at byte position p0, in place.
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3].
// p1 is not referenced in the lines visible here.
3728 inline u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Position outside the current length: nothing is modified, in_len is returned.
3730 if (p0 >= in_len) return (in_len);
// mr = 0xff placed in byte lane (p0 & 3) of a word.
3732 const u32 mr = 0xffu << ((p0 & 3) * 8);
// n = the value 1 positioned in the same byte lane as mr.
3735 const u32 n = 0x01010101 & mr;
// NOTE(review): `ml` and the switch selector are declared on lines outside this
// view -- presumably ml == ~mr and the selector is the word index p0 / 4; confirm.
// Each case keeps the bits under ml and stores (byte + 1) into the lane under mr;
// a carry out of the lane is discarded by `& mr`, so 0xff wraps to 0x00.
3739 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
3740 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
3741 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
3742 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
3743 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
3744 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
3745 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
3746 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
// Rule: subtract one from the character at byte position p0, in place.
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3].
// p1 is not referenced in the lines visible here.
3752 inline u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Position outside the current length: nothing is modified, in_len is returned.
3754 if (p0 >= in_len) return (in_len);
// mr = 0xff placed in byte lane (p0 & 3) of a word.
3756 const u32 mr = 0xffu << ((p0 & 3) * 8);
// n = the value 1 positioned in the same byte lane as mr.
3759 const u32 n = 0x01010101 & mr;
// NOTE(review): `ml` and the switch selector are declared on lines outside this
// view -- presumably ml == ~mr and the selector is the word index p0 / 4; confirm.
// Each case keeps the bits under ml and stores (byte - 1) into the lane under mr;
// a borrow propagating above the lane is discarded by `& mr`, so 0x00 wraps to 0xff.
3763 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
3764 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
3765 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
3766 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
3767 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
3768 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
3769 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
3770 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
// Rule: overwrite the character at byte position p0 with a byte taken from a
// one-byte-shifted copy of the candidate (the "N plus 1" replace rule).
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3].
// p1 is not referenced in the lines visible here.
3776 inline u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// There must be a character after p0 inside the current length; otherwise the
// buffers are left untouched and in_len is returned.
3778 if ((p0 + 1) >= in_len) return (in_len);
// tib40/tib41 receive the shifted copy produced by lshift_block (defined near
// the top of this file). NOTE(review): their declarations are on lines outside
// this view; presumably byte i of the copy equals byte i + 1 of the input -- confirm.
3783 lshift_block (buf0, buf1, tib40, tib41);
// mr = 0xff placed in byte lane (p0 & 3) of a word.
3785 const u32 mr = 0xffu << ((p0 & 3) * 8);
// NOTE(review): `ml` and the switch selector are declared on lines outside this
// view -- presumably ml == ~mr and the selector is the word index p0 / 4; confirm.
// Each case keeps the bits under ml and takes the lane under mr from the shifted copy.
3790 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3791 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3792 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3793 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3794 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3795 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3796 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3797 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
// Rule: overwrite the character at byte position p0 with a byte taken from a
// one-byte-shifted copy of the candidate (the "N minus 1" replace rule).
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3].
// p1 is not referenced in the lines visible here.
3803 inline u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Position 0 is rejected up front: the buffers stay untouched, in_len is returned.
3805 if (p0 == 0) return (in_len);
// Position outside the current length: same no-op result.
3807 if (p0 >= in_len) return (in_len);
// tib40/tib41 receive the shifted copy produced by rshift_block.
// NOTE(review): rshift_block and the tib40/tib41 declarations are outside this
// view; presumably byte i of the copy equals byte i - 1 of the input -- confirm.
3812 rshift_block (buf0, buf1, tib40, tib41);
// mr = 0xff placed in byte lane (p0 & 3) of a word.
3814 const u32 mr = 0xffu << ((p0 & 3) * 8);
// NOTE(review): `ml` and the switch selector are declared on lines outside this
// view -- presumably ml == ~mr and the selector is the word index p0 / 4; confirm.
// Each case keeps the bits under ml and takes the lane under mr from the shifted copy.
3819 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3820 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3821 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3822 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3823 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3824 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3825 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3826 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
// Rule: duplicate a leading block of p0 bytes of the candidate.
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3]
// and is modified in place. p1 is not referenced in the lines visible here.
3832 inline u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// A block longer than the candidate is rejected: no change, in_len returned.
3834 if (p0 > in_len) return (in_len);
// The combined length must stay below 32; otherwise no change, in_len returned.
3836 if ((in_len + p0) >= 32) return (in_len);
3838 u32 out_len = in_len;
// NOTE(review): tib40/tib41 are declared and filled on lines outside this view
// (presumably a copy of buf0/buf1 -- confirm). truncate_right (defined near the
// top of this file) is applied to that copy with length p0.
3852 truncate_right (tib40, tib41, p0);
// The candidate is shifted by p0 bytes onto itself (input and output arrays are
// the same). NOTE(review): rshift_block_N is defined outside this view;
// presumably it moves bytes toward higher positions and zero-fills the gap -- confirm.
3854 rshift_block_N (buf0, buf1, buf0, buf1, p0);
// Merge the truncated copy into the shifted candidate word by word.
3856 buf0[0] |= tib40[0];
3857 buf0[1] |= tib40[1];
3858 buf0[2] |= tib40[2];
3859 buf0[3] |= tib40[3];
3860 buf1[0] |= tib41[0];
3861 buf1[1] |= tib41[1];
3862 buf1[2] |= tib41[2];
3863 buf1[3] |= tib41[3];
// NOTE(review): the out_len update and the return statement are not visible
// here; presumably out_len grows by p0 before being returned -- confirm.
// Rule: duplicate a trailing block of p0 bytes of the candidate.
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3]
// and is modified in place. p1 is not referenced in the lines visible here.
3870 inline u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// A block longer than the candidate is rejected: no change, in_len returned.
3872 if (p0 > in_len) return (in_len);
// The combined length must stay below 32; otherwise no change, in_len returned.
3874 if ((in_len + p0) >= 32) return (in_len);
3876 u32 out_len = in_len;
// tib40/tib41 receive a copy of the candidate shifted by p0 bytes.
// NOTE(review): rshift_block_N and the tib40/tib41 declarations are outside this
// view; presumably the shift moves bytes toward higher positions -- confirm.
3881 rshift_block_N (buf0, buf1, tib40, tib41, p0);
// truncate_left (defined near the top of this file) is applied to the shifted
// copy with the original length, at this point still held in out_len.
3883 truncate_left (tib40, tib41, out_len);
// Merge what remains of the shifted copy into the candidate word by word.
3885 buf0[0] |= tib40[0];
3886 buf0[1] |= tib40[1];
3887 buf0[2] |= tib40[2];
3888 buf0[3] |= tib40[3];
3889 buf1[0] |= tib41[0];
3890 buf1[1] |= tib41[1];
3891 buf1[2] |= tib41[2];
3892 buf1[3] |= tib41[3];
// NOTE(review): the out_len update and the return statement are not visible
// here; presumably out_len grows by p0 before being returned -- confirm.
// Rule: title-case the candidate in place -- lowercase every letter, then
// uppercase the first character and each character that follows a space.
// The candidate is held in the eight u32 words buf0[0..3] followed by buf1[0..3].
// p0 and p1 are not referenced in the lines visible here.
3899 inline u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Step 1: generate_cmask (defined near the top of this file) yields 0x20 in every
// byte lane that holds an ASCII letter; OR-ing it in sets the case bit, i.e.
// forces those letters to lowercase.
3901 buf0[0] |= (generate_cmask (buf0[0]));
3902 buf0[1] |= (generate_cmask (buf0[1]));
3903 buf0[2] |= (generate_cmask (buf0[2]));
3904 buf0[3] |= (generate_cmask (buf0[3]));
3905 buf1[0] |= (generate_cmask (buf1[0]));
3906 buf1[1] |= (generate_cmask (buf1[1]));
3907 buf1[2] |= (generate_cmask (buf1[2]));
3908 buf1[3] |= (generate_cmask (buf1[3]));
// Step 2, first code path. NOTE(review): the preprocessor guard opening this
// path is on a line outside this view (presumably an NVIDIA-specific branch,
// given the use of __byte_perm_S) -- confirm.
// Clear the case bit of byte 0 of the first word: uppercase the first character.
3911 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
// Walk the candidate; the switch header is not visible, but the case labels line
// up with the loop index i (presumably `switch (i)` -- confirm).
3913 for (u32 i = 0; i < in_len; i++)
// In each case: tmp0 = the byte at position i, extracted with __byte_perm_S
// (NOTE(review): presumably CUDA __byte_perm selector semantics, where 0x654k
// picks byte k of the first operand into the low lane -- confirm);
// tmp1 = an AND-mask that clears the case bit of the byte at position i + 1
// when that byte is a letter. No case is visible for i == 31.
3920 case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
3921 tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
3922 case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
3923 tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
3924 case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
3925 tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
3926 case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
3927 tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
3928 case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
3929 tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
3930 case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
3931 tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
3932 case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
3933 tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
3934 case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
3935 tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
3936 case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
3937 tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
3938 case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
3939 tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
3940 case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
3941 tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
3942 case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
3943 tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
3944 case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
3945 tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
3946 case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
3947 tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
3948 case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
3949 tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
3950 case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
3951 tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
3952 case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
3953 tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
3954 case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
3955 tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
3956 case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
3957 tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
3958 case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
3959 tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
3960 case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
3961 tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
3962 case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
3963 tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
3964 case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
3965 tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
3966 case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
3967 tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
3968 case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
3969 tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
3970 case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
3971 tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
3972 case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
3973 tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
3974 case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
3975 tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
3976 case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
3977 tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
3978 case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
3979 tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
3980 case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
3981 tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
// When the byte at position i is a space, apply tmp1 to the word that holds
// position i + 1, uppercasing the following letter.
// NOTE(review): the conditions choosing which of the eight statements below runs
// are on lines outside this view (presumably ranges of i) -- confirm.
3986 if (tmp0 == ' ') buf0[0] &= tmp1 ;
3990 if (tmp0 == ' ') buf0[1] &= tmp1 ;
3994 if (tmp0 == ' ') buf0[2] &= tmp1 ;
3998 if (tmp0 == ' ') buf0[3] &= tmp1 ;
4002 if (tmp0 == ' ') buf1[0] &= tmp1 ;
4006 if (tmp0 == ' ') buf1[1] &= tmp1 ;
4010 if (tmp0 == ' ') buf1[2] &= tmp1 ;
4014 if (tmp0 == ' ') buf1[3] &= tmp1 ;
// Step 2, second code path: loop-free variant for AMD / generic devices.
4019 #if defined IS_AMD || defined IS_GENERIC
// Byte-vector constants: a space in every lane, all-zero, and all-ones.
4023 const uchar4 tmp0 = (uchar4) (' ');
4024 const uchar4 tmp1 = (uchar4) (0x00);
4025 const uchar4 tmp2 = (uchar4) (0xff);
// Build a per-byte mask in tib40/tib41: 0xff where the candidate byte is a
// space, 0x00 elsewhere (vector compare feeding select).
// NOTE(review): `tmp`, tib40 and tib41 are declared on lines outside this view.
4029 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
4030 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
4031 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
4032 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
4033 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
4034 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
4035 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
4036 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
// Shift the space mask by one byte onto itself and force the first byte lane on.
// NOTE(review): rshift_block is defined outside this view; presumably the mask
// then marks the byte after each space plus position 0 -- confirm.
4038 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
// Clear the case bit of every letter under the mask, uppercasing it.
4040 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
4041 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
4042 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
4043 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
4044 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
4045 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
4046 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
4047 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
// NOTE(review): the closing #endif and the return statement are not visible
// here; no visible line changes the length, so in_len is presumably returned.
// Dispatch one rule operation: `name` selects the rule_op_mangle_* routine, which
// is called with the parameters p0/p1 on the candidate in buf0/buf1.
// Each routine receives the current length and its result replaces out_len.
4053 inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
// Starts as the incoming length; stays at that value when no case below runs.
4055 u32 out_len = in_len;
// NOTE(review): the switch header is on a line outside this view (presumably
// `switch (name)`, given the RULE_OP_MANGLE_* labels) -- confirm.
4059 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
4060 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
4061 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
4062 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
4063 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
4064 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
4065 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
4066 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
4067 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
4068 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
4069 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
4070 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
4071 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
4072 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
4073 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
4074 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
4075 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
4076 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
4077 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
4078 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
4079 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
4080 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
4081 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
// The next two operations are disabled: their cases are commented out, so these
// names leave the candidate and length unchanged.
4082 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
4083 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
4084 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
4085 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
4086 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
4087 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
4088 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
4089 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
4090 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
4091 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
4092 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
4093 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
4094 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
4095 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
4096 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
4097 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
4098 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
// NOTE(review): the return statement is not visible here; presumably out_len.
// Run a sequence of packed rule commands over the candidate in buf0/buf1.
// cmds is read from __global memory and is terminated by a zero entry.
// NOTE(review): the declaration of out_len and the return statement are on lines
// outside this view; presumably out_len starts at `len` and is returned -- confirm.
4104 inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
// Stop at the first command word equal to 0.
4108 for (u32 i = 0; cmds[i] != 0; i++)
4110 const u32 cmd = cmds[i];
// Unpack the command word: bits 0-7 = rule name, bits 8-15 = p0, bits 16-23 = p1.
4112 const u32 name = (cmd >> 0) & 0xff;
4113 const u32 p0 = (cmd >> 8) & 0xff;
4114 const u32 p1 = (cmd >> 16) & 0xff;
// Each rule sees the length produced by the previous one.
4116 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
// Vector front end for apply_rules: applies the rule sets starting at
// rules_buf[il_pos] to the base word pw_buf0/pw_buf1 (length pw_len) and returns
// the resulting length(s) as a u32x.
// NOTE(review): the preprocessor guards separating the two paths below, the
// declarations of tmp0/tmp1/out_len and the stores into w0/w1 are on lines
// outside this view.
4122 inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
// Direct path: the rules run on w0/w1 themselves and the length is returned as is.
// NOTE(review): presumably compiled only when VECT_SIZE == 1 (u32x == u32), with
// w0/w1 filled from pw_buf0/pw_buf1 beforehand -- confirm.
4135 return apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
// Vector path: one pass per lane, lane i using rule set il_pos + i.
4144 for (int i = 0; i < VECT_SIZE; i++)
// Start every lane from a fresh scalar copy of the base word.
4149 tmp0[0] = pw_buf0[0];
4150 tmp0[1] = pw_buf0[1];
4151 tmp0[2] = pw_buf0[2];
4152 tmp0[3] = pw_buf0[3];
4153 tmp1[0] = pw_buf1[0];
4154 tmp1[1] = pw_buf1[1];
4155 tmp1[2] = pw_buf1[2];
4156 tmp1[3] = pw_buf1[3];
4158 const u32 tmp_len = apply_rules (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
// Store the lane's resulting length into the matching component of out_len.
// NOTE(review): the selection of the component by lane index (and the matching
// copy of tmp0/tmp1 into the w0/w1 lanes) is not visible here -- confirm.
4172 out_len.s0 = tmp_len;
4184 out_len.s1 = tmp_len;
4198 out_len.s2 = tmp_len;
4210 out_len.s3 = tmp_len;
4224 out_len.s4 = tmp_len;
4236 out_len.s5 = tmp_len;
4248 out_len.s6 = tmp_len;
4260 out_len.s7 = tmp_len;
4274 out_len.s8 = tmp_len;
4286 out_len.s9 = tmp_len;
4298 out_len.sa = tmp_len;
4310 out_len.sb = tmp_len;
4322 out_len.sc = tmp_len;
4334 out_len.sd = tmp_len;
4346 out_len.se = tmp_len;
4358 out_len.sf = tmp_len;
4369 #ifdef DO_NOT_USE_BYTE_PERM