Merge pull request #167 from gm4tr1x/issue84
[hashcat.git] / OpenCL / rp.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
7 u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
8 u32 apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
9
10 static u32 generate_cmask (u32 buf)
11 {
12 const u32 rmask = ((buf & 0x40404040) >> 1)
13 & ~((buf & 0x80808080) >> 2);
14
15 const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
16 const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
17
18 return rmask & ~hmask & lmask;
19 }
20
21 static void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
22 {
23 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
24
25 switch (len / 4)
26 {
27 case 0: w0[0] &= tmp;
28 w0[1] = 0;
29 w0[2] = 0;
30 w0[3] = 0;
31 w1[0] = 0;
32 w1[1] = 0;
33 w1[2] = 0;
34 w1[3] = 0;
35 break;
36 case 1: w0[1] &= tmp;
37 w0[2] = 0;
38 w0[3] = 0;
39 w1[0] = 0;
40 w1[1] = 0;
41 w1[2] = 0;
42 w1[3] = 0;
43 break;
44 case 2: w0[2] &= tmp;
45 w0[3] = 0;
46 w1[0] = 0;
47 w1[1] = 0;
48 w1[2] = 0;
49 w1[3] = 0;
50 break;
51 case 3: w0[3] &= tmp;
52 w1[0] = 0;
53 w1[1] = 0;
54 w1[2] = 0;
55 w1[3] = 0;
56 break;
57 case 4: w1[0] &= tmp;
58 w1[1] = 0;
59 w1[2] = 0;
60 w1[3] = 0;
61 break;
62 case 5: w1[1] &= tmp;
63 w1[2] = 0;
64 w1[3] = 0;
65 break;
66 case 6: w1[2] &= tmp;
67 w1[3] = 0;
68 break;
69 case 7: w1[3] &= tmp;
70 break;
71 }
72 }
73
74 static void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
75 {
76 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
77
78 switch (len / 4)
79 {
80 case 0: w0[0] &= tmp;
81 break;
82 case 1: w0[0] = 0;
83 w0[1] &= tmp;
84 break;
85 case 2: w0[0] = 0;
86 w0[1] = 0;
87 w0[2] &= tmp;
88 break;
89 case 3: w0[0] = 0;
90 w0[1] = 0;
91 w0[2] = 0;
92 w0[3] &= tmp;
93 break;
94 case 4: w0[0] = 0;
95 w0[1] = 0;
96 w0[2] = 0;
97 w0[3] = 0;
98 w1[0] &= tmp;
99 break;
100 case 5: w0[0] = 0;
101 w0[1] = 0;
102 w0[2] = 0;
103 w0[3] = 0;
104 w1[0] = 0;
105 w1[1] &= tmp;
106 break;
107 case 6: w0[0] = 0;
108 w0[1] = 0;
109 w0[2] = 0;
110 w0[3] = 0;
111 w1[0] = 0;
112 w1[1] = 0;
113 w1[2] &= tmp;
114 break;
115 case 7: w0[0] = 0;
116 w0[1] = 0;
117 w0[2] = 0;
118 w0[3] = 0;
119 w1[0] = 0;
120 w1[1] = 0;
121 w1[2] = 0;
122 w1[3] &= tmp;
123 break;
124 }
125 }
126
127 static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
128 {
129 #ifdef IS_NV
130 out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
131 out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
132 out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
133 out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
134 out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
135 out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
136 out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
137 out1[3] = __byte_perm (in1[3], 0, 0x4321);
138 #endif
139
140 #if defined IS_AMD || defined IS_GENERIC
141 out0[0] = amd_bytealign (in0[1], in0[0], 1);
142 out0[1] = amd_bytealign (in0[2], in0[1], 1);
143 out0[2] = amd_bytealign (in0[3], in0[2], 1);
144 out0[3] = amd_bytealign (in1[0], in0[3], 1);
145 out1[0] = amd_bytealign (in1[1], in1[0], 1);
146 out1[1] = amd_bytealign (in1[2], in1[1], 1);
147 out1[2] = amd_bytealign (in1[3], in1[2], 1);
148 out1[3] = amd_bytealign ( 0, in1[3], 1);
149 #endif
150 }
151
152 static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
153 {
154 #ifdef IS_NV
155 out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
156 out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
157 out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
158 out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
159 out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
160 out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
161 out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
162 out0[0] = __byte_perm ( 0, in0[0], 0x6543);
163 #endif
164
165 #if defined IS_AMD || defined IS_GENERIC
166 out1[3] = amd_bytealign (in1[3], in1[2], 3);
167 out1[2] = amd_bytealign (in1[2], in1[1], 3);
168 out1[1] = amd_bytealign (in1[1], in1[0], 3);
169 out1[0] = amd_bytealign (in1[0], in0[3], 3);
170 out0[3] = amd_bytealign (in0[3], in0[2], 3);
171 out0[2] = amd_bytealign (in0[2], in0[1], 3);
172 out0[1] = amd_bytealign (in0[1], in0[0], 3);
173 out0[0] = amd_bytealign (in0[0], 0, 3);
174 #endif
175 }
176
177 static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
178 {
179 #ifdef IS_NV
180 switch (num)
181 {
182 case 0: out0[0] = in0[0];
183 out0[1] = in0[1];
184 out0[2] = in0[2];
185 out0[3] = in0[3];
186 out1[0] = in1[0];
187 out1[1] = in1[1];
188 out1[2] = in1[2];
189 out1[3] = in1[3];
190 break;
191 case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
192 out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
193 out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
194 out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
195 out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
196 out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
197 out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
198 out1[3] = __byte_perm (in1[3], 0, 0x4321);
199 break;
200 case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432);
201 out0[1] = __byte_perm (in0[1], in0[2], 0x5432);
202 out0[2] = __byte_perm (in0[2], in0[3], 0x5432);
203 out0[3] = __byte_perm (in0[3], in1[0], 0x5432);
204 out1[0] = __byte_perm (in1[0], in1[1], 0x5432);
205 out1[1] = __byte_perm (in1[1], in1[2], 0x5432);
206 out1[2] = __byte_perm (in1[2], in1[3], 0x5432);
207 out1[3] = __byte_perm (in1[3], 0, 0x5432);
208 break;
209 case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543);
210 out0[1] = __byte_perm (in0[1], in0[2], 0x6543);
211 out0[2] = __byte_perm (in0[2], in0[3], 0x6543);
212 out0[3] = __byte_perm (in0[3], in1[0], 0x6543);
213 out1[0] = __byte_perm (in1[0], in1[1], 0x6543);
214 out1[1] = __byte_perm (in1[1], in1[2], 0x6543);
215 out1[2] = __byte_perm (in1[2], in1[3], 0x6543);
216 out1[3] = __byte_perm (in1[3], 0, 0x6543);
217 break;
218 case 4: out0[0] = in0[1];
219 out0[1] = in0[2];
220 out0[2] = in0[3];
221 out0[3] = in1[0];
222 out1[0] = in1[1];
223 out1[1] = in1[2];
224 out1[2] = in1[3];
225 out1[3] = 0;
226 break;
227 case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321);
228 out0[1] = __byte_perm (in0[2], in0[3], 0x4321);
229 out0[2] = __byte_perm (in0[3], in1[0], 0x4321);
230 out0[3] = __byte_perm (in1[0], in1[1], 0x4321);
231 out1[0] = __byte_perm (in1[1], in1[2], 0x4321);
232 out1[1] = __byte_perm (in1[2], in1[3], 0x4321);
233 out1[2] = __byte_perm (in1[3], 0, 0x4321);
234 out1[3] = 0;
235 break;
236 case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432);
237 out0[1] = __byte_perm (in0[2], in0[3], 0x5432);
238 out0[2] = __byte_perm (in0[3], in1[0], 0x5432);
239 out0[3] = __byte_perm (in1[0], in1[1], 0x5432);
240 out1[0] = __byte_perm (in1[1], in1[2], 0x5432);
241 out1[1] = __byte_perm (in1[2], in1[3], 0x5432);
242 out1[2] = __byte_perm (in1[3], 0, 0x5432);
243 out1[3] = 0;
244 break;
245 case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543);
246 out0[1] = __byte_perm (in0[2], in0[3], 0x6543);
247 out0[2] = __byte_perm (in0[3], in1[0], 0x6543);
248 out0[3] = __byte_perm (in1[0], in1[1], 0x6543);
249 out1[0] = __byte_perm (in1[1], in1[2], 0x6543);
250 out1[1] = __byte_perm (in1[2], in1[3], 0x6543);
251 out1[2] = __byte_perm (in1[3], 0, 0x6543);
252 out1[3] = 0;
253 break;
254 case 8: out0[0] = in0[2];
255 out0[1] = in0[3];
256 out0[2] = in1[0];
257 out0[3] = in1[1];
258 out1[0] = in1[2];
259 out1[1] = in1[3];
260 out1[2] = 0;
261 out1[3] = 0;
262 break;
263 case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321);
264 out0[1] = __byte_perm (in0[3], in1[0], 0x4321);
265 out0[2] = __byte_perm (in1[0], in1[1], 0x4321);
266 out0[3] = __byte_perm (in1[1], in1[2], 0x4321);
267 out1[0] = __byte_perm (in1[2], in1[3], 0x4321);
268 out1[1] = __byte_perm (in1[3], 0, 0x4321);
269 out1[2] = 0;
270 out1[3] = 0;
271 break;
272 case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432);
273 out0[1] = __byte_perm (in0[3], in1[0], 0x5432);
274 out0[2] = __byte_perm (in1[0], in1[1], 0x5432);
275 out0[3] = __byte_perm (in1[1], in1[2], 0x5432);
276 out1[0] = __byte_perm (in1[2], in1[3], 0x5432);
277 out1[1] = __byte_perm (in1[3], 0, 0x5432);
278 out1[2] = 0;
279 out1[3] = 0;
280 break;
281 case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543);
282 out0[1] = __byte_perm (in0[3], in1[0], 0x6543);
283 out0[2] = __byte_perm (in1[0], in1[1], 0x6543);
284 out0[3] = __byte_perm (in1[1], in1[2], 0x6543);
285 out1[0] = __byte_perm (in1[2], in1[3], 0x6543);
286 out1[1] = __byte_perm (in1[3], 0, 0x6543);
287 out1[2] = 0;
288 out1[3] = 0;
289 break;
290 case 12: out0[0] = in0[3];
291 out0[1] = in1[0];
292 out0[2] = in1[1];
293 out0[3] = in1[2];
294 out1[0] = in1[3];
295 out1[1] = 0;
296 out1[2] = 0;
297 out1[3] = 0;
298 break;
299 case 13:
300 out0[0] = __byte_perm (in0[3], in1[0], 0x4321);
301 out0[1] = __byte_perm (in1[0], in1[1], 0x4321);
302 out0[2] = __byte_perm (in1[1], in1[2], 0x4321);
303 out0[3] = __byte_perm (in1[2], in1[3], 0x4321);
304 out1[0] = __byte_perm (in1[3], 0, 0x4321);
305 out1[1] = 0;
306 out1[2] = 0;
307 out1[3] = 0;
308 break;
309 case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432);
310 out0[1] = __byte_perm (in1[0], in1[1], 0x5432);
311 out0[2] = __byte_perm (in1[1], in1[2], 0x5432);
312 out0[3] = __byte_perm (in1[2], in1[3], 0x5432);
313 out1[0] = __byte_perm (in1[3], 0, 0x5432);
314 out1[1] = 0;
315 out1[2] = 0;
316 out1[3] = 0;
317 break;
318 case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543);
319 out0[1] = __byte_perm (in1[0], in1[1], 0x6543);
320 out0[2] = __byte_perm (in1[1], in1[2], 0x6543);
321 out0[3] = __byte_perm (in1[2], in1[3], 0x6543);
322 out1[0] = __byte_perm (in1[3], 0, 0x6543);
323 out1[1] = 0;
324 out1[2] = 0;
325 out1[3] = 0;
326 break;
327 case 16: out0[0] = in1[0];
328 out0[1] = in1[1];
329 out0[2] = in1[2];
330 out0[3] = in1[3];
331 out1[0] = 0;
332 out1[1] = 0;
333 out1[2] = 0;
334 out1[3] = 0;
335 break;
336 case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321);
337 out0[1] = __byte_perm (in1[1], in1[2], 0x4321);
338 out0[2] = __byte_perm (in1[2], in1[3], 0x4321);
339 out0[3] = __byte_perm (in1[3], 0, 0x4321);
340 out1[0] = 0;
341 out1[1] = 0;
342 out1[2] = 0;
343 out1[3] = 0;
344 break;
345 case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432);
346 out0[1] = __byte_perm (in1[1], in1[2], 0x5432);
347 out0[2] = __byte_perm (in1[2], in1[3], 0x5432);
348 out0[3] = __byte_perm (in1[3], 0, 0x5432);
349 out1[0] = 0;
350 out1[1] = 0;
351 out1[2] = 0;
352 out1[3] = 0;
353 break;
354 case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543);
355 out0[1] = __byte_perm (in1[1], in1[2], 0x6543);
356 out0[2] = __byte_perm (in1[2], in1[3], 0x6543);
357 out0[3] = __byte_perm (in1[3], 0, 0x6543);
358 out1[0] = 0;
359 out1[1] = 0;
360 out1[2] = 0;
361 out1[3] = 0;
362 break;
363 case 20: out0[0] = in1[1];
364 out0[1] = in1[2];
365 out0[2] = in1[3];
366 out0[3] = 0;
367 out1[0] = 0;
368 out1[1] = 0;
369 out1[2] = 0;
370 out1[3] = 0;
371 break;
372 case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321);
373 out0[1] = __byte_perm (in1[2], in1[3], 0x4321);
374 out0[2] = __byte_perm (in1[3], 0, 0x4321);
375 out0[3] = 0;
376 out1[0] = 0;
377 out1[1] = 0;
378 out1[2] = 0;
379 out1[3] = 0;
380 break;
381 case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432);
382 out0[1] = __byte_perm (in1[2], in1[3], 0x5432);
383 out0[2] = __byte_perm (in1[3], 0, 0x5432);
384 out0[3] = 0;
385 out1[0] = 0;
386 out1[1] = 0;
387 out1[2] = 0;
388 out1[3] = 0;
389 break;
390 case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543);
391 out0[1] = __byte_perm (in1[2], in1[3], 0x6543);
392 out0[2] = __byte_perm (in1[3], 0, 0x6543);
393 out0[3] = 0;
394 out1[0] = 0;
395 out1[1] = 0;
396 out1[2] = 0;
397 out1[3] = 0;
398 break;
399 case 24: out0[0] = in1[2];
400 out0[1] = in1[3];
401 out0[2] = 0;
402 out0[3] = 0;
403 out1[0] = 0;
404 out1[1] = 0;
405 out1[2] = 0;
406 out1[3] = 0;
407 break;
408 case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321);
409 out0[1] = __byte_perm (in1[3], 0, 0x4321);
410 out0[2] = 0;
411 out0[3] = 0;
412 out1[0] = 0;
413 out1[1] = 0;
414 out1[2] = 0;
415 out1[3] = 0;
416 break;
417 case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432);
418 out0[1] = __byte_perm (in1[3], 0, 0x5432);
419 out0[2] = 0;
420 out0[3] = 0;
421 out1[0] = 0;
422 out1[1] = 0;
423 out1[2] = 0;
424 out1[3] = 0;
425 break;
426 case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543);
427 out0[1] = __byte_perm (in1[3], 0, 0x6543);
428 out0[2] = 0;
429 out0[3] = 0;
430 out1[0] = 0;
431 out1[1] = 0;
432 out1[2] = 0;
433 out1[3] = 0;
434 break;
435 case 28: out0[0] = in1[3];
436 out0[1] = 0;
437 out0[2] = 0;
438 out0[3] = 0;
439 out1[0] = 0;
440 out1[1] = 0;
441 out1[2] = 0;
442 out1[3] = 0;
443 break;
444 case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321);
445 out0[1] = 0;
446 out0[2] = 0;
447 out0[3] = 0;
448 out1[0] = 0;
449 out1[1] = 0;
450 out1[2] = 0;
451 out1[3] = 0;
452 break;
453 case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432);
454 out0[1] = 0;
455 out0[2] = 0;
456 out0[3] = 0;
457 out1[0] = 0;
458 out1[1] = 0;
459 out1[2] = 0;
460 out1[3] = 0;
461 break;
462 case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543);
463 out0[1] = 0;
464 out0[2] = 0;
465 out0[3] = 0;
466 out1[0] = 0;
467 out1[1] = 0;
468 out1[2] = 0;
469 out1[3] = 0;
470 break;
471 }
472 #endif
473
474 #if defined IS_AMD || defined IS_GENERIC
475 switch (num)
476 {
477 case 0: out0[0] = in0[0];
478 out0[1] = in0[1];
479 out0[2] = in0[2];
480 out0[3] = in0[3];
481 out1[0] = in1[0];
482 out1[1] = in1[1];
483 out1[2] = in1[2];
484 out1[3] = in1[3];
485 break;
486 case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1);
487 out0[1] = amd_bytealign (in0[2], in0[1], 1);
488 out0[2] = amd_bytealign (in0[3], in0[2], 1);
489 out0[3] = amd_bytealign (in1[0], in0[3], 1);
490 out1[0] = amd_bytealign (in1[1], in1[0], 1);
491 out1[1] = amd_bytealign (in1[2], in1[1], 1);
492 out1[2] = amd_bytealign (in1[3], in1[2], 1);
493 out1[3] = amd_bytealign ( 0, in1[3], 1);
494 break;
495 case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2);
496 out0[1] = amd_bytealign (in0[2], in0[1], 2);
497 out0[2] = amd_bytealign (in0[3], in0[2], 2);
498 out0[3] = amd_bytealign (in1[0], in0[3], 2);
499 out1[0] = amd_bytealign (in1[1], in1[0], 2);
500 out1[1] = amd_bytealign (in1[2], in1[1], 2);
501 out1[2] = amd_bytealign (in1[3], in1[2], 2);
502 out1[3] = amd_bytealign ( 0, in1[3], 2);
503 break;
504 case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3);
505 out0[1] = amd_bytealign (in0[2], in0[1], 3);
506 out0[2] = amd_bytealign (in0[3], in0[2], 3);
507 out0[3] = amd_bytealign (in1[0], in0[3], 3);
508 out1[0] = amd_bytealign (in1[1], in1[0], 3);
509 out1[1] = amd_bytealign (in1[2], in1[1], 3);
510 out1[2] = amd_bytealign (in1[3], in1[2], 3);
511 out1[3] = amd_bytealign ( 0, in1[3], 3);
512 break;
513 case 4: out0[0] = in0[1];
514 out0[1] = in0[2];
515 out0[2] = in0[3];
516 out0[3] = in1[0];
517 out1[0] = in1[1];
518 out1[1] = in1[2];
519 out1[2] = in1[3];
520 out1[3] = 0;
521 break;
522 case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1);
523 out0[1] = amd_bytealign (in0[3], in0[2], 1);
524 out0[2] = amd_bytealign (in1[0], in0[3], 1);
525 out0[3] = amd_bytealign (in1[1], in1[0], 1);
526 out1[0] = amd_bytealign (in1[2], in1[1], 1);
527 out1[1] = amd_bytealign (in1[3], in1[2], 1);
528 out1[2] = amd_bytealign ( 0, in1[3], 1);
529 out1[3] = 0;
530 break;
531 case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2);
532 out0[1] = amd_bytealign (in0[3], in0[2], 2);
533 out0[2] = amd_bytealign (in1[0], in0[3], 2);
534 out0[3] = amd_bytealign (in1[1], in1[0], 2);
535 out1[0] = amd_bytealign (in1[2], in1[1], 2);
536 out1[1] = amd_bytealign (in1[3], in1[2], 2);
537 out1[2] = amd_bytealign ( 0, in1[3], 2);
538 out1[3] = 0;
539 break;
540 case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3);
541 out0[1] = amd_bytealign (in0[3], in0[2], 3);
542 out0[2] = amd_bytealign (in1[0], in0[3], 3);
543 out0[3] = amd_bytealign (in1[1], in1[0], 3);
544 out1[0] = amd_bytealign (in1[2], in1[1], 3);
545 out1[1] = amd_bytealign (in1[3], in1[2], 3);
546 out1[2] = amd_bytealign ( 0, in1[3], 3);
547 out1[3] = 0;
548 break;
549 case 8: out0[0] = in0[2];
550 out0[1] = in0[3];
551 out0[2] = in1[0];
552 out0[3] = in1[1];
553 out1[0] = in1[2];
554 out1[1] = in1[3];
555 out1[2] = 0;
556 out1[3] = 0;
557 break;
558 case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1);
559 out0[1] = amd_bytealign (in1[0], in0[3], 1);
560 out0[2] = amd_bytealign (in1[1], in1[0], 1);
561 out0[3] = amd_bytealign (in1[2], in1[1], 1);
562 out1[0] = amd_bytealign (in1[3], in1[2], 1);
563 out1[1] = amd_bytealign ( 0, in1[3], 1);
564 out1[2] = 0;
565 out1[3] = 0;
566 break;
567 case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2);
568 out0[1] = amd_bytealign (in1[0], in0[3], 2);
569 out0[2] = amd_bytealign (in1[1], in1[0], 2);
570 out0[3] = amd_bytealign (in1[2], in1[1], 2);
571 out1[0] = amd_bytealign (in1[3], in1[2], 2);
572 out1[1] = amd_bytealign ( 0, in1[3], 2);
573 out1[2] = 0;
574 out1[3] = 0;
575 break;
576 case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3);
577 out0[1] = amd_bytealign (in1[0], in0[3], 3);
578 out0[2] = amd_bytealign (in1[1], in1[0], 3);
579 out0[3] = amd_bytealign (in1[2], in1[1], 3);
580 out1[0] = amd_bytealign (in1[3], in1[2], 3);
581 out1[1] = amd_bytealign ( 0, in1[3], 3);
582 out1[2] = 0;
583 out1[3] = 0;
584 break;
585 case 12: out0[0] = in0[3];
586 out0[1] = in1[0];
587 out0[2] = in1[1];
588 out0[3] = in1[2];
589 out1[0] = in1[3];
590 out1[1] = 0;
591 out1[2] = 0;
592 out1[3] = 0;
593 break;
594 case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1);
595 out0[1] = amd_bytealign (in1[1], in1[0], 1);
596 out0[2] = amd_bytealign (in1[2], in1[1], 1);
597 out0[3] = amd_bytealign (in1[3], in1[2], 1);
598 out1[0] = amd_bytealign ( 0, in1[3], 1);
599 out1[1] = 0;
600 out1[2] = 0;
601 out1[3] = 0;
602 break;
603 case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2);
604 out0[1] = amd_bytealign (in1[1], in1[0], 2);
605 out0[2] = amd_bytealign (in1[2], in1[1], 2);
606 out0[3] = amd_bytealign (in1[3], in1[2], 2);
607 out1[0] = amd_bytealign ( 0, in1[3], 2);
608 out1[1] = 0;
609 out1[2] = 0;
610 out1[3] = 0;
611 break;
612 case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3);
613 out0[1] = amd_bytealign (in1[1], in1[0], 3);
614 out0[2] = amd_bytealign (in1[2], in1[1], 3);
615 out0[3] = amd_bytealign (in1[3], in1[2], 3);
616 out1[0] = amd_bytealign ( 0, in1[3], 3);
617 out1[1] = 0;
618 out1[2] = 0;
619 out1[3] = 0;
620 break;
621 case 16: out0[0] = in1[0];
622 out0[1] = in1[1];
623 out0[2] = in1[2];
624 out0[3] = in1[3];
625 out1[0] = 0;
626 out1[1] = 0;
627 out1[2] = 0;
628 out1[3] = 0;
629 break;
630 case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1);
631 out0[1] = amd_bytealign (in1[2], in1[1], 1);
632 out0[2] = amd_bytealign (in1[3], in1[2], 1);
633 out0[3] = amd_bytealign ( 0, in1[3], 1);
634 out1[0] = 0;
635 out1[1] = 0;
636 out1[2] = 0;
637 out1[3] = 0;
638 break;
639 case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2);
640 out0[1] = amd_bytealign (in1[2], in1[1], 2);
641 out0[2] = amd_bytealign (in1[3], in1[2], 2);
642 out0[3] = amd_bytealign ( 0, in1[3], 2);
643 out1[0] = 0;
644 out1[1] = 0;
645 out1[2] = 0;
646 out1[3] = 0;
647 break;
648 case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3);
649 out0[1] = amd_bytealign (in1[2], in1[1], 3);
650 out0[2] = amd_bytealign (in1[3], in1[2], 3);
651 out0[3] = amd_bytealign ( 0, in1[3], 3);
652 out1[0] = 0;
653 out1[1] = 0;
654 out1[2] = 0;
655 out1[3] = 0;
656 break;
657 case 20: out0[0] = in1[1];
658 out0[1] = in1[2];
659 out0[2] = in1[3];
660 out0[3] = 0;
661 out1[0] = 0;
662 out1[1] = 0;
663 out1[2] = 0;
664 out1[3] = 0;
665 break;
666 case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1);
667 out0[1] = amd_bytealign (in1[3], in1[2], 1);
668 out0[2] = amd_bytealign ( 0, in1[3], 1);
669 out0[3] = 0;
670 out1[0] = 0;
671 out1[1] = 0;
672 out1[2] = 0;
673 out1[3] = 0;
674 break;
675 case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2);
676 out0[1] = amd_bytealign (in1[3], in1[2], 2);
677 out0[2] = amd_bytealign ( 0, in1[3], 2);
678 out0[3] = 0;
679 out1[0] = 0;
680 out1[1] = 0;
681 out1[2] = 0;
682 out1[3] = 0;
683 break;
684 case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3);
685 out0[1] = amd_bytealign (in1[3], in1[2], 3);
686 out0[2] = amd_bytealign ( 0, in1[3], 3);
687 out0[3] = 0;
688 out1[0] = 0;
689 out1[1] = 0;
690 out1[2] = 0;
691 out1[3] = 0;
692 break;
693 case 24: out0[0] = in1[2];
694 out0[1] = in1[3];
695 out0[2] = 0;
696 out0[3] = 0;
697 out1[0] = 0;
698 out1[1] = 0;
699 out1[2] = 0;
700 out1[3] = 0;
701 break;
702 case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1);
703 out0[1] = amd_bytealign ( 0, in1[3], 1);
704 out0[2] = 0;
705 out0[3] = 0;
706 out1[0] = 0;
707 out1[1] = 0;
708 out1[2] = 0;
709 out1[3] = 0;
710 break;
711 case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2);
712 out0[1] = amd_bytealign ( 0, in1[3], 2);
713 out0[2] = 0;
714 out0[3] = 0;
715 out1[0] = 0;
716 out1[1] = 0;
717 out1[2] = 0;
718 out1[3] = 0;
719 break;
720 case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3);
721 out0[1] = amd_bytealign ( 0, in1[3], 3);
722 out0[2] = 0;
723 out0[3] = 0;
724 out1[0] = 0;
725 out1[1] = 0;
726 out1[2] = 0;
727 out1[3] = 0;
728 break;
729 case 28: out0[0] = in1[3];
730 out0[1] = 0;
731 out0[2] = 0;
732 out0[3] = 0;
733 out1[0] = 0;
734 out1[1] = 0;
735 out1[2] = 0;
736 out1[3] = 0;
737 break;
738 case 29: out0[0] = amd_bytealign ( 0, in1[3], 1);
739 out0[1] = 0;
740 out0[2] = 0;
741 out0[3] = 0;
742 out1[0] = 0;
743 out1[1] = 0;
744 out1[2] = 0;
745 out1[3] = 0;
746 break;
747 case 30: out0[0] = amd_bytealign ( 0, in1[3], 2);
748 out0[1] = 0;
749 out0[2] = 0;
750 out0[3] = 0;
751 out1[0] = 0;
752 out1[1] = 0;
753 out1[2] = 0;
754 out1[3] = 0;
755 break;
756 case 31: out0[0] = amd_bytealign ( 0, in1[3], 3);
757 out0[1] = 0;
758 out0[2] = 0;
759 out0[3] = 0;
760 out1[0] = 0;
761 out1[1] = 0;
762 out1[2] = 0;
763 out1[3] = 0;
764 break;
765 }
766 #endif
767 }
768
769 static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
770 {
771 #ifdef IS_NV
772 switch (num)
773 {
774 case 0: out1[3] = in1[3];
775 out1[2] = in1[2];
776 out1[1] = in1[1];
777 out1[0] = in1[0];
778 out0[3] = in0[3];
779 out0[2] = in0[2];
780 out0[1] = in0[1];
781 out0[0] = in0[0];
782 break;
783 case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
784 out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
785 out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
786 out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
787 out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
788 out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
789 out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
790 out0[0] = __byte_perm ( 0, in0[0], 0x6543);
791 break;
792 case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432);
793 out1[2] = __byte_perm (in1[1], in1[2], 0x5432);
794 out1[1] = __byte_perm (in1[0], in1[1], 0x5432);
795 out1[0] = __byte_perm (in0[3], in1[0], 0x5432);
796 out0[3] = __byte_perm (in0[2], in0[3], 0x5432);
797 out0[2] = __byte_perm (in0[1], in0[2], 0x5432);
798 out0[1] = __byte_perm (in0[0], in0[1], 0x5432);
799 out0[0] = __byte_perm ( 0, in0[0], 0x5432);
800 break;
801 case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321);
802 out1[2] = __byte_perm (in1[1], in1[2], 0x4321);
803 out1[1] = __byte_perm (in1[0], in1[1], 0x4321);
804 out1[0] = __byte_perm (in0[3], in1[0], 0x4321);
805 out0[3] = __byte_perm (in0[2], in0[3], 0x4321);
806 out0[2] = __byte_perm (in0[1], in0[2], 0x4321);
807 out0[1] = __byte_perm (in0[0], in0[1], 0x4321);
808 out0[0] = __byte_perm ( 0, in0[0], 0x4321);
809 break;
810 case 4: out1[3] = in1[2];
811 out1[2] = in1[1];
812 out1[1] = in1[0];
813 out1[0] = in0[3];
814 out0[3] = in0[2];
815 out0[2] = in0[1];
816 out0[1] = in0[0];
817 out0[0] = 0;
818 break;
819 case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543);
820 out1[2] = __byte_perm (in1[0], in1[1], 0x6543);
821 out1[1] = __byte_perm (in0[3], in1[0], 0x6543);
822 out1[0] = __byte_perm (in0[2], in0[3], 0x6543);
823 out0[3] = __byte_perm (in0[1], in0[2], 0x6543);
824 out0[2] = __byte_perm (in0[0], in0[1], 0x6543);
825 out0[1] = __byte_perm ( 0, in0[0], 0x6543);
826 out0[0] = 0;
827 break;
828 case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432);
829 out1[2] = __byte_perm (in1[0], in1[1], 0x5432);
830 out1[1] = __byte_perm (in0[3], in1[0], 0x5432);
831 out1[0] = __byte_perm (in0[2], in0[3], 0x5432);
832 out0[3] = __byte_perm (in0[1], in0[2], 0x5432);
833 out0[2] = __byte_perm (in0[0], in0[1], 0x5432);
834 out0[1] = __byte_perm ( 0, in0[0], 0x5432);
835 out0[0] = 0;
836 break;
837 case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321);
838 out1[2] = __byte_perm (in1[0], in1[1], 0x4321);
839 out1[1] = __byte_perm (in0[3], in1[0], 0x4321);
840 out1[0] = __byte_perm (in0[2], in0[3], 0x4321);
841 out0[3] = __byte_perm (in0[1], in0[2], 0x4321);
842 out0[2] = __byte_perm (in0[0], in0[1], 0x4321);
843 out0[1] = __byte_perm ( 0, in0[0], 0x4321);
844 out0[0] = 0;
845 break;
846 case 8: out1[3] = in1[1];
847 out1[2] = in1[0];
848 out1[1] = in0[3];
849 out1[0] = in0[2];
850 out0[3] = in0[1];
851 out0[2] = in0[0];
852 out0[1] = 0;
853 out0[0] = 0;
854 break;
855 case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543);
856 out1[2] = __byte_perm (in0[3], in1[0], 0x6543);
857 out1[1] = __byte_perm (in0[2], in0[3], 0x6543);
858 out1[0] = __byte_perm (in0[1], in0[2], 0x6543);
859 out0[3] = __byte_perm (in0[0], in0[1], 0x6543);
860 out0[2] = __byte_perm ( 0, in0[0], 0x6543);
861 out0[1] = 0;
862 out0[0] = 0;
863 break;
864 case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432);
865 out1[2] = __byte_perm (in0[3], in1[0], 0x5432);
866 out1[1] = __byte_perm (in0[2], in0[3], 0x5432);
867 out1[0] = __byte_perm (in0[1], in0[2], 0x5432);
868 out0[3] = __byte_perm (in0[0], in0[1], 0x5432);
869 out0[2] = __byte_perm ( 0, in0[0], 0x5432);
870 out0[1] = 0;
871 out0[0] = 0;
872 break;
873 case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321);
874 out1[2] = __byte_perm (in0[3], in1[0], 0x4321);
875 out1[1] = __byte_perm (in0[2], in0[3], 0x4321);
876 out1[0] = __byte_perm (in0[1], in0[2], 0x4321);
877 out0[3] = __byte_perm (in0[0], in0[1], 0x4321);
878 out0[2] = __byte_perm ( 0, in0[0], 0x4321);
879 out0[1] = 0;
880 out0[0] = 0;
881 break;
882 case 12: out1[3] = in1[0];
883 out1[2] = in0[3];
884 out1[1] = in0[2];
885 out1[0] = in0[1];
886 out0[3] = in0[0];
887 out0[2] = 0;
888 out0[1] = 0;
889 out0[0] = 0;
890 break;
891 case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543);
892 out1[2] = __byte_perm (in0[2], in0[3], 0x6543);
893 out1[1] = __byte_perm (in0[1], in0[2], 0x6543);
894 out1[0] = __byte_perm (in0[0], in0[1], 0x6543);
895 out0[3] = __byte_perm ( 0, in0[0], 0x6543);
896 out0[2] = 0;
897 out0[1] = 0;
898 out0[0] = 0;
899 break;
900 case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432);
901 out1[2] = __byte_perm (in0[2], in0[3], 0x5432);
902 out1[1] = __byte_perm (in0[1], in0[2], 0x5432);
903 out1[0] = __byte_perm (in0[0], in0[1], 0x5432);
904 out0[3] = __byte_perm ( 0, in0[0], 0x5432);
905 out0[2] = 0;
906 out0[1] = 0;
907 out0[0] = 0;
908 break;
909 case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321);
910 out1[2] = __byte_perm (in0[2], in0[3], 0x4321);
911 out1[1] = __byte_perm (in0[1], in0[2], 0x4321);
912 out1[0] = __byte_perm (in0[0], in0[1], 0x4321);
913 out0[3] = __byte_perm ( 0, in0[0], 0x4321);
914 out0[2] = 0;
915 out0[1] = 0;
916 out0[0] = 0;
917 break;
918 case 16: out1[3] = in0[3];
919 out1[2] = in0[2];
920 out1[1] = in0[1];
921 out1[0] = in0[0];
922 out0[3] = 0;
923 out0[2] = 0;
924 out0[1] = 0;
925 out0[0] = 0;
926 break;
927 case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543);
928 out1[2] = __byte_perm (in0[1], in0[2], 0x6543);
929 out1[1] = __byte_perm (in0[0], in0[1], 0x6543);
930 out1[0] = __byte_perm ( 0, in0[0], 0x6543);
931 out0[3] = 0;
932 out0[2] = 0;
933 out0[1] = 0;
934 out0[0] = 0;
935 break;
936 case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432);
937 out1[2] = __byte_perm (in0[1], in0[2], 0x5432);
938 out1[1] = __byte_perm (in0[0], in0[1], 0x5432);
939 out1[0] = __byte_perm ( 0, in0[0], 0x5432);
940 out0[3] = 0;
941 out0[2] = 0;
942 out0[1] = 0;
943 out0[0] = 0;
944 break;
945 case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321);
946 out1[2] = __byte_perm (in0[1], in0[2], 0x4321);
947 out1[1] = __byte_perm (in0[0], in0[1], 0x4321);
948 out1[0] = __byte_perm ( 0, in0[0], 0x4321);
949 out0[3] = 0;
950 out0[2] = 0;
951 out0[1] = 0;
952 out0[0] = 0;
953 break;
954 case 20: out1[3] = in0[2];
955 out1[2] = in0[1];
956 out1[1] = in0[0];
957 out1[0] = 0;
958 out0[3] = 0;
959 out0[2] = 0;
960 out0[1] = 0;
961 out0[0] = 0;
962 break;
963 case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543);
964 out1[2] = __byte_perm (in0[0], in0[1], 0x6543);
965 out1[1] = __byte_perm ( 0, in0[0], 0x6543);
966 out1[0] = 0;
967 out0[3] = 0;
968 out0[2] = 0;
969 out0[1] = 0;
970 out0[0] = 0;
971 break;
972 case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432);
973 out1[2] = __byte_perm (in0[0], in0[1], 0x5432);
974 out1[1] = __byte_perm ( 0, in0[0], 0x5432);
975 out1[0] = 0;
976 out0[3] = 0;
977 out0[2] = 0;
978 out0[1] = 0;
979 out0[0] = 0;
980 break;
981 case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321);
982 out1[2] = __byte_perm (in0[0], in0[1], 0x4321);
983 out1[1] = __byte_perm ( 0, in0[0], 0x4321);
984 out1[0] = 0;
985 out0[3] = 0;
986 out0[2] = 0;
987 out0[1] = 0;
988 out0[0] = 0;
989 break;
990 case 24: out1[3] = in0[1];
991 out1[2] = in0[0];
992 out1[1] = 0;
993 out1[0] = 0;
994 out0[3] = 0;
995 out0[2] = 0;
996 out0[1] = 0;
997 out0[0] = 0;
998 break;
999 case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543);
1000 out1[2] = __byte_perm ( 0, in0[0], 0x6543);
1001 out1[1] = 0;
1002 out1[0] = 0;
1003 out0[3] = 0;
1004 out0[2] = 0;
1005 out0[1] = 0;
1006 out0[0] = 0;
1007 break;
1008 case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432);
1009 out1[2] = __byte_perm ( 0, in0[0], 0x5432);
1010 out1[1] = 0;
1011 out1[0] = 0;
1012 out0[3] = 0;
1013 out0[2] = 0;
1014 out0[1] = 0;
1015 out0[0] = 0;
1016 break;
1017 case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321);
1018 out1[2] = __byte_perm ( 0, in0[0], 0x4321);
1019 out1[1] = 0;
1020 out1[0] = 0;
1021 out0[3] = 0;
1022 out0[2] = 0;
1023 out0[1] = 0;
1024 out0[0] = 0;
1025 break;
1026 case 28: out1[3] = in0[0];
1027 out1[2] = 0;
1028 out1[1] = 0;
1029 out1[0] = 0;
1030 out0[3] = 0;
1031 out0[2] = 0;
1032 out0[1] = 0;
1033 out0[0] = 0;
1034 break;
1035 case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543);
1036 out1[2] = 0;
1037 out1[1] = 0;
1038 out1[0] = 0;
1039 out0[3] = 0;
1040 out0[2] = 0;
1041 out0[1] = 0;
1042 out0[0] = 0;
1043 break;
1044 case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432);
1045 out1[2] = 0;
1046 out1[1] = 0;
1047 out1[0] = 0;
1048 out0[3] = 0;
1049 out0[2] = 0;
1050 out0[1] = 0;
1051 out0[0] = 0;
1052 break;
1053 case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321);
1054 out1[2] = 0;
1055 out1[1] = 0;
1056 out1[0] = 0;
1057 out0[3] = 0;
1058 out0[2] = 0;
1059 out0[1] = 0;
1060 out0[0] = 0;
1061 break;
1062 }
1063 #endif
1064
1065 #if defined IS_AMD || defined IS_GENERIC
1066 switch (num)
1067 {
1068 case 0: out1[3] = in1[3];
1069 out1[2] = in1[2];
1070 out1[1] = in1[1];
1071 out1[0] = in1[0];
1072 out0[3] = in0[3];
1073 out0[2] = in0[2];
1074 out0[1] = in0[1];
1075 out0[0] = in0[0];
1076 break;
1077 case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3);
1078 out1[2] = amd_bytealign (in1[2], in1[1], 3);
1079 out1[1] = amd_bytealign (in1[1], in1[0], 3);
1080 out1[0] = amd_bytealign (in1[0], in0[3], 3);
1081 out0[3] = amd_bytealign (in0[3], in0[2], 3);
1082 out0[2] = amd_bytealign (in0[2], in0[1], 3);
1083 out0[1] = amd_bytealign (in0[1], in0[0], 3);
1084 out0[0] = amd_bytealign (in0[0], 0, 3);
1085 break;
1086 case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2);
1087 out1[2] = amd_bytealign (in1[2], in1[1], 2);
1088 out1[1] = amd_bytealign (in1[1], in1[0], 2);
1089 out1[0] = amd_bytealign (in1[0], in0[3], 2);
1090 out0[3] = amd_bytealign (in0[3], in0[2], 2);
1091 out0[2] = amd_bytealign (in0[2], in0[1], 2);
1092 out0[1] = amd_bytealign (in0[1], in0[0], 2);
1093 out0[0] = amd_bytealign (in0[0], 0, 2);
1094 break;
1095 case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1);
1096 out1[2] = amd_bytealign (in1[2], in1[1], 1);
1097 out1[1] = amd_bytealign (in1[1], in1[0], 1);
1098 out1[0] = amd_bytealign (in1[0], in0[3], 1);
1099 out0[3] = amd_bytealign (in0[3], in0[2], 1);
1100 out0[2] = amd_bytealign (in0[2], in0[1], 1);
1101 out0[1] = amd_bytealign (in0[1], in0[0], 1);
1102 out0[0] = amd_bytealign (in0[0], 0, 1);
1103 break;
1104 case 4: out1[3] = in1[2];
1105 out1[2] = in1[1];
1106 out1[1] = in1[0];
1107 out1[0] = in0[3];
1108 out0[3] = in0[2];
1109 out0[2] = in0[1];
1110 out0[1] = in0[0];
1111 out0[0] = 0;
1112 break;
1113 case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3);
1114 out1[2] = amd_bytealign (in1[1], in1[0], 3);
1115 out1[1] = amd_bytealign (in1[0], in0[3], 3);
1116 out1[0] = amd_bytealign (in0[3], in0[2], 3);
1117 out0[3] = amd_bytealign (in0[2], in0[1], 3);
1118 out0[2] = amd_bytealign (in0[1], in0[0], 3);
1119 out0[1] = amd_bytealign (in0[0], 0, 3);
1120 out0[0] = 0;
1121 break;
1122 case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2);
1123 out1[2] = amd_bytealign (in1[1], in1[0], 2);
1124 out1[1] = amd_bytealign (in1[0], in0[3], 2);
1125 out1[0] = amd_bytealign (in0[3], in0[2], 2);
1126 out0[3] = amd_bytealign (in0[2], in0[1], 2);
1127 out0[2] = amd_bytealign (in0[1], in0[0], 2);
1128 out0[1] = amd_bytealign (in0[0], 0, 2);
1129 out0[0] = 0;
1130 break;
1131 case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1);
1132 out1[2] = amd_bytealign (in1[1], in1[0], 1);
1133 out1[1] = amd_bytealign (in1[0], in0[3], 1);
1134 out1[0] = amd_bytealign (in0[3], in0[2], 1);
1135 out0[3] = amd_bytealign (in0[2], in0[1], 1);
1136 out0[2] = amd_bytealign (in0[1], in0[0], 1);
1137 out0[1] = amd_bytealign (in0[0], 0, 1);
1138 out0[0] = 0;
1139 break;
1140 case 8: out1[3] = in1[1];
1141 out1[2] = in1[0];
1142 out1[1] = in0[3];
1143 out1[0] = in0[2];
1144 out0[3] = in0[1];
1145 out0[2] = in0[0];
1146 out0[1] = 0;
1147 out0[0] = 0;
1148 break;
1149 case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3);
1150 out1[2] = amd_bytealign (in1[0], in0[3], 3);
1151 out1[1] = amd_bytealign (in0[3], in0[2], 3);
1152 out1[0] = amd_bytealign (in0[2], in0[1], 3);
1153 out0[3] = amd_bytealign (in0[1], in0[0], 3);
1154 out0[2] = amd_bytealign (in0[0], 0, 3);
1155 out0[1] = 0;
1156 out0[0] = 0;
1157 break;
1158 case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2);
1159 out1[2] = amd_bytealign (in1[0], in0[3], 2);
1160 out1[1] = amd_bytealign (in0[3], in0[2], 2);
1161 out1[0] = amd_bytealign (in0[2], in0[1], 2);
1162 out0[3] = amd_bytealign (in0[1], in0[0], 2);
1163 out0[2] = amd_bytealign (in0[0], 0, 2);
1164 out0[1] = 0;
1165 out0[0] = 0;
1166 break;
1167 case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1);
1168 out1[2] = amd_bytealign (in1[0], in0[3], 1);
1169 out1[1] = amd_bytealign (in0[3], in0[2], 1);
1170 out1[0] = amd_bytealign (in0[2], in0[1], 1);
1171 out0[3] = amd_bytealign (in0[1], in0[0], 1);
1172 out0[2] = amd_bytealign (in0[0], 0, 1);
1173 out0[1] = 0;
1174 out0[0] = 0;
1175 break;
1176 case 12: out1[3] = in1[0];
1177 out1[2] = in0[3];
1178 out1[1] = in0[2];
1179 out1[0] = in0[1];
1180 out0[3] = in0[0];
1181 out0[2] = 0;
1182 out0[1] = 0;
1183 out0[0] = 0;
1184 break;
1185 case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3);
1186 out1[2] = amd_bytealign (in0[3], in0[2], 3);
1187 out1[1] = amd_bytealign (in0[2], in0[1], 3);
1188 out1[0] = amd_bytealign (in0[1], in0[0], 3);
1189 out0[3] = amd_bytealign (in0[0], 0, 3);
1190 out0[2] = 0;
1191 out0[1] = 0;
1192 out0[0] = 0;
1193 break;
1194 case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2);
1195 out1[2] = amd_bytealign (in0[3], in0[2], 2);
1196 out1[1] = amd_bytealign (in0[2], in0[1], 2);
1197 out1[0] = amd_bytealign (in0[1], in0[0], 2);
1198 out0[3] = amd_bytealign (in0[0], 0, 2);
1199 out0[2] = 0;
1200 out0[1] = 0;
1201 out0[0] = 0;
1202 break;
1203 case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1);
1204 out1[2] = amd_bytealign (in0[3], in0[2], 1);
1205 out1[1] = amd_bytealign (in0[2], in0[1], 1);
1206 out1[0] = amd_bytealign (in0[1], in0[0], 1);
1207 out0[3] = amd_bytealign (in0[0], 0, 1);
1208 out0[2] = 0;
1209 out0[1] = 0;
1210 out0[0] = 0;
1211 break;
1212 case 16: out1[3] = in0[3];
1213 out1[2] = in0[2];
1214 out1[1] = in0[1];
1215 out1[0] = in0[0];
1216 out0[3] = 0;
1217 out0[2] = 0;
1218 out0[1] = 0;
1219 out0[0] = 0;
1220 break;
1221 case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3);
1222 out1[2] = amd_bytealign (in0[2], in0[1], 3);
1223 out1[1] = amd_bytealign (in0[1], in0[0], 3);
1224 out1[0] = amd_bytealign (in0[0], 0, 3);
1225 out0[3] = 0;
1226 out0[2] = 0;
1227 out0[1] = 0;
1228 out0[0] = 0;
1229 break;
1230 case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2);
1231 out1[2] = amd_bytealign (in0[2], in0[1], 2);
1232 out1[1] = amd_bytealign (in0[1], in0[0], 2);
1233 out1[0] = amd_bytealign (in0[0], 0, 2);
1234 out0[3] = 0;
1235 out0[2] = 0;
1236 out0[1] = 0;
1237 out0[0] = 0;
1238 break;
1239 case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1);
1240 out1[2] = amd_bytealign (in0[2], in0[1], 1);
1241 out1[1] = amd_bytealign (in0[1], in0[0], 1);
1242 out1[0] = amd_bytealign (in0[0], 0, 1);
1243 out0[3] = 0;
1244 out0[2] = 0;
1245 out0[1] = 0;
1246 out0[0] = 0;
1247 break;
1248 case 20: out1[3] = in0[2];
1249 out1[2] = in0[1];
1250 out1[1] = in0[0];
1251 out1[0] = 0;
1252 out0[3] = 0;
1253 out0[2] = 0;
1254 out0[1] = 0;
1255 out0[0] = 0;
1256 break;
1257 case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3);
1258 out1[2] = amd_bytealign (in0[1], in0[0], 3);
1259 out1[1] = amd_bytealign (in0[0], 0, 3);
1260 out1[0] = 0;
1261 out0[3] = 0;
1262 out0[2] = 0;
1263 out0[1] = 0;
1264 out0[0] = 0;
1265 break;
1266 case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2);
1267 out1[2] = amd_bytealign (in0[1], in0[0], 2);
1268 out1[1] = amd_bytealign (in0[0], 0, 2);
1269 out1[0] = 0;
1270 out0[3] = 0;
1271 out0[2] = 0;
1272 out0[1] = 0;
1273 out0[0] = 0;
1274 break;
1275 case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1);
1276 out1[2] = amd_bytealign (in0[1], in0[0], 1);
1277 out1[1] = amd_bytealign (in0[0], 0, 1);
1278 out1[0] = 0;
1279 out0[3] = 0;
1280 out0[2] = 0;
1281 out0[1] = 0;
1282 out0[0] = 0;
1283 break;
1284 case 24: out1[3] = in0[1];
1285 out1[2] = in0[0];
1286 out1[1] = 0;
1287 out1[0] = 0;
1288 out0[3] = 0;
1289 out0[2] = 0;
1290 out0[1] = 0;
1291 out0[0] = 0;
1292 break;
1293 case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3);
1294 out1[2] = amd_bytealign (in0[0], 0, 3);
1295 out1[1] = 0;
1296 out1[0] = 0;
1297 out0[3] = 0;
1298 out0[2] = 0;
1299 out0[1] = 0;
1300 out0[0] = 0;
1301 break;
1302 case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2);
1303 out1[2] = amd_bytealign (in0[0], 0, 2);
1304 out1[1] = 0;
1305 out1[0] = 0;
1306 out0[3] = 0;
1307 out0[2] = 0;
1308 out0[1] = 0;
1309 out0[0] = 0;
1310 break;
1311 case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1);
1312 out1[2] = amd_bytealign (in0[0], 0, 1);
1313 out1[1] = 0;
1314 out1[0] = 0;
1315 out0[3] = 0;
1316 out0[2] = 0;
1317 out0[1] = 0;
1318 out0[0] = 0;
1319 break;
1320 case 28: out1[3] = in0[0];
1321 out1[2] = 0;
1322 out1[1] = 0;
1323 out1[0] = 0;
1324 out0[3] = 0;
1325 out0[2] = 0;
1326 out0[1] = 0;
1327 out0[0] = 0;
1328 break;
1329 case 29: out1[3] = amd_bytealign (in0[0], 0, 3);
1330 out1[2] = 0;
1331 out1[1] = 0;
1332 out1[0] = 0;
1333 out0[3] = 0;
1334 out0[2] = 0;
1335 out0[1] = 0;
1336 out0[0] = 0;
1337 break;
1338 case 30: out1[3] = amd_bytealign (in0[0], 0, 2);
1339 out1[2] = 0;
1340 out1[1] = 0;
1341 out1[0] = 0;
1342 out0[3] = 0;
1343 out0[2] = 0;
1344 out0[1] = 0;
1345 out0[0] = 0;
1346 break;
1347 case 31: out1[3] = amd_bytealign (in0[0], 0, 1);
1348 out1[2] = 0;
1349 out1[1] = 0;
1350 out1[0] = 0;
1351 out0[3] = 0;
1352 out0[2] = 0;
1353 out0[1] = 0;
1354 out0[0] = 0;
1355 break;
1356 }
1357 #endif
1358 }
1359
/**
 * Append the value src_r0 to the 32 byte buffer dst0/dst1 at byte position
 * `offset` (dst0 holds bytes 0..15, dst1 holds bytes 16..31).
 *
 * src_r0 is shifted into the byte lane (offset & 3) and OR-ed into word
 * (offset / 4). Whatever is shifted out at the top of that word is stored
 * (plain assignment, not OR) into the following word; word 7 has no
 * successor, so the spill is dropped there. Offsets of 32 and above leave
 * the buffer untouched.
 */
static void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
{
  const u32 lane_bits = (offset & 3) * 8;

  // part of the value that stays inside the addressed word
  const u32 head = src_r0 << lane_bits;

  // part that spills into the next word; a shift by 32 must be avoided
  const u32 tail = (lane_bits == 0) ? 0 : (src_r0 >> (32 - lane_bits));

  switch (offset / 4)
  {
    case 0:
      dst0[0] |= head;
      dst0[1]  = tail;
      break;

    case 1:
      dst0[1] |= head;
      dst0[2]  = tail;
      break;

    case 2:
      dst0[2] |= head;
      dst0[3]  = tail;
      break;

    case 3:
      dst0[3] |= head;
      dst1[0]  = tail;
      break;

    case 4:
      dst1[0] |= head;
      dst1[1]  = tail;
      break;

    case 5:
      dst1[1] |= head;
      dst1[2]  = tail;
      break;

    case 6:
      dst1[2] |= head;
      dst1[3]  = tail;
      break;

    case 7:
      dst1[3] |= head;
      break;
  }
}
1407
1408 static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
1409 {
1410 #ifdef IS_NV
1411 switch (offset)
1412 {
1413 case 0:
1414 dst0[0] = src_r0[0];
1415 dst0[1] = src_r0[1];
1416 dst0[2] = src_r0[2];
1417 dst0[3] = src_r0[3];
1418 dst1[0] = src_r1[0];
1419 dst1[1] = src_r1[1];
1420 dst1[2] = src_r1[2];
1421 dst1[3] = src_r1[3];
1422 break;
1423
1424 case 1:
1425 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x6540);
1426 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1427 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1428 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1429 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1430 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1431 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
1432 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x6543);
1433 break;
1434
1435 case 2:
1436 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x5410);
1437 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1438 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1439 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1440 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1441 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1442 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
1443 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x5432);
1444 break;
1445
1446 case 3:
1447 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x4210);
1448 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1449 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1450 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1451 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1452 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1453 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
1454 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x4321);
1455 break;
1456
1457 case 4:
1458 dst0[1] = src_r0[0];
1459 dst0[2] = src_r0[1];
1460 dst0[3] = src_r0[2];
1461 dst1[0] = src_r0[3];
1462 dst1[1] = src_r1[0];
1463 dst1[2] = src_r1[1];
1464 dst1[3] = src_r1[2];
1465 break;
1466
1467 case 5:
1468 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x6540);
1469 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1470 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1471 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1472 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1473 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1474 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
1475 break;
1476
1477 case 6:
1478 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x5410);
1479 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1480 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1481 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1482 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1483 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1484 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
1485 break;
1486
1487 case 7:
1488 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x4210);
1489 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1490 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1491 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1492 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1493 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1494 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
1495 break;
1496
1497 case 8:
1498 dst0[2] = src_r0[0];
1499 dst0[3] = src_r0[1];
1500 dst1[0] = src_r0[2];
1501 dst1[1] = src_r0[3];
1502 dst1[2] = src_r1[0];
1503 dst1[3] = src_r1[1];
1504 break;
1505
1506 case 9:
1507 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x6540);
1508 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1509 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1510 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1511 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1512 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1513 break;
1514
1515 case 10:
1516 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x5410);
1517 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1518 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1519 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1520 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1521 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1522 break;
1523
1524 case 11:
1525 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x4210);
1526 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1527 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1528 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1529 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1530 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1531 break;
1532
1533 case 12:
1534 dst0[3] = src_r0[0];
1535 dst1[0] = src_r0[1];
1536 dst1[1] = src_r0[2];
1537 dst1[2] = src_r0[3];
1538 dst1[3] = src_r1[0];
1539 break;
1540
1541 case 13:
1542 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x6540);
1543 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1544 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1545 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1546 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1547 break;
1548
1549 case 14:
1550 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x5410);
1551 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1552 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1553 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1554 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1555 break;
1556
1557 case 15:
1558 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x4210);
1559 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1560 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1561 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1562 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1563 break;
1564
1565 case 16:
1566 dst1[0] = src_r0[0];
1567 dst1[1] = src_r0[1];
1568 dst1[2] = src_r0[2];
1569 dst1[3] = src_r0[3];
1570 break;
1571
1572 case 17:
1573 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x6540);
1574 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1575 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1576 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1577 break;
1578
1579 case 18:
1580 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x5410);
1581 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1582 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1583 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1584 break;
1585
1586 case 19:
1587 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x4210);
1588 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1589 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1590 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1591 break;
1592
1593 case 20:
1594 dst1[1] = src_r0[0];
1595 dst1[2] = src_r0[1];
1596 dst1[3] = src_r0[2];
1597 break;
1598
1599 case 21:
1600 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x6540);
1601 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1602 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1603 break;
1604
1605 case 22:
1606 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x5410);
1607 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1608 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1609 break;
1610
1611 case 23:
1612 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x4210);
1613 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1614 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1615 break;
1616
1617 case 24:
1618 dst1[2] = src_r0[0];
1619 dst1[3] = src_r0[1];
1620 break;
1621
1622 case 25:
1623 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x6540);
1624 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1625 break;
1626
1627 case 26:
1628 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x5410);
1629 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1630 break;
1631
1632 case 27:
1633 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x4210);
1634 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1635 break;
1636
1637 case 28:
1638 dst1[3] = src_r0[0];
1639 break;
1640
1641 case 29:
1642 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x6540);
1643 break;
1644
1645 case 30:
1646 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x5410);
1647 break;
1648
1649 case 31:
1650 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x4210);
1651 break;
1652 }
1653 #endif
1654
1655 #if defined IS_AMD || defined IS_GENERIC
1656 switch (offset)
1657 {
1658 case 31:
1659 dst1[3] = src_l1[3] | src_r0[0] << 24;
1660 break;
1661 case 30:
1662 dst1[3] = src_l1[3] | src_r0[0] << 16;
1663 break;
1664 case 29:
1665 dst1[3] = src_l1[3] | src_r0[0] << 8;
1666 break;
1667 case 28:
1668 dst1[3] = src_r0[0];
1669 break;
1670 case 27:
1671 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1672 dst1[2] = src_l1[2] | src_r0[0] << 24;
1673 break;
1674 case 26:
1675 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1676 dst1[2] = src_l1[2] | src_r0[0] << 16;
1677 break;
1678 case 25:
1679 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1680 dst1[2] = src_l1[2] | src_r0[0] << 8;
1681 break;
1682 case 24:
1683 dst1[3] = src_r0[1];
1684 dst1[2] = src_r0[0];
1685 break;
1686 case 23:
1687 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1688 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1689 dst1[1] = src_l1[1] | src_r0[0] << 24;
1690 break;
1691 case 22:
1692 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1693 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1694 dst1[1] = src_l1[1] | src_r0[0] << 16;
1695 break;
1696 case 21:
1697 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1698 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1699 dst1[1] = src_l1[1] | src_r0[0] << 8;
1700 break;
1701 case 20:
1702 dst1[3] = src_r0[2];
1703 dst1[2] = src_r0[1];
1704 dst1[1] = src_r0[0];
1705 break;
1706 case 19:
1707 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1708 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1709 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1710 dst1[0] = src_l1[0] | src_r0[0] << 24;
1711 break;
1712 case 18:
1713 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
1714 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
1715 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
1716 dst1[0] = src_l1[0] | src_r0[0] << 16;
1717 break;
1718 case 17:
1719 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
1720 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
1721 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
1722 dst1[0] = src_l1[0] | src_r0[0] << 8;
1723 break;
1724 case 16:
1725 dst1[3] = src_r0[3];
1726 dst1[2] = src_r0[2];
1727 dst1[1] = src_r0[1];
1728 dst1[0] = src_r0[0];
1729 break;
1730 case 15:
1731 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1);
1732 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1);
1733 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1);
1734 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1);
1735 dst0[3] = src_l0[3] | src_r0[0] << 24;
1736 break;
1737 case 14:
1738 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2);
1739 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2);
1740 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2);
1741 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2);
1742 dst0[3] = src_l0[3] | src_r0[0] << 16;
1743 break;
1744 case 13:
1745 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3);
1746 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3);
1747 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3);
1748 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3);
1749 dst0[3] = src_l0[3] | src_r0[0] << 8;
1750 break;
1751 case 12:
1752 dst1[3] = src_r1[0];
1753 dst1[2] = src_r0[3];
1754 dst1[1] = src_r0[2];
1755 dst1[0] = src_r0[1];
1756 dst0[3] = src_r0[0];
1757 break;
1758 case 11:
1759 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1);
1760 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1);
1761 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1);
1762 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1);
1763 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1764 dst0[2] = src_l0[2] | src_r0[0] << 24;
1765 break;
1766 case 10:
1767 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2);
1768 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2);
1769 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2);
1770 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2);
1771 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1772 dst0[2] = src_l0[2] | src_r0[0] << 16;
1773 break;
1774 case 9:
1775 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3);
1776 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3);
1777 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3);
1778 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3);
1779 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1780 dst0[2] = src_l0[2] | src_r0[0] << 8;
1781 break;
1782 case 8:
1783 dst1[3] = src_r1[1];
1784 dst1[2] = src_r1[0];
1785 dst1[1] = src_r0[3];
1786 dst1[0] = src_r0[2];
1787 dst0[3] = src_r0[1];
1788 dst0[2] = src_r0[0];
1789 break;
1790 case 7:
1791 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1);
1792 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1);
1793 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1);
1794 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1);
1795 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1796 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1797 dst0[1] = src_l0[1] | src_r0[0] << 24;
1798 break;
1799 case 6:
1800 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2);
1801 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2);
1802 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2);
1803 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2);
1804 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1805 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1806 dst0[1] = src_l0[1] | src_r0[0] << 16;
1807 break;
1808 case 5:
1809 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3);
1810 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3);
1811 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3);
1812 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3);
1813 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1814 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1815 dst0[1] = src_l0[1] | src_r0[0] << 8;
1816 break;
1817 case 4:
1818 dst1[3] = src_r1[2];
1819 dst1[2] = src_r1[1];
1820 dst1[1] = src_r1[0];
1821 dst1[0] = src_r0[3];
1822 dst0[3] = src_r0[2];
1823 dst0[2] = src_r0[1];
1824 dst0[1] = src_r0[0];
1825 break;
1826 case 3:
1827 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1);
1828 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1);
1829 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1);
1830 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1);
1831 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1832 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1833 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1834 dst0[0] = src_l0[0] | src_r0[0] << 24;
1835 break;
1836 case 2:
1837 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2);
1838 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2);
1839 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2);
1840 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2);
1841 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
1842 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
1843 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
1844 dst0[0] = src_l0[0] | src_r0[0] << 16;
1845 break;
1846 case 1:
1847 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3);
1848 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3);
1849 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3);
1850 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3);
1851 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
1852 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
1853 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
1854 dst0[0] = src_l0[0] | src_r0[0] << 8;
1855 break;
1856 case 0:
1857 dst1[3] = src_r1[3];
1858 dst1[2] = src_r1[2];
1859 dst1[1] = src_r1[1];
1860 dst1[0] = src_r1[0];
1861 dst0[3] = src_r0[3];
1862 dst0[2] = src_r0[2];
1863 dst0[1] = src_r0[1];
1864 dst0[0] = src_r0[0];
1865 break;
1866 }
1867 #endif
1868 }
1869
1870 static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
1871 {
1872 rshift_block_N (in0, in1, out0, out1, 32 - len);
1873
1874 u32 tib40[4];
1875 u32 tib41[4];
1876
1877 tib40[0] = out1[3];
1878 tib40[1] = out1[2];
1879 tib40[2] = out1[1];
1880 tib40[3] = out1[0];
1881 tib41[0] = out0[3];
1882 tib41[1] = out0[2];
1883 tib41[2] = out0[1];
1884 tib41[3] = out0[0];
1885
1886 out0[0] = swap32 (tib40[0]);
1887 out0[1] = swap32 (tib40[1]);
1888 out0[2] = swap32 (tib40[2]);
1889 out0[3] = swap32 (tib40[3]);
1890 out1[0] = swap32 (tib41[0]);
1891 out1[1] = swap32 (tib41[1]);
1892 out1[2] = swap32 (tib41[2]);
1893 out1[3] = swap32 (tib41[3]);
1894 }
1895
1896 static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1897 {
1898 buf0[0] |= (generate_cmask (buf0[0]));
1899 buf0[1] |= (generate_cmask (buf0[1]));
1900 buf0[2] |= (generate_cmask (buf0[2]));
1901 buf0[3] |= (generate_cmask (buf0[3]));
1902 buf1[0] |= (generate_cmask (buf1[0]));
1903 buf1[1] |= (generate_cmask (buf1[1]));
1904 buf1[2] |= (generate_cmask (buf1[2]));
1905 buf1[3] |= (generate_cmask (buf1[3]));
1906
1907 return in_len;
1908 }
1909
1910 static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1911 {
1912 buf0[0] &= ~(generate_cmask (buf0[0]));
1913 buf0[1] &= ~(generate_cmask (buf0[1]));
1914 buf0[2] &= ~(generate_cmask (buf0[2]));
1915 buf0[3] &= ~(generate_cmask (buf0[3]));
1916 buf1[0] &= ~(generate_cmask (buf1[0]));
1917 buf1[1] &= ~(generate_cmask (buf1[1]));
1918 buf1[2] &= ~(generate_cmask (buf1[2]));
1919 buf1[3] &= ~(generate_cmask (buf1[3]));
1920
1921 return in_len;
1922 }
1923
1924 static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1925 {
1926 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
1927
1928 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
1929
1930 return in_len;
1931 }
1932
1933 static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1934 {
1935 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
1936
1937 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
1938
1939 return in_len;
1940 }
1941
1942 static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1943 {
1944 buf0[0] ^= (generate_cmask (buf0[0]));
1945 buf0[1] ^= (generate_cmask (buf0[1]));
1946 buf0[2] ^= (generate_cmask (buf0[2]));
1947 buf0[3] ^= (generate_cmask (buf0[3]));
1948 buf1[0] ^= (generate_cmask (buf1[0]));
1949 buf1[1] ^= (generate_cmask (buf1[1]));
1950 buf1[2] ^= (generate_cmask (buf1[2]));
1951 buf1[3] ^= (generate_cmask (buf1[3]));
1952
1953 return in_len;
1954 }
1955
1956 static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1957 {
1958 if (p0 >= in_len) return (in_len);
1959
1960 const u32 tmp = 0x20u << ((p0 & 3) * 8);
1961
1962 switch (p0 / 4)
1963 {
1964 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
1965 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
1966 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
1967 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
1968 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
1969 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
1970 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
1971 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
1972 }
1973
1974 return in_len;
1975 }
1976
1977 static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1978 {
1979 reverse_block (buf0, buf1, buf0, buf1, in_len);
1980
1981 return in_len;
1982 }
1983
1984 static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1985 {
1986 if ((in_len + in_len) >= 32) return (in_len);
1987
1988 u32 out_len = in_len;
1989
1990 append_block8 (out_len, buf0, buf1, buf0, buf1, buf0, buf1);
1991
1992 out_len += in_len;
1993
1994 return out_len;
1995 }
1996
1997 static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1998 {
1999 if (((in_len * p0) + in_len) >= 32) return (in_len);
2000
2001 u32 out_len = in_len;
2002
2003 u32 tib40[4];
2004 u32 tib41[4];
2005
2006 tib40[0] = buf0[0];
2007 tib40[1] = buf0[1];
2008 tib40[2] = buf0[2];
2009 tib40[3] = buf0[3];
2010 tib41[0] = buf1[0];
2011 tib41[1] = buf1[1];
2012 tib41[2] = buf1[2];
2013 tib41[3] = buf1[3];
2014
2015 for (u32 i = 0; i < p0; i++)
2016 {
2017 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2018
2019 out_len += in_len;
2020 }
2021
2022 return out_len;
2023 }
2024
2025 static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2026 {
2027 if ((in_len + in_len) >= 32) return (in_len);
2028
2029 u32 out_len = in_len;
2030
2031 u32 tib40[4];
2032 u32 tib41[4];
2033
2034 reverse_block (buf0, buf1, tib40, tib41, out_len);
2035
2036 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2037
2038 out_len += in_len;
2039
2040 return out_len;
2041 }
2042
2043 static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2044 {
2045 if ((in_len + 1) >= 32) return (in_len);
2046
2047 u32 out_len = in_len;
2048
2049 append_block1 (out_len, buf0, buf1, p0);
2050
2051 out_len++;
2052
2053 return out_len;
2054 }
2055
2056 static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2057 {
2058 if ((in_len + 1) >= 32) return (in_len);
2059
2060 u32 out_len = in_len;
2061
2062 rshift_block (buf0, buf1, buf0, buf1);
2063
2064 buf0[0] = buf0[0] | p0;
2065
2066 out_len++;
2067
2068 return out_len;
2069 }
2070
2071 static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2072 {
2073 if (in_len == 0) return (in_len);
2074
2075 const u32 in_len1 = in_len - 1;
2076
2077 const u32 sh = (in_len1 & 3) * 8;
2078
2079 const u32 tmp = (buf0[0] & 0xff) << sh;
2080
2081 lshift_block (buf0, buf1, buf0, buf1);
2082
2083 switch (in_len1 / 4)
2084 {
2085 case 0: buf0[0] |= tmp; break;
2086 case 1: buf0[1] |= tmp; break;
2087 case 2: buf0[2] |= tmp; break;
2088 case 3: buf0[3] |= tmp; break;
2089 case 4: buf1[0] |= tmp; break;
2090 case 5: buf1[1] |= tmp; break;
2091 case 6: buf1[2] |= tmp; break;
2092 case 7: buf1[3] |= tmp; break;
2093 }
2094
2095 return in_len;
2096 }
2097
2098 static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2099 {
2100 if (in_len == 0) return (in_len);
2101
2102 const u32 in_len1 = in_len - 1;
2103
2104 const u32 sh = (in_len1 & 3) * 8;
2105
2106 u32 tmp = 0;
2107
2108 switch (in_len1 / 4)
2109 {
2110 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2111 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2112 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2113 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2114 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2115 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2116 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2117 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2118 }
2119
2120 rshift_block (buf0, buf1, buf0, buf1);
2121
2122 buf0[0] |= tmp;
2123
2124 truncate_right (buf0, buf1, in_len);
2125
2126 return in_len;
2127 }
2128
2129 static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2130 {
2131 if (in_len == 0) return (in_len);
2132
2133 const u32 in_len1 = in_len - 1;
2134
2135 lshift_block (buf0, buf1, buf0, buf1);
2136
2137 return in_len1;
2138 }
2139
2140 static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2141 {
2142 if (in_len == 0) return (in_len);
2143
2144 const u32 in_len1 = in_len - 1;
2145
2146 const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1;
2147
2148 switch (in_len1 / 4)
2149 {
2150 case 0: buf0[0] &= tmp; break;
2151 case 1: buf0[1] &= tmp; break;
2152 case 2: buf0[2] &= tmp; break;
2153 case 3: buf0[3] &= tmp; break;
2154 case 4: buf1[0] &= tmp; break;
2155 case 5: buf1[1] &= tmp; break;
2156 case 6: buf1[2] &= tmp; break;
2157 case 7: buf1[3] &= tmp; break;
2158 }
2159
2160 return in_len1;
2161 }
2162
2163 static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2164 {
2165 if (p0 >= in_len) return (in_len);
2166
2167 u32 out_len = in_len;
2168
2169 u32 tib40[4];
2170 u32 tib41[4];
2171
2172 lshift_block (buf0, buf1, tib40, tib41);
2173
2174 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2175 const u32 mr = ~ml;
2176
2177 switch (p0 / 4)
2178 {
2179 case 0: buf0[0] = (buf0[0] & ml)
2180 | (tib40[0] & mr);
2181 buf0[1] = tib40[1];
2182 buf0[2] = tib40[2];
2183 buf0[3] = tib40[3];
2184 buf1[0] = tib41[0];
2185 buf1[1] = tib41[1];
2186 buf1[2] = tib41[2];
2187 buf1[3] = tib41[3];
2188 break;
2189 case 1: buf0[1] = (buf0[1] & ml)
2190 | (tib40[1] & mr);
2191 buf0[2] = tib40[2];
2192 buf0[3] = tib40[3];
2193 buf1[0] = tib41[0];
2194 buf1[1] = tib41[1];
2195 buf1[2] = tib41[2];
2196 buf1[3] = tib41[3];
2197 break;
2198 case 2: buf0[2] = (buf0[2] & ml)
2199 | (tib40[2] & mr);
2200 buf0[3] = tib40[3];
2201 buf1[0] = tib41[0];
2202 buf1[1] = tib41[1];
2203 buf1[2] = tib41[2];
2204 buf1[3] = tib41[3];
2205 break;
2206 case 3: buf0[3] = (buf0[3] & ml)
2207 | (tib40[3] & mr);
2208 buf1[0] = tib41[0];
2209 buf1[1] = tib41[1];
2210 buf1[2] = tib41[2];
2211 buf1[3] = tib41[3];
2212 break;
2213 case 4: buf1[0] = (buf1[0] & ml)
2214 | (tib41[0] & mr);
2215 buf1[1] = tib41[1];
2216 buf1[2] = tib41[2];
2217 buf1[3] = tib41[3];
2218 break;
2219 case 5: buf1[1] = (buf1[1] & ml)
2220 | (tib41[1] & mr);
2221 buf1[2] = tib41[2];
2222 buf1[3] = tib41[3];
2223 break;
2224 case 6: buf1[2] = (buf1[2] & ml)
2225 | (tib41[2] & mr);
2226 buf1[3] = tib41[3];
2227 break;
2228 case 7: buf1[3] = (buf1[3] & ml)
2229 | (tib41[3] & mr);
2230 break;
2231 }
2232
2233 out_len--;
2234
2235 return out_len;
2236 }
2237
2238 static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2239 {
2240 if (p0 >= in_len) return (in_len);
2241
2242 if ((p0 + p1) > in_len) return (in_len);
2243
2244 u32 out_len = p1;
2245
2246 lshift_block_N (buf0, buf1, buf0, buf1, p0);
2247
2248 truncate_right (buf0, buf1, out_len);
2249
2250 return out_len;
2251 }
2252
2253 static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2254 {
2255 if (p0 >= in_len) return (in_len);
2256
2257 if ((p0 + p1) > in_len) return (in_len);
2258
2259 u32 out_len = in_len;
2260
2261 u32 tib40[4];
2262 u32 tib41[4];
2263
2264 tib40[0] = 0;
2265 tib40[1] = 0;
2266 tib40[2] = 0;
2267 tib40[3] = 0;
2268 tib41[0] = 0;
2269 tib41[1] = 0;
2270 tib41[2] = 0;
2271 tib41[3] = 0;
2272
2273 lshift_block_N (buf0, buf1, tib40, tib41, p1);
2274
2275 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2276 const u32 mr = ~ml;
2277
2278 switch (p0 / 4)
2279 {
2280 case 0: buf0[0] = (buf0[0] & ml)
2281 | (tib40[0] & mr);
2282 buf0[1] = tib40[1];
2283 buf0[2] = tib40[2];
2284 buf0[3] = tib40[3];
2285 buf1[0] = tib41[0];
2286 buf1[1] = tib41[1];
2287 buf1[2] = tib41[2];
2288 buf1[3] = tib41[3];
2289 break;
2290 case 1: buf0[1] = (buf0[1] & ml)
2291 | (tib40[1] & mr);
2292 buf0[2] = tib40[2];
2293 buf0[3] = tib40[3];
2294 buf1[0] = tib41[0];
2295 buf1[1] = tib41[1];
2296 buf1[2] = tib41[2];
2297 buf1[3] = tib41[3];
2298 break;
2299 case 2: buf0[2] = (buf0[2] & ml)
2300 | (tib40[2] & mr);
2301 buf0[3] = tib40[3];
2302 buf1[0] = tib41[0];
2303 buf1[1] = tib41[1];
2304 buf1[2] = tib41[2];
2305 buf1[3] = tib41[3];
2306 break;
2307 case 3: buf0[3] = (buf0[3] & ml)
2308 | (tib40[3] & mr);
2309 buf1[0] = tib41[0];
2310 buf1[1] = tib41[1];
2311 buf1[2] = tib41[2];
2312 buf1[3] = tib41[3];
2313 break;
2314 case 4: buf1[0] = (buf1[0] & ml)
2315 | (tib41[0] & mr);
2316 buf1[1] = tib41[1];
2317 buf1[2] = tib41[2];
2318 buf1[3] = tib41[3];
2319 break;
2320 case 5: buf1[1] = (buf1[1] & ml)
2321 | (tib41[1] & mr);
2322 buf1[2] = tib41[2];
2323 buf1[3] = tib41[3];
2324 break;
2325 case 6: buf1[2] = (buf1[2] & ml)
2326 | (tib41[2] & mr);
2327 buf1[3] = tib41[3];
2328 break;
2329 case 7: buf1[3] = (buf1[3] & ml)
2330 | (tib41[3] & mr);
2331 break;
2332 }
2333
2334 out_len -= p1;
2335
2336 return out_len;
2337 }
2338
2339 static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2340 {
2341 if (p0 > in_len) return (in_len);
2342
2343 if ((in_len + 1) >= 32) return (in_len);
2344
2345 u32 out_len = in_len;
2346
2347 u32 tib40[4];
2348 u32 tib41[4];
2349
2350 rshift_block (buf0, buf1, tib40, tib41);
2351
2352 const u32 p1n = p1 << ((p0 & 3) * 8);
2353
2354 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2355
2356 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
2357
2358 switch (p0 / 4)
2359 {
2360 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
2361 buf0[1] = tib40[1];
2362 buf0[2] = tib40[2];
2363 buf0[3] = tib40[3];
2364 buf1[0] = tib41[0];
2365 buf1[1] = tib41[1];
2366 buf1[2] = tib41[2];
2367 buf1[3] = tib41[3];
2368 break;
2369 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
2370 buf0[2] = tib40[2];
2371 buf0[3] = tib40[3];
2372 buf1[0] = tib41[0];
2373 buf1[1] = tib41[1];
2374 buf1[2] = tib41[2];
2375 buf1[3] = tib41[3];
2376 break;
2377 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
2378 buf0[3] = tib40[3];
2379 buf1[0] = tib41[0];
2380 buf1[1] = tib41[1];
2381 buf1[2] = tib41[2];
2382 buf1[3] = tib41[3];
2383 break;
2384 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
2385 buf1[0] = tib41[0];
2386 buf1[1] = tib41[1];
2387 buf1[2] = tib41[2];
2388 buf1[3] = tib41[3];
2389 break;
2390 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
2391 buf1[1] = tib41[1];
2392 buf1[2] = tib41[2];
2393 buf1[3] = tib41[3];
2394 break;
2395 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
2396 buf1[2] = tib41[2];
2397 buf1[3] = tib41[3];
2398 break;
2399 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
2400 buf1[3] = tib41[3];
2401 break;
2402 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
2403 break;
2404 }
2405
2406 out_len++;
2407
2408 return out_len;
2409 }
2410
2411 static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2412 {
2413 if (p0 >= in_len) return (in_len);
2414
2415 const u32 p1n = p1 << ((p0 & 3) * 8);
2416
2417 const u32 m = ~(0xffu << ((p0 & 3) * 8));
2418
2419 switch (p0 / 4)
2420 {
2421 case 0: buf0[0] = (buf0[0] & m) | p1n; break;
2422 case 1: buf0[1] = (buf0[1] & m) | p1n; break;
2423 case 2: buf0[2] = (buf0[2] & m) | p1n; break;
2424 case 3: buf0[3] = (buf0[3] & m) | p1n; break;
2425 case 4: buf1[0] = (buf1[0] & m) | p1n; break;
2426 case 5: buf1[1] = (buf1[1] & m) | p1n; break;
2427 case 6: buf1[2] = (buf1[2] & m) | p1n; break;
2428 case 7: buf1[3] = (buf1[3] & m) | p1n; break;
2429 }
2430
2431 return in_len;
2432 }
2433
2434 static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2435 {
2436 if (p0 >= in_len) return (in_len);
2437
2438 truncate_right (buf0, buf1, p0);
2439
2440 return p0;
2441 }
2442
2443 static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2444 {
2445 #ifdef IS_NV
2446 for (u32 i = 0; i < in_len; i++)
2447 {
2448 switch (i)
2449 {
2450 case 0: if ((__byte_perm (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7650);
2451 break;
2452 case 1: if ((__byte_perm (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7604);
2453 break;
2454 case 2: if ((__byte_perm (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7054);
2455 break;
2456 case 3: if ((__byte_perm (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x0654);
2457 break;
2458 case 4: if ((__byte_perm (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7650);
2459 break;
2460 case 5: if ((__byte_perm (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7604);
2461 break;
2462 case 6: if ((__byte_perm (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7054);
2463 break;
2464 case 7: if ((__byte_perm (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x0654);
2465 break;
2466 case 8: if ((__byte_perm (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7650);
2467 break;
2468 case 9: if ((__byte_perm (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7604);
2469 break;
2470 case 10: if ((__byte_perm (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7054);
2471 break;
2472 case 11: if ((__byte_perm (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x0654);
2473 break;
2474 case 12: if ((__byte_perm (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7650);
2475 break;
2476 case 13: if ((__byte_perm (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7604);
2477 break;
2478 case 14: if ((__byte_perm (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7054);
2479 break;
2480 case 15: if ((__byte_perm (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x0654);
2481 break;
2482 case 16: if ((__byte_perm (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7650);
2483 break;
2484 case 17: if ((__byte_perm (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7604);
2485 break;
2486 case 18: if ((__byte_perm (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7054);
2487 break;
2488 case 19: if ((__byte_perm (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x0654);
2489 break;
2490 case 20: if ((__byte_perm (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7650);
2491 break;
2492 case 21: if ((__byte_perm (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7604);
2493 break;
2494 case 22: if ((__byte_perm (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7054);
2495 break;
2496 case 23: if ((__byte_perm (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x0654);
2497 break;
2498 case 24: if ((__byte_perm (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7650);
2499 break;
2500 case 25: if ((__byte_perm (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7604);
2501 break;
2502 case 26: if ((__byte_perm (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7054);
2503 break;
2504 case 27: if ((__byte_perm (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x0654);
2505 break;
2506 case 28: if ((__byte_perm (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7650);
2507 break;
2508 case 29: if ((__byte_perm (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7604);
2509 break;
2510 case 30: if ((__byte_perm (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7054);
2511 break;
2512 case 31: if ((__byte_perm (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x0654);
2513 break;
2514 }
2515 }
2516 #endif
2517
2518 #if defined IS_AMD || defined IS_GENERIC
2519 const uchar4 tmp0 = (uchar4) (p0);
2520 const uchar4 tmp1 = (uchar4) (p1);
2521
2522 uchar4 tmp;
2523
2524 tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp);
2525 tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp);
2526 tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp);
2527 tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp);
2528 tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp);
2529 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp);
2530 tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp);
2531 tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp);
2532 #endif
2533
2534 return in_len;
2535 }
2536
2537 static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2538 {
2539 // TODO
2540 return in_len;
2541 }
2542
2543 static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2544 {
2545 // TODO
2546 return in_len;
2547 }
2548
2549 static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2550 {
2551 if ( in_len == 0) return (in_len);
2552 if ((in_len + p0) >= 32) return (in_len);
2553
2554 u32 out_len = in_len;
2555
2556 const u32 tmp = buf0[0] & 0xFF;
2557
2558 rshift_block_N (buf0, buf1, buf0, buf1, p0);
2559
2560 #ifdef IS_NV
2561 switch (p0)
2562 {
2563 case 1: buf0[0] |= tmp;
2564 break;
2565 case 2: buf0[0] |= __byte_perm (tmp, 0, 0x5400);
2566 break;
2567 case 3: buf0[0] |= __byte_perm (tmp, 0, 0x4000);
2568 break;
2569 case 4: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2570 break;
2571 case 5: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2572 buf0[1] |= tmp;
2573 break;
2574 case 6: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2575 buf0[1] |= __byte_perm (tmp, 0, 0x5400);
2576 break;
2577 case 7: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2578 buf0[1] |= __byte_perm (tmp, 0, 0x4000);
2579 break;
2580 case 8: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2581 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2582 break;
2583 case 9: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2584 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2585 buf0[2] |= tmp;
2586 break;
2587 case 10: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2588 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2589 buf0[2] |= __byte_perm (tmp, 0, 0x5400);
2590 break;
2591 case 11: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2592 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2593 buf0[2] |= __byte_perm (tmp, 0, 0x4000);
2594 break;
2595 case 12: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2596 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2597 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2598 break;
2599 case 13: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2600 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2601 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2602 buf0[3] |= tmp;
2603 break;
2604 case 14: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2605 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2606 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2607 buf0[3] |= __byte_perm (tmp, 0, 0x5400);
2608 break;
2609 case 15: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2610 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2611 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2612 buf0[3] |= __byte_perm (tmp, 0, 0x4000);
2613 break;
2614 case 16: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2615 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2616 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2617 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2618 break;
2619 case 17: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2620 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2621 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2622 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2623 buf1[0] |= tmp;
2624 break;
2625 case 18: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2626 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2627 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2628 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2629 buf1[0] |= __byte_perm (tmp, 0, 0x5400);
2630 break;
2631 case 19: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2632 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2633 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2634 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2635 buf1[0] |= __byte_perm (tmp, 0, 0x4000);
2636 break;
2637 case 20: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2638 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2639 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2640 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2641 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2642 break;
2643 case 21: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2644 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2645 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2646 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2647 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2648 buf1[1] |= tmp;
2649 break;
2650 case 22: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2651 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2652 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2653 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2654 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2655 buf1[1] |= __byte_perm (tmp, 0, 0x5400);
2656 break;
2657 case 23: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2658 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2659 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2660 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2661 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2662 buf1[1] |= __byte_perm (tmp, 0, 0x4000);
2663 break;
2664 case 24: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2665 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2666 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2667 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2668 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2669 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2670 break;
2671 case 25: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2672 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2673 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2674 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2675 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2676 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2677 buf1[2] |= tmp;
2678 break;
2679 case 26: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2680 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2681 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2682 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2683 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2684 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2685 buf1[2] |= __byte_perm (tmp, 0, 0x5400);
2686 break;
2687 case 27: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2688 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2689 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2690 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2691 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2692 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2693 buf1[2] |= __byte_perm (tmp, 0, 0x4000);
2694 break;
2695 case 28: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2696 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2697 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2698 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2699 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2700 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2701 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2702 break;
2703 case 29: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2704 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2705 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2706 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2707 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2708 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2709 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2710 buf1[3] |= tmp;
2711 break;
2712 case 30: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2713 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2714 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2715 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2716 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2717 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2718 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2719 buf1[3] |= __byte_perm (tmp, 0, 0x5400);
2720 break;
2721 case 31: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2722 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2723 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2724 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2725 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2726 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2727 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2728 buf1[3] |= __byte_perm (tmp, 0, 0x4000);
2729 break;
2730 }
2731 #endif
2732
2733 #if defined IS_AMD || defined IS_GENERIC
2734 switch (p0)
2735 {
2736 case 1: buf0[0] |= tmp << 0;
2737 break;
2738 case 2: buf0[0] |= tmp << 0 | tmp << 8;
2739 break;
2740 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2741 break;
2742 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2743 break;
2744 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2745 buf0[1] |= tmp << 0;
2746 break;
2747 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2748 buf0[1] |= tmp << 0 | tmp << 8;
2749 break;
2750 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2751 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2752 break;
2753 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2754 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2755 break;
2756 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2757 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2758 buf0[2] |= tmp << 0;
2759 break;
2760 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2761 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2762 buf0[2] |= tmp << 0 | tmp << 8;
2763 break;
2764 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2765 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2766 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2767 break;
2768 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2769 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2770 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2771 break;
2772 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2773 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2774 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2775 buf0[3] |= tmp << 0;
2776 break;
2777 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2778 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2779 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2780 buf0[3] |= tmp << 0 | tmp << 8;
2781 break;
2782 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2783 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2784 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2785 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2786 break;
2787 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2788 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2789 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2790 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2791 break;
2792 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2793 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2794 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2795 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2796 buf1[0] |= tmp << 0;
2797 break;
2798 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2799 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2800 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2801 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2802 buf1[0] |= tmp << 0 | tmp << 8;
2803 break;
2804 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2805 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2806 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2807 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2808 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2809 break;
2810 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2811 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2812 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2813 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2814 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2815 break;
2816 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2817 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2818 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2819 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2820 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2821 buf1[1] |= tmp << 0;
2822 break;
2823 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2824 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2825 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2826 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2827 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2828 buf1[1] |= tmp << 0 | tmp << 8;
2829 break;
2830 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2831 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2832 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2833 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2834 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2835 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2836 break;
2837 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2838 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2839 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2840 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2841 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2842 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2843 break;
2844 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2845 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2846 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2847 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2848 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2849 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2850 buf1[2] |= tmp << 0;
2851 break;
2852 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2853 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2854 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2855 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2856 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2857 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2858 buf1[2] |= tmp << 0 | tmp << 8;
2859 break;
2860 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2861 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2862 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2863 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2864 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2865 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2866 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2867 break;
2868 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2869 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2870 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2871 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2872 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2873 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2874 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2875 break;
2876 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2877 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2878 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2879 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2880 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2881 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2882 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2883 buf1[3] |= tmp << 0;
2884 break;
2885 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2886 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2887 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2888 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2889 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2890 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2891 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2892 buf1[3] |= tmp << 0 | tmp << 8;
2893 break;
2894 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2895 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2896 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2897 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2898 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2899 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2900 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2901 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2902 break;
2903 }
2904 #endif
2905
2906 out_len += p0;
2907
2908 return out_len;
2909 }
2910
2911 static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2912 {
2913 if ( in_len == 0) return (in_len);
2914 if ((in_len + p0) >= 32) return (in_len);
2915
2916 const u32 in_len1 = in_len - 1;
2917
2918 const u32 sh = (in_len1 & 3) * 8;
2919
2920 u32 tmp = 0;
2921
2922 switch (in_len1 / 4)
2923 {
2924 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2925 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2926 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2927 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2928 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2929 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2930 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2931 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2932 }
2933
2934 u32 out_len = in_len;
2935
2936 for (u32 i = 0; i < p0; i++)
2937 {
2938 append_block1 (out_len, buf0, buf1, tmp);
2939
2940 out_len++;
2941 }
2942
2943 return out_len;
2944 }
2945
2946 static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2947 {
2948 if ( in_len == 0) return (in_len);
2949 if ((in_len + in_len) >= 32) return (in_len);
2950
2951 u32 out_len = in_len;
2952
2953 u32 tib40[4];
2954 u32 tib41[4];
2955
2956 #ifdef IS_NV
2957 tib40[0] = __byte_perm (buf0[0], 0, 0x1100);
2958 tib40[1] = __byte_perm (buf0[0], 0, 0x3322);
2959 tib40[2] = __byte_perm (buf0[1], 0, 0x1100);
2960 tib40[3] = __byte_perm (buf0[1], 0, 0x3322);
2961 tib41[0] = __byte_perm (buf0[2], 0, 0x1100);
2962 tib41[1] = __byte_perm (buf0[2], 0, 0x3322);
2963 tib41[2] = __byte_perm (buf0[3], 0, 0x1100);
2964 tib41[3] = __byte_perm (buf0[3], 0, 0x3322);
2965
2966 buf0[0] = tib40[0];
2967 buf0[1] = tib40[1];
2968 buf0[2] = tib40[2];
2969 buf0[3] = tib40[3];
2970 buf1[0] = tib41[0];
2971 buf1[1] = tib41[1];
2972 buf1[2] = tib41[2];
2973 buf1[3] = tib41[3];
2974 #endif
2975
2976 #if defined IS_AMD || defined IS_GENERIC
2977 tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8);
2978 tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8);
2979 tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8);
2980 tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8);
2981 tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8);
2982 tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8);
2983 tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8);
2984 tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8);
2985
2986 buf0[0] = tib40[0] | (tib40[0] << 8);
2987 buf0[1] = tib40[1] | (tib40[1] << 8);
2988 buf0[2] = tib40[2] | (tib40[2] << 8);
2989 buf0[3] = tib40[3] | (tib40[3] << 8);
2990 buf1[0] = tib41[0] | (tib41[0] << 8);
2991 buf1[1] = tib41[1] | (tib41[1] << 8);
2992 buf1[2] = tib41[2] | (tib41[2] << 8);
2993 buf1[3] = tib41[3] | (tib41[3] << 8);
2994 #endif
2995
2996 out_len = out_len + out_len;
2997
2998 return out_len;
2999 }
3000
3001 static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3002 {
3003 if (in_len < 2) return (in_len);
3004
3005 #ifdef IS_NV
3006 buf0[0] = __byte_perm (buf0[0], 0, 0x3201);
3007 #endif
3008
3009 #if defined IS_AMD || defined IS_GENERIC
3010 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
3011 #endif
3012
3013 return in_len;
3014 }
3015
3016 static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3017 {
3018 if (in_len < 2) return (in_len);
3019
3020 #ifdef IS_NV
3021 switch (in_len)
3022 {
3023 case 2: buf0[0] = __byte_perm (buf0[0], 0, 0x5401);
3024 break;
3025 case 3: buf0[0] = __byte_perm (buf0[0], 0, 0x4120);
3026 break;
3027 case 4: buf0[0] = __byte_perm (buf0[0], 0, 0x2310);
3028 break;
3029 case 5: buf0[1] = __byte_perm (buf0[1], buf0[0], 0x7210);
3030 buf0[0] = __byte_perm (buf0[0], buf0[1], 0x4210);
3031 buf0[1] = __byte_perm (buf0[1], 0, 0x6543);
3032 break;
3033 case 6: buf0[1] = __byte_perm (buf0[1], 0, 0x5401);
3034 break;
3035 case 7: buf0[1] = __byte_perm (buf0[1], 0, 0x4120);
3036 break;
3037 case 8: buf0[1] = __byte_perm (buf0[1], 0, 0x2310);
3038 break;
3039 case 9: buf0[2] = __byte_perm (buf0[2], buf0[1], 0x7210);
3040 buf0[1] = __byte_perm (buf0[1], buf0[2], 0x4210);
3041 buf0[2] = __byte_perm (buf0[2], 0, 0x6543);
3042 break;
3043 case 10: buf0[2] = __byte_perm (buf0[2], 0, 0x5401);
3044 break;
3045 case 11: buf0[2] = __byte_perm (buf0[2], 0, 0x4120);
3046 break;
3047 case 12: buf0[2] = __byte_perm (buf0[2], 0, 0x2310);
3048 break;
3049 case 13: buf0[3] = __byte_perm (buf0[3], buf0[2], 0x7210);
3050 buf0[2] = __byte_perm (buf0[2], buf0[3], 0x4210);
3051 buf0[3] = __byte_perm (buf0[3], 0, 0x6543);
3052 break;
3053 case 14: buf0[3] = __byte_perm (buf0[3], 0, 0x5401);
3054 break;
3055 case 15: buf0[3] = __byte_perm (buf0[3], 0, 0x4120);
3056 break;
3057 case 16: buf0[3] = __byte_perm (buf0[3], 0, 0x2310);
3058 break;
3059 case 17: buf1[0] = __byte_perm (buf1[0], buf0[3], 0x7210);
3060 buf0[3] = __byte_perm (buf0[3], buf1[0], 0x4210);
3061 buf1[0] = __byte_perm (buf1[0], 0, 0x6543);
3062 break;
3063 case 18: buf1[0] = __byte_perm (buf1[0], 0, 0x5401);
3064 break;
3065 case 19: buf1[0] = __byte_perm (buf1[0], 0, 0x4120);
3066 break;
3067 case 20: buf1[0] = __byte_perm (buf1[0], 0, 0x2310);
3068 break;
3069 case 21: buf1[1] = __byte_perm (buf1[1], buf1[0], 0x7210);
3070 buf1[0] = __byte_perm (buf1[0], buf1[1], 0x4210);
3071 buf1[1] = __byte_perm (buf1[1], 0, 0x6543);
3072 break;
3073 case 22: buf1[1] = __byte_perm (buf1[1], 0, 0x5401);
3074 break;
3075 case 23: buf1[1] = __byte_perm (buf1[1], 0, 0x4120);
3076 break;
3077 case 24: buf1[1] = __byte_perm (buf1[1], 0, 0x2310);
3078 break;
3079 case 25: buf1[2] = __byte_perm (buf1[2], buf1[1], 0x7210);
3080 buf1[1] = __byte_perm (buf1[1], buf1[2], 0x4210);
3081 buf1[2] = __byte_perm (buf1[2], 0, 0x6543);
3082 break;
3083 case 26: buf1[2] = __byte_perm (buf1[2], 0, 0x5401);
3084 break;
3085 case 27: buf1[2] = __byte_perm (buf1[2], 0, 0x4120);
3086 break;
3087 case 28: buf1[2] = __byte_perm (buf1[2], 0, 0x2310);
3088 break;
3089 case 29: buf1[3] = __byte_perm (buf1[3], buf1[2], 0x7210);
3090 buf1[2] = __byte_perm (buf1[2], buf1[3], 0x4210);
3091 buf1[3] = __byte_perm (buf1[3], 0, 0x6543);
3092 break;
3093 case 30: buf1[3] = __byte_perm (buf1[3], 0, 0x5401);
3094 break;
3095 case 31: buf1[3] = __byte_perm (buf1[3], 0, 0x4120);
3096 break;
3097 }
3098 #endif
3099
3100 #if defined IS_AMD || defined IS_GENERIC
3101 switch (in_len)
3102 {
3103 case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
3104 break;
3105 case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00);
3106 break;
3107 case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000);
3108 break;
3109 case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1];
3110 buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24);
3111 buf0[1] = (buf0[1] >> 24);
3112 break;
3113 case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF);
3114 break;
3115 case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00);
3116 break;
3117 case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000);
3118 break;
3119 case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2];
3120 buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24);
3121 buf0[2] = (buf0[2] >> 24);
3122 break;
3123 case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF);
3124 break;
3125 case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00);
3126 break;
3127 case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000);
3128 break;
3129 case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3];
3130 buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24);
3131 buf0[3] = (buf0[3] >> 24);
3132 break;
3133 case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF);
3134 break;
3135 case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00);
3136 break;
3137 case 16: buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000);
3138 break;
3139 case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0];
3140 buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24);
3141 buf1[0] = (buf1[0] >> 24);
3142 break;
3143 case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF);
3144 break;
3145 case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00);
3146 break;
3147 case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000);
3148 break;
3149 case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1];
3150 buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24);
3151 buf1[1] = (buf1[1] >> 24);
3152 break;
3153 case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF);
3154 break;
3155 case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00);
3156 break;
3157 case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000);
3158 break;
3159 case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2];
3160 buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24);
3161 buf1[2] = (buf1[2] >> 24);
3162 break;
3163 case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF);
3164 break;
3165 case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00);
3166 break;
3167 case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000);
3168 break;
3169 case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3];
3170 buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24);
3171 buf1[3] = (buf1[3] >> 24);
3172 break;
3173 case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF);
3174 break;
3175 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00);
3176 break;
3177 }
3178 #endif
3179
3180 return in_len;
3181 }
3182
3183 static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3184 {
3185 if (p0 >= in_len) return (in_len);
3186 if (p1 >= in_len) return (in_len);
3187
3188 u32 tmp0 = 0;
3189 u32 tmp1 = 0;
3190
3191 #ifdef IS_NV
3192 switch (p0)
3193 {
3194 case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
3195 break;
3196 case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
3197 break;
3198 case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
3199 break;
3200 case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
3201 break;
3202 case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
3203 break;
3204 case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
3205 break;
3206 case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
3207 break;
3208 case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
3209 break;
3210 case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
3211 break;
3212 case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
3213 break;
3214 case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
3215 break;
3216 case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
3217 break;
3218 case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
3219 break;
3220 case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
3221 break;
3222 case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
3223 break;
3224 case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
3225 break;
3226 case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
3227 break;
3228 case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
3229 break;
3230 case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
3231 break;
3232 case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
3233 break;
3234 case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
3235 break;
3236 case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
3237 break;
3238 case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
3239 break;
3240 case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
3241 break;
3242 case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
3243 break;
3244 case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
3245 break;
3246 case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
3247 break;
3248 case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
3249 break;
3250 case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
3251 break;
3252 case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
3253 break;
3254 case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
3255 break;
3256 case 31: tmp0 = __byte_perm (buf1[3], 0, 0x6543);
3257 break;
3258 }
3259
3260 switch (p1)
3261 {
3262 case 0: tmp1 = __byte_perm (buf0[0], 0, 0x6540);
3263 buf0[0] = __byte_perm (tmp0, buf0[0], 0x7650);
3264 break;
3265 case 1: tmp1 = __byte_perm (buf0[0], 0, 0x6541);
3266 buf0[0] = __byte_perm (tmp0, buf0[0], 0x7604);
3267 break;
3268 case 2: tmp1 = __byte_perm (buf0[0], 0, 0x6542);
3269 buf0[0] = __byte_perm (tmp0, buf0[0], 0x7054);
3270 break;
3271 case 3: tmp1 = __byte_perm (buf0[0], 0, 0x6543);
3272 buf0[0] = __byte_perm (tmp0, buf0[0], 0x0654);
3273 break;
3274 case 4: tmp1 = __byte_perm (buf0[1], 0, 0x6540);
3275 buf0[1] = __byte_perm (tmp0, buf0[1], 0x7650);
3276 break;
3277 case 5: tmp1 = __byte_perm (buf0[1], 0, 0x6541);
3278 buf0[1] = __byte_perm (tmp0, buf0[1], 0x7604);
3279 break;
3280 case 6: tmp1 = __byte_perm (buf0[1], 0, 0x6542);
3281 buf0[1] = __byte_perm (tmp0, buf0[1], 0x7054);
3282 break;
3283 case 7: tmp1 = __byte_perm (buf0[1], 0, 0x6543);
3284 buf0[1] = __byte_perm (tmp0, buf0[1], 0x0654);
3285 break;
3286 case 8: tmp1 = __byte_perm (buf0[2], 0, 0x6540);
3287 buf0[2] = __byte_perm (tmp0, buf0[2], 0x7650);
3288 break;
3289 case 9: tmp1 = __byte_perm (buf0[2], 0, 0x6541);
3290 buf0[2] = __byte_perm (tmp0, buf0[2], 0x7604);
3291 break;
3292 case 10: tmp1 = __byte_perm (buf0[2], 0, 0x6542);
3293 buf0[2] = __byte_perm (tmp0, buf0[2], 0x7054);
3294 break;
3295 case 11: tmp1 = __byte_perm (buf0[2], 0, 0x6543);
3296 buf0[2] = __byte_perm (tmp0, buf0[2], 0x0654);
3297 break;
3298 case 12: tmp1 = __byte_perm (buf0[3], 0, 0x6540);
3299 buf0[3] = __byte_perm (tmp0, buf0[3], 0x7650);
3300 break;
3301 case 13: tmp1 = __byte_perm (buf0[3], 0, 0x6541);
3302 buf0[3] = __byte_perm (tmp0, buf0[3], 0x7604);
3303 break;
3304 case 14: tmp1 = __byte_perm (buf0[3], 0, 0x6542);
3305 buf0[3] = __byte_perm (tmp0, buf0[3], 0x7054);
3306 break;
3307 case 15: tmp1 = __byte_perm (buf0[3], 0, 0x6543);
3308 buf0[3] = __byte_perm (tmp0, buf0[3], 0x0654);
3309 break;
3310 case 16: tmp1 = __byte_perm (buf1[0], 0, 0x6540);
3311 buf1[0] = __byte_perm (tmp0, buf1[0], 0x7650);
3312 break;
3313 case 17: tmp1 = __byte_perm (buf1[0], 0, 0x6541);
3314 buf1[0] = __byte_perm (tmp0, buf1[0], 0x7604);
3315 break;
3316 case 18: tmp1 = __byte_perm (buf1[0], 0, 0x6542);
3317 buf1[0] = __byte_perm (tmp0, buf1[0], 0x7054);
3318 break;
3319 case 19: tmp1 = __byte_perm (buf1[0], 0, 0x6543);
3320 buf1[0] = __byte_perm (tmp0, buf1[0], 0x0654);
3321 break;
3322 case 20: tmp1 = __byte_perm (buf1[1], 0, 0x6540);
3323 buf1[1] = __byte_perm (tmp0, buf1[1], 0x7650);
3324 break;
3325 case 21: tmp1 = __byte_perm (buf1[1], 0, 0x6541);
3326 buf1[1] = __byte_perm (tmp0, buf1[1], 0x7604);
3327 break;
3328 case 22: tmp1 = __byte_perm (buf1[1], 0, 0x6542);
3329 buf1[1] = __byte_perm (tmp0, buf1[1], 0x7054);
3330 break;
3331 case 23: tmp1 = __byte_perm (buf1[1], 0, 0x6543);
3332 buf1[1] = __byte_perm (tmp0, buf1[1], 0x0654);
3333 break;
3334 case 24: tmp1 = __byte_perm (buf1[2], 0, 0x6540);
3335 buf1[2] = __byte_perm (tmp0, buf1[2], 0x7650);
3336 break;
3337 case 25: tmp1 = __byte_perm (buf1[2], 0, 0x6541);
3338 buf1[2] = __byte_perm (tmp0, buf1[2], 0x7604);
3339 break;
3340 case 26: tmp1 = __byte_perm (buf1[2], 0, 0x6542);
3341 buf1[2] = __byte_perm (tmp0, buf1[2], 0x7054);
3342 break;
3343 case 27: tmp1 = __byte_perm (buf1[2], 0, 0x6543);
3344 buf1[2] = __byte_perm (tmp0, buf1[2], 0x0654);
3345 break;
3346 case 28: tmp1 = __byte_perm (buf1[3], 0, 0x6540);
3347 buf1[3] = __byte_perm (tmp0, buf1[3], 0x7650);
3348 break;
3349 case 29: tmp1 = __byte_perm (buf1[3], 0, 0x6541);
3350 buf1[3] = __byte_perm (tmp0, buf1[3], 0x7604);
3351 break;
3352 case 30: tmp1 = __byte_perm (buf1[3], 0, 0x6542);
3353 buf1[3] = __byte_perm (tmp0, buf1[3], 0x7054);
3354 break;
3355 case 31: tmp1 = __byte_perm (buf1[3], 0, 0x6543);
3356 buf1[3] = __byte_perm (tmp0, buf1[3], 0x0654);
3357 break;
3358 }
3359
3360 switch (p0)
3361 {
3362 case 0: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7650);
3363 break;
3364 case 1: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7604);
3365 break;
3366 case 2: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7054);
3367 break;
3368 case 3: buf0[0] = __byte_perm (tmp1, buf0[0], 0x0654);
3369 break;
3370 case 4: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7650);
3371 break;
3372 case 5: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7604);
3373 break;
3374 case 6: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7054);
3375 break;
3376 case 7: buf0[1] = __byte_perm (tmp1, buf0[1], 0x0654);
3377 break;
3378 case 8: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7650);
3379 break;
3380 case 9: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7604);
3381 break;
3382 case 10: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7054);
3383 break;
3384 case 11: buf0[2] = __byte_perm (tmp1, buf0[2], 0x0654);
3385 break;
3386 case 12: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7650);
3387 break;
3388 case 13: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7604);
3389 break;
3390 case 14: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7054);
3391 break;
3392 case 15: buf0[3] = __byte_perm (tmp1, buf0[3], 0x0654);
3393 break;
3394 case 16: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7650);
3395 break;
3396 case 17: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7604);
3397 break;
3398 case 18: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7054);
3399 break;
3400 case 19: buf1[0] = __byte_perm (tmp1, buf1[0], 0x0654);
3401 break;
3402 case 20: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7650);
3403 break;
3404 case 21: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7604);
3405 break;
3406 case 22: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7054);
3407 break;
3408 case 23: buf1[1] = __byte_perm (tmp1, buf1[1], 0x0654);
3409 break;
3410 case 24: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7650);
3411 break;
3412 case 25: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7604);
3413 break;
3414 case 26: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7054);
3415 break;
3416 case 27: buf1[2] = __byte_perm (tmp1, buf1[2], 0x0654);
3417 break;
3418 case 28: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7650);
3419 break;
3420 case 29: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7604);
3421 break;
3422 case 30: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7054);
3423 break;
3424 case 31: buf1[3] = __byte_perm (tmp1, buf1[3], 0x0654);
3425 break;
3426 }
3427 #endif
3428
3429 #if defined IS_AMD || defined IS_GENERIC
3430 switch (p0)
3431 {
3432 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
3433 break;
3434 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
3435 break;
3436 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
3437 break;
3438 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
3439 break;
3440 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
3441 break;
3442 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
3443 break;
3444 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
3445 break;
3446 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
3447 break;
3448 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
3449 break;
3450 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
3451 break;
3452 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
3453 break;
3454 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
3455 break;
3456 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
3457 break;
3458 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
3459 break;
3460 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
3461 break;
3462 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
3463 break;
3464 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
3465 break;
3466 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
3467 break;
3468 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
3469 break;
3470 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
3471 break;
3472 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
3473 break;
3474 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
3475 break;
3476 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
3477 break;
3478 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
3479 break;
3480 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
3481 break;
3482 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
3483 break;
3484 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
3485 break;
3486 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
3487 break;
3488 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
3489 break;
3490 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
3491 break;
3492 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
3493 break;
3494 case 31: tmp0 = (buf1[3] >> 24) & 0xFF;
3495 break;
3496 }
3497
3498 switch (p1)
3499 {
3500 case 0: tmp1 = (buf0[0] >> 0) & 0xff;
3501 buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0;
3502 break;
3503 case 1: tmp1 = (buf0[0] >> 8) & 0xff;
3504 buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8;
3505 break;
3506 case 2: tmp1 = (buf0[0] >> 16) & 0xff;
3507 buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16;
3508 break;
3509 case 3: tmp1 = (buf0[0] >> 24) & 0xff;
3510 buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24;
3511 break;
3512 case 4: tmp1 = (buf0[1] >> 0) & 0xff;
3513 buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0;
3514 break;
3515 case 5: tmp1 = (buf0[1] >> 8) & 0xff;
3516 buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8;
3517 break;
3518 case 6: tmp1 = (buf0[1] >> 16) & 0xff;
3519 buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16;
3520 break;
3521 case 7: tmp1 = (buf0[1] >> 24) & 0xff;
3522 buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24;
3523 break;
3524 case 8: tmp1 = (buf0[2] >> 0) & 0xff;
3525 buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0;
3526 break;
3527 case 9: tmp1 = (buf0[2] >> 8) & 0xff;
3528 buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8;
3529 break;
3530 case 10: tmp1 = (buf0[2] >> 16) & 0xff;
3531 buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16;
3532 break;
3533 case 11: tmp1 = (buf0[2] >> 24) & 0xff;
3534 buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24;
3535 break;
3536 case 12: tmp1 = (buf0[3] >> 0) & 0xff;
3537 buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0;
3538 break;
3539 case 13: tmp1 = (buf0[3] >> 8) & 0xff;
3540 buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8;
3541 break;
3542 case 14: tmp1 = (buf0[3] >> 16) & 0xff;
3543 buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16;
3544 break;
3545 case 15: tmp1 = (buf0[3] >> 24) & 0xff;
3546 buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24;
3547 break;
3548 case 16: tmp1 = (buf1[0] >> 0) & 0xff;
3549 buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0;
3550 break;
3551 case 17: tmp1 = (buf1[0] >> 8) & 0xff;
3552 buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8;
3553 break;
3554 case 18: tmp1 = (buf1[0] >> 16) & 0xff;
3555 buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16;
3556 break;
3557 case 19: tmp1 = (buf1[0] >> 24) & 0xff;
3558 buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24;
3559 break;
3560 case 20: tmp1 = (buf1[1] >> 0) & 0xff;
3561 buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0;
3562 break;
3563 case 21: tmp1 = (buf1[1] >> 8) & 0xff;
3564 buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8;
3565 break;
3566 case 22: tmp1 = (buf1[1] >> 16) & 0xff;
3567 buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16;
3568 break;
3569 case 23: tmp1 = (buf1[1] >> 24) & 0xff;
3570 buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24;
3571 break;
3572 case 24: tmp1 = (buf1[2] >> 0) & 0xff;
3573 buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0;
3574 break;
3575 case 25: tmp1 = (buf1[2] >> 8) & 0xff;
3576 buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8;
3577 break;
3578 case 26: tmp1 = (buf1[2] >> 16) & 0xff;
3579 buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16;
3580 break;
3581 case 27: tmp1 = (buf1[2] >> 24) & 0xff;
3582 buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24;
3583 break;
3584 case 28: tmp1 = (buf1[3] >> 0) & 0xff;
3585 buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0;
3586 break;
3587 case 29: tmp1 = (buf1[3] >> 8) & 0xff;
3588 buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8;
3589 break;
3590 case 30: tmp1 = (buf1[3] >> 16) & 0xff;
3591 buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16;
3592 break;
3593 case 31: tmp1 = (buf1[3] >> 24) & 0xff;
3594 buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24;
3595 break;
3596 }
3597
3598 switch (p0)
3599 {
3600 case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0;
3601 break;
3602 case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8;
3603 break;
3604 case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16;
3605 break;
3606 case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24;
3607 break;
3608 case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0;
3609 break;
3610 case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8;
3611 break;
3612 case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16;
3613 break;
3614 case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24;
3615 break;
3616 case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0;
3617 break;
3618 case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8;
3619 break;
3620 case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16;
3621 break;
3622 case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24;
3623 break;
3624 case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0;
3625 break;
3626 case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8;
3627 break;
3628 case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16;
3629 break;
3630 case 15: buf0[3] = (buf0[3] & 0x00ffffff) | tmp1 << 24;
3631 break;
3632 case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0;
3633 break;
3634 case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8;
3635 break;
3636 case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16;
3637 break;
3638 case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24;
3639 break;
3640 case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0;
3641 break;
3642 case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8;
3643 break;
3644 case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16;
3645 break;
3646 case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24;
3647 break;
3648 case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0;
3649 break;
3650 case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8;
3651 break;
3652 case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16;
3653 break;
3654 case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24;
3655 break;
3656 case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0;
3657 break;
3658 case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8;
3659 break;
3660 case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16;
3661 break;
3662 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24;
3663 break;
3664 }
3665 #endif
3666
3667 return in_len;
3668 }
3669
3670 static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3671 {
3672 if (p0 >= in_len) return (in_len);
3673
3674 const u32 mr = 0xffu << ((p0 & 3) * 8);
3675 const u32 ml = ~mr;
3676
3677 switch (p0 / 4)
3678 {
3679 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
3680 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
3681 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
3682 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
3683 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
3684 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
3685 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
3686 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
3687 }
3688
3689 return in_len;
3690 }
3691
3692 static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3693 {
3694 if (p0 >= in_len) return (in_len);
3695
3696 const u32 mr = 0xffu << ((p0 & 3) * 8);
3697 const u32 ml = ~mr;
3698
3699 switch (p0 / 4)
3700 {
3701 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
3702 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
3703 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
3704 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
3705 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
3706 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
3707 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
3708 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
3709 }
3710
3711 return in_len;
3712 }
3713
3714 static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3715 {
3716 if (p0 >= in_len) return (in_len);
3717
3718 const u32 mr = 0xffu << ((p0 & 3) * 8);
3719 const u32 ml = ~mr;
3720
3721 const u32 n = 0x01010101 & mr;
3722
3723 switch (p0 / 4)
3724 {
3725 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
3726 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
3727 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
3728 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
3729 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
3730 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
3731 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
3732 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
3733 }
3734
3735 return in_len;
3736 }
3737
3738 static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3739 {
3740 if (p0 >= in_len) return (in_len);
3741
3742 const u32 mr = 0xffu << ((p0 & 3) * 8);
3743 const u32 ml = ~mr;
3744
3745 const u32 n = 0x01010101 & mr;
3746
3747 switch (p0 / 4)
3748 {
3749 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
3750 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
3751 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
3752 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
3753 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
3754 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
3755 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
3756 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
3757 }
3758
3759 return in_len;
3760 }
3761
3762 static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3763 {
3764 if ((p0 + 1) >= in_len) return (in_len);
3765
3766 u32 tib40[4];
3767 u32 tib41[4];
3768
3769 lshift_block (buf0, buf1, tib40, tib41);
3770
3771 const u32 mr = 0xffu << ((p0 & 3) * 8);
3772 const u32 ml = ~mr;
3773
3774 switch (p0 / 4)
3775 {
3776 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3777 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3778 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3779 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3780 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3781 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3782 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3783 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
3784 }
3785
3786 return in_len;
3787 }
3788
3789 static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3790 {
3791 if (p0 == 0) return (in_len);
3792
3793 if (p0 >= in_len) return (in_len);
3794
3795 u32 tib40[4];
3796 u32 tib41[4];
3797
3798 rshift_block (buf0, buf1, tib40, tib41);
3799
3800 const u32 mr = 0xffu << ((p0 & 3) * 8);
3801 const u32 ml = ~mr;
3802
3803 switch (p0 / 4)
3804 {
3805 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3806 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3807 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3808 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3809 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3810 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3811 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3812 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
3813 }
3814
3815 return in_len;
3816 }
3817
3818 static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3819 {
3820 if (p0 > in_len) return (in_len);
3821
3822 if ((in_len + p0) >= 32) return (in_len);
3823
3824 u32 out_len = in_len;
3825
3826 u32 tib40[4];
3827 u32 tib41[4];
3828
3829 tib40[0] = buf0[0];
3830 tib40[1] = buf0[1];
3831 tib40[2] = buf0[2];
3832 tib40[3] = buf0[3];
3833 tib41[0] = buf1[0];
3834 tib41[1] = buf1[1];
3835 tib41[2] = buf1[2];
3836 tib41[3] = buf1[3];
3837
3838 truncate_right (tib40, tib41, p0);
3839
3840 rshift_block_N (buf0, buf1, buf0, buf1, p0);
3841
3842 buf0[0] |= tib40[0];
3843 buf0[1] |= tib40[1];
3844 buf0[2] |= tib40[2];
3845 buf0[3] |= tib40[3];
3846 buf1[0] |= tib41[0];
3847 buf1[1] |= tib41[1];
3848 buf1[2] |= tib41[2];
3849 buf1[3] |= tib41[3];
3850
3851 out_len += p0;
3852
3853 return out_len;
3854 }
3855
3856 static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3857 {
3858 if (p0 > in_len) return (in_len);
3859
3860 if ((in_len + p0) >= 32) return (in_len);
3861
3862 u32 out_len = in_len;
3863
3864 u32 tib40[4];
3865 u32 tib41[4];
3866
3867 rshift_block_N (buf0, buf1, tib40, tib41, p0);
3868
3869 truncate_left (tib40, tib41, out_len);
3870
3871 buf0[0] |= tib40[0];
3872 buf0[1] |= tib40[1];
3873 buf0[2] |= tib40[2];
3874 buf0[3] |= tib40[3];
3875 buf1[0] |= tib41[0];
3876 buf1[1] |= tib41[1];
3877 buf1[2] |= tib41[2];
3878 buf1[3] |= tib41[3];
3879
3880 out_len += p0;
3881
3882 return out_len;
3883 }
3884
3885 static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3886 {
3887 buf0[0] |= (generate_cmask (buf0[0]));
3888 buf0[1] |= (generate_cmask (buf0[1]));
3889 buf0[2] |= (generate_cmask (buf0[2]));
3890 buf0[3] |= (generate_cmask (buf0[3]));
3891 buf1[0] |= (generate_cmask (buf1[0]));
3892 buf1[1] |= (generate_cmask (buf1[1]));
3893 buf1[2] |= (generate_cmask (buf1[2]));
3894 buf1[3] |= (generate_cmask (buf1[3]));
3895
3896 #ifdef IS_NV
3897 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
3898
3899 for (u32 i = 0; i < in_len; i++)
3900 {
3901 u32 tmp0;
3902 u32 tmp1;
3903
3904 switch (i)
3905 {
3906 case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
3907 tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
3908 case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
3909 tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
3910 case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
3911 tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
3912 case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
3913 tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
3914 case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
3915 tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
3916 case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
3917 tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
3918 case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
3919 tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
3920 case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
3921 tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
3922 case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
3923 tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
3924 case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
3925 tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
3926 case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
3927 tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
3928 case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
3929 tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
3930 case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
3931 tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
3932 case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
3933 tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
3934 case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
3935 tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
3936 case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
3937 tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
3938 case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
3939 tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
3940 case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
3941 tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
3942 case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
3943 tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
3944 case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
3945 tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
3946 case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
3947 tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
3948 case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
3949 tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
3950 case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
3951 tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
3952 case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
3953 tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
3954 case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
3955 tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
3956 case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
3957 tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
3958 case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
3959 tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
3960 case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
3961 tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
3962 case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
3963 tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
3964 case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
3965 tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
3966 case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
3967 tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
3968 }
3969
3970 if (i < 3)
3971 {
3972 if (tmp0 == ' ') buf0[0] &= tmp1 ;
3973 }
3974 else if (i < 7)
3975 {
3976 if (tmp0 == ' ') buf0[1] &= tmp1 ;
3977 }
3978 else if (i < 11)
3979 {
3980 if (tmp0 == ' ') buf0[2] &= tmp1 ;
3981 }
3982 else if (i < 15)
3983 {
3984 if (tmp0 == ' ') buf0[3] &= tmp1 ;
3985 }
3986 else if (i < 19)
3987 {
3988 if (tmp0 == ' ') buf1[0] &= tmp1 ;
3989 }
3990 else if (i < 23)
3991 {
3992 if (tmp0 == ' ') buf1[1] &= tmp1 ;
3993 }
3994 else if (i < 27)
3995 {
3996 if (tmp0 == ' ') buf1[2] &= tmp1 ;
3997 }
3998 else if (i < 31)
3999 {
4000 if (tmp0 == ' ') buf1[3] &= tmp1 ;
4001 }
4002 }
4003 #endif
4004
4005 #if defined IS_AMD || defined IS_GENERIC
4006 u32 tib40[4];
4007 u32 tib41[4];
4008
4009 const uchar4 tmp0 = (uchar4) (' ');
4010 const uchar4 tmp1 = (uchar4) (0x00);
4011 const uchar4 tmp2 = (uchar4) (0xff);
4012
4013 uchar4 tmp;
4014
4015 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
4016 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
4017 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
4018 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
4019 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
4020 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
4021 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
4022 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
4023
4024 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
4025
4026 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
4027 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
4028 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
4029 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
4030 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
4031 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
4032 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
4033 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
4034 #endif
4035
4036 return in_len;
4037 }
4038
4039 u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
4040 {
4041 u32 out_len = in_len;
4042
4043 switch (name)
4044 {
4045 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
4046 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
4047 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
4048 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
4049 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
4050 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
4051 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
4052 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
4053 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
4054 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
4055 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
4056 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
4057 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
4058 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
4059 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
4060 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
4061 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
4062 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
4063 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
4064 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
4065 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
4066 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
4067 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
4068 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
4069 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
4070 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
4071 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
4072 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
4073 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
4074 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
4075 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
4076 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
4077 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
4078 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
4079 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
4080 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
4081 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
4082 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
4083 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
4084 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
4085 }
4086
4087 return out_len;
4088 }
4089
4090 u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
4091 {
4092 u32 out_len = len;
4093
4094 for (u32 i = 0; cmds[i] != 0; i++)
4095 {
4096 const u32 cmd = cmds[i];
4097
4098 const u32 name = (cmd >> 0) & 0xff;
4099 const u32 p0 = (cmd >> 8) & 0xff;
4100 const u32 p1 = (cmd >> 16) & 0xff;
4101
4102 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
4103 }
4104
4105 return out_len;
4106 }
4107
4108 u32 apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
4109 {
4110 #if VECT_SIZE == 1
4111
4112 w0[0] = pw_buf0[0];
4113 w0[1] = pw_buf0[1];
4114 w0[2] = pw_buf0[2];
4115 w0[3] = pw_buf0[3];
4116 w1[0] = pw_buf1[0];
4117 w1[1] = pw_buf1[1];
4118 w1[2] = pw_buf1[2];
4119 w1[3] = pw_buf1[3];
4120
4121 return apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
4122
4123 #else
4124
4125 u32 out_len = 0;
4126
4127 #pragma unroll
4128 for (int i = 0; i < VECT_SIZE; i++)
4129 {
4130 u32 tmp0[4];
4131 u32 tmp1[4];
4132
4133 tmp0[0] = pw_buf0[0];
4134 tmp0[1] = pw_buf0[1];
4135 tmp0[2] = pw_buf0[2];
4136 tmp0[3] = pw_buf0[3];
4137 tmp1[0] = pw_buf1[0];
4138 tmp1[1] = pw_buf1[1];
4139 tmp1[2] = pw_buf1[2];
4140 tmp1[3] = pw_buf1[3];
4141
4142 out_len = apply_rules (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
4143
4144 // it's guaranteed to have out_len always the same for each call in the loop
4145
4146 switch (i)
4147 {
4148 #if VECT_SIZE >= 2
4149 case 0:
4150 w0[0].s0 = tmp0[0];
4151 w0[1].s0 = tmp0[1];
4152 w0[2].s0 = tmp0[2];
4153 w0[3].s0 = tmp0[3];
4154 w1[0].s0 = tmp1[0];
4155 w1[1].s0 = tmp1[1];
4156 w1[2].s0 = tmp1[2];
4157 w1[3].s0 = tmp1[3];
4158 break;
4159
4160 case 1:
4161 w0[0].s1 = tmp0[0];
4162 w0[1].s1 = tmp0[1];
4163 w0[2].s1 = tmp0[2];
4164 w0[3].s1 = tmp0[3];
4165 w1[0].s1 = tmp1[0];
4166 w1[1].s1 = tmp1[1];
4167 w1[2].s1 = tmp1[2];
4168 w1[3].s1 = tmp1[3];
4169 break;
4170 #endif
4171
4172 #if VECT_SIZE >= 4
4173 case 2:
4174 w0[0].s2 = tmp0[0];
4175 w0[1].s2 = tmp0[1];
4176 w0[2].s2 = tmp0[2];
4177 w0[3].s2 = tmp0[3];
4178 w1[0].s2 = tmp1[0];
4179 w1[1].s2 = tmp1[1];
4180 w1[2].s2 = tmp1[2];
4181 w1[3].s2 = tmp1[3];
4182 break;
4183
4184 case 3:
4185 w0[0].s3 = tmp0[0];
4186 w0[1].s3 = tmp0[1];
4187 w0[2].s3 = tmp0[2];
4188 w0[3].s3 = tmp0[3];
4189 w1[0].s3 = tmp1[0];
4190 w1[1].s3 = tmp1[1];
4191 w1[2].s3 = tmp1[2];
4192 w1[3].s3 = tmp1[3];
4193 break;
4194 #endif
4195
4196 #if VECT_SIZE >= 8
4197 case 4:
4198 w0[0].s4 = tmp0[0];
4199 w0[1].s4 = tmp0[1];
4200 w0[2].s4 = tmp0[2];
4201 w0[3].s4 = tmp0[3];
4202 w1[0].s4 = tmp1[0];
4203 w1[1].s4 = tmp1[1];
4204 w1[2].s4 = tmp1[2];
4205 w1[3].s4 = tmp1[3];
4206 break;
4207
4208 case 5:
4209 w0[0].s5 = tmp0[0];
4210 w0[1].s5 = tmp0[1];
4211 w0[2].s5 = tmp0[2];
4212 w0[3].s5 = tmp0[3];
4213 w1[0].s5 = tmp1[0];
4214 w1[1].s5 = tmp1[1];
4215 w1[2].s5 = tmp1[2];
4216 w1[3].s5 = tmp1[3];
4217 break;
4218
4219 case 6:
4220 w0[0].s6 = tmp0[0];
4221 w0[1].s6 = tmp0[1];
4222 w0[2].s6 = tmp0[2];
4223 w0[3].s6 = tmp0[3];
4224 w1[0].s6 = tmp1[0];
4225 w1[1].s6 = tmp1[1];
4226 w1[2].s6 = tmp1[2];
4227 w1[3].s6 = tmp1[3];
4228 break;
4229
4230 case 7:
4231 w0[0].s7 = tmp0[0];
4232 w0[1].s7 = tmp0[1];
4233 w0[2].s7 = tmp0[2];
4234 w0[3].s7 = tmp0[3];
4235 w1[0].s7 = tmp1[0];
4236 w1[1].s7 = tmp1[1];
4237 w1[2].s7 = tmp1[2];
4238 w1[3].s7 = tmp1[3];
4239 break;
4240 #endif
4241 }
4242 }
4243
4244 return out_len;
4245
4246 #endif
4247 }