Fix bug in rule-engine for NV, left shifts and right shifts were switched
[hashcat.git] / OpenCL / rp.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 static u32 generate_cmask (u32 buf)
7 {
8 const u32 rmask = ((buf & 0x40404040) >> 1)
9 & ~((buf & 0x80808080) >> 2);
10
11 const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
12 const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
13
14 return rmask & ~hmask & lmask;
15 }
16
17 static void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
18 {
19 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
20
21 switch (len / 4)
22 {
23 case 0: w0[0] &= tmp;
24 w0[1] = 0;
25 w0[2] = 0;
26 w0[3] = 0;
27 w1[0] = 0;
28 w1[1] = 0;
29 w1[2] = 0;
30 w1[3] = 0;
31 break;
32 case 1: w0[1] &= tmp;
33 w0[2] = 0;
34 w0[3] = 0;
35 w1[0] = 0;
36 w1[1] = 0;
37 w1[2] = 0;
38 w1[3] = 0;
39 break;
40 case 2: w0[2] &= tmp;
41 w0[3] = 0;
42 w1[0] = 0;
43 w1[1] = 0;
44 w1[2] = 0;
45 w1[3] = 0;
46 break;
47 case 3: w0[3] &= tmp;
48 w1[0] = 0;
49 w1[1] = 0;
50 w1[2] = 0;
51 w1[3] = 0;
52 break;
53 case 4: w1[0] &= tmp;
54 w1[1] = 0;
55 w1[2] = 0;
56 w1[3] = 0;
57 break;
58 case 5: w1[1] &= tmp;
59 w1[2] = 0;
60 w1[3] = 0;
61 break;
62 case 6: w1[2] &= tmp;
63 w1[3] = 0;
64 break;
65 case 7: w1[3] &= tmp;
66 break;
67 }
68 }
69
70 static void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
71 {
72 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
73
74 switch (len / 4)
75 {
76 case 0: w0[0] &= tmp;
77 break;
78 case 1: w0[0] = 0;
79 w0[1] &= tmp;
80 break;
81 case 2: w0[0] = 0;
82 w0[1] = 0;
83 w0[2] &= tmp;
84 break;
85 case 3: w0[0] = 0;
86 w0[1] = 0;
87 w0[2] = 0;
88 w0[3] &= tmp;
89 break;
90 case 4: w0[0] = 0;
91 w0[1] = 0;
92 w0[2] = 0;
93 w0[3] = 0;
94 w1[0] &= tmp;
95 break;
96 case 5: w0[0] = 0;
97 w0[1] = 0;
98 w0[2] = 0;
99 w0[3] = 0;
100 w1[0] = 0;
101 w1[1] &= tmp;
102 break;
103 case 6: w0[0] = 0;
104 w0[1] = 0;
105 w0[2] = 0;
106 w0[3] = 0;
107 w1[0] = 0;
108 w1[1] = 0;
109 w1[2] &= tmp;
110 break;
111 case 7: w0[0] = 0;
112 w0[1] = 0;
113 w0[2] = 0;
114 w0[3] = 0;
115 w1[0] = 0;
116 w1[1] = 0;
117 w1[2] = 0;
118 w1[3] &= tmp;
119 break;
120 }
121 }
122
123 static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
124 {
125 #ifdef IS_NV
126 out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
127 out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
128 out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
129 out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
130 out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
131 out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
132 out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
133 out1[3] = __byte_perm (in1[3], 0, 0x4321);
134 #endif
135
136 #if defined IS_AMD || defined IS_GENERIC
137 out0[0] = amd_bytealign (in0[1], in0[0], 1);
138 out0[1] = amd_bytealign (in0[2], in0[1], 1);
139 out0[2] = amd_bytealign (in0[3], in0[2], 1);
140 out0[3] = amd_bytealign (in1[0], in0[3], 1);
141 out1[0] = amd_bytealign (in1[1], in1[0], 1);
142 out1[1] = amd_bytealign (in1[2], in1[1], 1);
143 out1[2] = amd_bytealign (in1[3], in1[2], 1);
144 out1[3] = amd_bytealign ( 0, in1[3], 1);
145 #endif
146 }
147
148 static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
149 {
150 #ifdef IS_NV
151 out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
152 out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
153 out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
154 out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
155 out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
156 out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
157 out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
158 out0[0] = __byte_perm ( 0, in0[0], 0x6543);
159 #endif
160
161 #if defined IS_AMD || defined IS_GENERIC
162 out1[3] = amd_bytealign (in1[3], in1[2], 3);
163 out1[2] = amd_bytealign (in1[2], in1[1], 3);
164 out1[1] = amd_bytealign (in1[1], in1[0], 3);
165 out1[0] = amd_bytealign (in1[0], in0[3], 3);
166 out0[3] = amd_bytealign (in0[3], in0[2], 3);
167 out0[2] = amd_bytealign (in0[2], in0[1], 3);
168 out0[1] = amd_bytealign (in0[1], in0[0], 3);
169 out0[0] = amd_bytealign (in0[0], 0, 3);
170 #endif
171 }
172
173 static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
174 {
175 #ifdef IS_NV
176 switch (num)
177 {
178 case 0: out0[0] = in0[0];
179 out0[1] = in0[1];
180 out0[2] = in0[2];
181 out0[3] = in0[3];
182 out1[0] = in1[0];
183 out1[1] = in1[1];
184 out1[2] = in1[2];
185 out1[3] = in1[3];
186 break;
187 case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
188 out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
189 out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
190 out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
191 out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
192 out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
193 out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
194 out1[3] = __byte_perm (in1[3], 0, 0x4321);
195 break;
196 case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432);
197 out0[1] = __byte_perm (in0[1], in0[2], 0x5432);
198 out0[2] = __byte_perm (in0[2], in0[3], 0x5432);
199 out0[3] = __byte_perm (in0[3], in1[0], 0x5432);
200 out1[0] = __byte_perm (in1[0], in1[1], 0x5432);
201 out1[1] = __byte_perm (in1[1], in1[2], 0x5432);
202 out1[2] = __byte_perm (in1[2], in1[3], 0x5432);
203 out1[3] = __byte_perm (in1[3], 0, 0x5432);
204 break;
205 case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543);
206 out0[1] = __byte_perm (in0[1], in0[2], 0x6543);
207 out0[2] = __byte_perm (in0[2], in0[3], 0x6543);
208 out0[3] = __byte_perm (in0[3], in1[0], 0x6543);
209 out1[0] = __byte_perm (in1[0], in1[1], 0x6543);
210 out1[1] = __byte_perm (in1[1], in1[2], 0x6543);
211 out1[2] = __byte_perm (in1[2], in1[3], 0x6543);
212 out1[3] = __byte_perm (in1[3], 0, 0x6543);
213 break;
214 case 4: out0[0] = in0[1];
215 out0[1] = in0[2];
216 out0[2] = in0[3];
217 out0[3] = in1[0];
218 out1[0] = in1[1];
219 out1[1] = in1[2];
220 out1[2] = in1[3];
221 out1[3] = 0;
222 break;
223 case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321);
224 out0[1] = __byte_perm (in0[2], in0[3], 0x4321);
225 out0[2] = __byte_perm (in0[3], in1[0], 0x4321);
226 out0[3] = __byte_perm (in1[0], in1[1], 0x4321);
227 out1[0] = __byte_perm (in1[1], in1[2], 0x4321);
228 out1[1] = __byte_perm (in1[2], in1[3], 0x4321);
229 out1[2] = __byte_perm (in1[3], 0, 0x4321);
230 out1[3] = 0;
231 break;
232 case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432);
233 out0[1] = __byte_perm (in0[2], in0[3], 0x5432);
234 out0[2] = __byte_perm (in0[3], in1[0], 0x5432);
235 out0[3] = __byte_perm (in1[0], in1[1], 0x5432);
236 out1[0] = __byte_perm (in1[1], in1[2], 0x5432);
237 out1[1] = __byte_perm (in1[2], in1[3], 0x5432);
238 out1[2] = __byte_perm (in1[3], 0, 0x5432);
239 out1[3] = 0;
240 break;
241 case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543);
242 out0[1] = __byte_perm (in0[2], in0[3], 0x6543);
243 out0[2] = __byte_perm (in0[3], in1[0], 0x6543);
244 out0[3] = __byte_perm (in1[0], in1[1], 0x6543);
245 out1[0] = __byte_perm (in1[1], in1[2], 0x6543);
246 out1[1] = __byte_perm (in1[2], in1[3], 0x6543);
247 out1[2] = __byte_perm (in1[3], 0, 0x6543);
248 out1[3] = 0;
249 break;
250 case 8: out0[0] = in0[2];
251 out0[1] = in0[3];
252 out0[2] = in1[0];
253 out0[3] = in1[1];
254 out1[0] = in1[2];
255 out1[1] = in1[3];
256 out1[2] = 0;
257 out1[3] = 0;
258 break;
259 case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321);
260 out0[1] = __byte_perm (in0[3], in1[0], 0x4321);
261 out0[2] = __byte_perm (in1[0], in1[1], 0x4321);
262 out0[3] = __byte_perm (in1[1], in1[2], 0x4321);
263 out1[0] = __byte_perm (in1[2], in1[3], 0x4321);
264 out1[1] = __byte_perm (in1[3], 0, 0x4321);
265 out1[2] = 0;
266 out1[3] = 0;
267 break;
268 case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432);
269 out0[1] = __byte_perm (in0[3], in1[0], 0x5432);
270 out0[2] = __byte_perm (in1[0], in1[1], 0x5432);
271 out0[3] = __byte_perm (in1[1], in1[2], 0x5432);
272 out1[0] = __byte_perm (in1[2], in1[3], 0x5432);
273 out1[1] = __byte_perm (in1[3], 0, 0x5432);
274 out1[2] = 0;
275 out1[3] = 0;
276 break;
277 case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543);
278 out0[1] = __byte_perm (in0[3], in1[0], 0x6543);
279 out0[2] = __byte_perm (in1[0], in1[1], 0x6543);
280 out0[3] = __byte_perm (in1[1], in1[2], 0x6543);
281 out1[0] = __byte_perm (in1[2], in1[3], 0x6543);
282 out1[1] = __byte_perm (in1[3], 0, 0x6543);
283 out1[2] = 0;
284 out1[3] = 0;
285 break;
286 case 12: out0[0] = in0[3];
287 out0[1] = in1[0];
288 out0[2] = in1[1];
289 out0[3] = in1[2];
290 out1[0] = in1[3];
291 out1[1] = 0;
292 out1[2] = 0;
293 out1[3] = 0;
294 break;
295 case 13:
296 out0[0] = __byte_perm (in0[3], in1[0], 0x4321);
297 out0[1] = __byte_perm (in1[0], in1[1], 0x4321);
298 out0[2] = __byte_perm (in1[1], in1[2], 0x4321);
299 out0[3] = __byte_perm (in1[2], in1[3], 0x4321);
300 out1[0] = __byte_perm (in1[3], 0, 0x4321);
301 out1[1] = 0;
302 out1[2] = 0;
303 out1[3] = 0;
304 break;
305 case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432);
306 out0[1] = __byte_perm (in1[0], in1[1], 0x5432);
307 out0[2] = __byte_perm (in1[1], in1[2], 0x5432);
308 out0[3] = __byte_perm (in1[2], in1[3], 0x5432);
309 out1[0] = __byte_perm (in1[3], 0, 0x5432);
310 out1[1] = 0;
311 out1[2] = 0;
312 out1[3] = 0;
313 break;
314 case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543);
315 out0[1] = __byte_perm (in1[0], in1[1], 0x6543);
316 out0[2] = __byte_perm (in1[1], in1[2], 0x6543);
317 out0[3] = __byte_perm (in1[2], in1[3], 0x6543);
318 out1[0] = __byte_perm (in1[3], 0, 0x6543);
319 out1[1] = 0;
320 out1[2] = 0;
321 out1[3] = 0;
322 break;
323 case 16: out0[0] = in1[0];
324 out0[1] = in1[1];
325 out0[2] = in1[2];
326 out0[3] = in1[3];
327 out1[0] = 0;
328 out1[1] = 0;
329 out1[2] = 0;
330 out1[3] = 0;
331 break;
332 case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321);
333 out0[1] = __byte_perm (in1[1], in1[2], 0x4321);
334 out0[2] = __byte_perm (in1[2], in1[3], 0x4321);
335 out0[3] = __byte_perm (in1[3], 0, 0x4321);
336 out1[0] = 0;
337 out1[1] = 0;
338 out1[2] = 0;
339 out1[3] = 0;
340 break;
341 case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432);
342 out0[1] = __byte_perm (in1[1], in1[2], 0x5432);
343 out0[2] = __byte_perm (in1[2], in1[3], 0x5432);
344 out0[3] = __byte_perm (in1[3], 0, 0x5432);
345 out1[0] = 0;
346 out1[1] = 0;
347 out1[2] = 0;
348 out1[3] = 0;
349 break;
350 case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543);
351 out0[1] = __byte_perm (in1[1], in1[2], 0x6543);
352 out0[2] = __byte_perm (in1[2], in1[3], 0x6543);
353 out0[3] = __byte_perm (in1[3], 0, 0x6543);
354 out1[0] = 0;
355 out1[1] = 0;
356 out1[2] = 0;
357 out1[3] = 0;
358 break;
359 case 20: out0[0] = in1[1];
360 out0[1] = in1[2];
361 out0[2] = in1[3];
362 out0[3] = 0;
363 out1[0] = 0;
364 out1[1] = 0;
365 out1[2] = 0;
366 out1[3] = 0;
367 break;
368 case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321);
369 out0[1] = __byte_perm (in1[2], in1[3], 0x4321);
370 out0[2] = __byte_perm (in1[3], 0, 0x4321);
371 out0[3] = 0;
372 out1[0] = 0;
373 out1[1] = 0;
374 out1[2] = 0;
375 out1[3] = 0;
376 break;
377 case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432);
378 out0[1] = __byte_perm (in1[2], in1[3], 0x5432);
379 out0[2] = __byte_perm (in1[3], 0, 0x5432);
380 out0[3] = 0;
381 out1[0] = 0;
382 out1[1] = 0;
383 out1[2] = 0;
384 out1[3] = 0;
385 break;
386 case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543);
387 out0[1] = __byte_perm (in1[2], in1[3], 0x6543);
388 out0[2] = __byte_perm (in1[3], 0, 0x6543);
389 out0[3] = 0;
390 out1[0] = 0;
391 out1[1] = 0;
392 out1[2] = 0;
393 out1[3] = 0;
394 break;
395 case 24: out0[0] = in1[2];
396 out0[1] = in1[3];
397 out0[2] = 0;
398 out0[3] = 0;
399 out1[0] = 0;
400 out1[1] = 0;
401 out1[2] = 0;
402 out1[3] = 0;
403 break;
404 case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321);
405 out0[1] = __byte_perm (in1[3], 0, 0x4321);
406 out0[2] = 0;
407 out0[3] = 0;
408 out1[0] = 0;
409 out1[1] = 0;
410 out1[2] = 0;
411 out1[3] = 0;
412 break;
413 case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432);
414 out0[1] = __byte_perm (in1[3], 0, 0x5432);
415 out0[2] = 0;
416 out0[3] = 0;
417 out1[0] = 0;
418 out1[1] = 0;
419 out1[2] = 0;
420 out1[3] = 0;
421 break;
422 case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543);
423 out0[1] = __byte_perm (in1[3], 0, 0x6543);
424 out0[2] = 0;
425 out0[3] = 0;
426 out1[0] = 0;
427 out1[1] = 0;
428 out1[2] = 0;
429 out1[3] = 0;
430 break;
431 case 28: out0[0] = in1[3];
432 out0[1] = 0;
433 out0[2] = 0;
434 out0[3] = 0;
435 out1[0] = 0;
436 out1[1] = 0;
437 out1[2] = 0;
438 out1[3] = 0;
439 break;
440 case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321);
441 out0[1] = 0;
442 out0[2] = 0;
443 out0[3] = 0;
444 out1[0] = 0;
445 out1[1] = 0;
446 out1[2] = 0;
447 out1[3] = 0;
448 break;
449 case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432);
450 out0[1] = 0;
451 out0[2] = 0;
452 out0[3] = 0;
453 out1[0] = 0;
454 out1[1] = 0;
455 out1[2] = 0;
456 out1[3] = 0;
457 break;
458 case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543);
459 out0[1] = 0;
460 out0[2] = 0;
461 out0[3] = 0;
462 out1[0] = 0;
463 out1[1] = 0;
464 out1[2] = 0;
465 out1[3] = 0;
466 break;
467 }
468 #endif
469
470 #if defined IS_AMD || defined IS_GENERIC
471 switch (num)
472 {
473 case 0: out0[0] = in0[0];
474 out0[1] = in0[1];
475 out0[2] = in0[2];
476 out0[3] = in0[3];
477 out1[0] = in1[0];
478 out1[1] = in1[1];
479 out1[2] = in1[2];
480 out1[3] = in1[3];
481 break;
482 case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1);
483 out0[1] = amd_bytealign (in0[2], in0[1], 1);
484 out0[2] = amd_bytealign (in0[3], in0[2], 1);
485 out0[3] = amd_bytealign (in1[0], in0[3], 1);
486 out1[0] = amd_bytealign (in1[1], in1[0], 1);
487 out1[1] = amd_bytealign (in1[2], in1[1], 1);
488 out1[2] = amd_bytealign (in1[3], in1[2], 1);
489 out1[3] = amd_bytealign ( 0, in1[3], 1);
490 break;
491 case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2);
492 out0[1] = amd_bytealign (in0[2], in0[1], 2);
493 out0[2] = amd_bytealign (in0[3], in0[2], 2);
494 out0[3] = amd_bytealign (in1[0], in0[3], 2);
495 out1[0] = amd_bytealign (in1[1], in1[0], 2);
496 out1[1] = amd_bytealign (in1[2], in1[1], 2);
497 out1[2] = amd_bytealign (in1[3], in1[2], 2);
498 out1[3] = amd_bytealign ( 0, in1[3], 2);
499 break;
500 case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3);
501 out0[1] = amd_bytealign (in0[2], in0[1], 3);
502 out0[2] = amd_bytealign (in0[3], in0[2], 3);
503 out0[3] = amd_bytealign (in1[0], in0[3], 3);
504 out1[0] = amd_bytealign (in1[1], in1[0], 3);
505 out1[1] = amd_bytealign (in1[2], in1[1], 3);
506 out1[2] = amd_bytealign (in1[3], in1[2], 3);
507 out1[3] = amd_bytealign ( 0, in1[3], 3);
508 break;
509 case 4: out0[0] = in0[1];
510 out0[1] = in0[2];
511 out0[2] = in0[3];
512 out0[3] = in1[0];
513 out1[0] = in1[1];
514 out1[1] = in1[2];
515 out1[2] = in1[3];
516 out1[3] = 0;
517 break;
518 case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1);
519 out0[1] = amd_bytealign (in0[3], in0[2], 1);
520 out0[2] = amd_bytealign (in1[0], in0[3], 1);
521 out0[3] = amd_bytealign (in1[1], in1[0], 1);
522 out1[0] = amd_bytealign (in1[2], in1[1], 1);
523 out1[1] = amd_bytealign (in1[3], in1[2], 1);
524 out1[2] = amd_bytealign ( 0, in1[3], 1);
525 out1[3] = 0;
526 break;
527 case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2);
528 out0[1] = amd_bytealign (in0[3], in0[2], 2);
529 out0[2] = amd_bytealign (in1[0], in0[3], 2);
530 out0[3] = amd_bytealign (in1[1], in1[0], 2);
531 out1[0] = amd_bytealign (in1[2], in1[1], 2);
532 out1[1] = amd_bytealign (in1[3], in1[2], 2);
533 out1[2] = amd_bytealign ( 0, in1[3], 2);
534 out1[3] = 0;
535 break;
536 case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3);
537 out0[1] = amd_bytealign (in0[3], in0[2], 3);
538 out0[2] = amd_bytealign (in1[0], in0[3], 3);
539 out0[3] = amd_bytealign (in1[1], in1[0], 3);
540 out1[0] = amd_bytealign (in1[2], in1[1], 3);
541 out1[1] = amd_bytealign (in1[3], in1[2], 3);
542 out1[2] = amd_bytealign ( 0, in1[3], 3);
543 out1[3] = 0;
544 break;
545 case 8: out0[0] = in0[2];
546 out0[1] = in0[3];
547 out0[2] = in1[0];
548 out0[3] = in1[1];
549 out1[0] = in1[2];
550 out1[1] = in1[3];
551 out1[2] = 0;
552 out1[3] = 0;
553 break;
554 case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1);
555 out0[1] = amd_bytealign (in1[0], in0[3], 1);
556 out0[2] = amd_bytealign (in1[1], in1[0], 1);
557 out0[3] = amd_bytealign (in1[2], in1[1], 1);
558 out1[0] = amd_bytealign (in1[3], in1[2], 1);
559 out1[1] = amd_bytealign ( 0, in1[3], 1);
560 out1[2] = 0;
561 out1[3] = 0;
562 break;
563 case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2);
564 out0[1] = amd_bytealign (in1[0], in0[3], 2);
565 out0[2] = amd_bytealign (in1[1], in1[0], 2);
566 out0[3] = amd_bytealign (in1[2], in1[1], 2);
567 out1[0] = amd_bytealign (in1[3], in1[2], 2);
568 out1[1] = amd_bytealign ( 0, in1[3], 2);
569 out1[2] = 0;
570 out1[3] = 0;
571 break;
572 case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3);
573 out0[1] = amd_bytealign (in1[0], in0[3], 3);
574 out0[2] = amd_bytealign (in1[1], in1[0], 3);
575 out0[3] = amd_bytealign (in1[2], in1[1], 3);
576 out1[0] = amd_bytealign (in1[3], in1[2], 3);
577 out1[1] = amd_bytealign ( 0, in1[3], 3);
578 out1[2] = 0;
579 out1[3] = 0;
580 break;
581 case 12: out0[0] = in0[3];
582 out0[1] = in1[0];
583 out0[2] = in1[1];
584 out0[3] = in1[2];
585 out1[0] = in1[3];
586 out1[1] = 0;
587 out1[2] = 0;
588 out1[3] = 0;
589 break;
590 case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1);
591 out0[1] = amd_bytealign (in1[1], in1[0], 1);
592 out0[2] = amd_bytealign (in1[2], in1[1], 1);
593 out0[3] = amd_bytealign (in1[3], in1[2], 1);
594 out1[0] = amd_bytealign ( 0, in1[3], 1);
595 out1[1] = 0;
596 out1[2] = 0;
597 out1[3] = 0;
598 break;
599 case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2);
600 out0[1] = amd_bytealign (in1[1], in1[0], 2);
601 out0[2] = amd_bytealign (in1[2], in1[1], 2);
602 out0[3] = amd_bytealign (in1[3], in1[2], 2);
603 out1[0] = amd_bytealign ( 0, in1[3], 2);
604 out1[1] = 0;
605 out1[2] = 0;
606 out1[3] = 0;
607 break;
608 case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3);
609 out0[1] = amd_bytealign (in1[1], in1[0], 3);
610 out0[2] = amd_bytealign (in1[2], in1[1], 3);
611 out0[3] = amd_bytealign (in1[3], in1[2], 3);
612 out1[0] = amd_bytealign ( 0, in1[3], 3);
613 out1[1] = 0;
614 out1[2] = 0;
615 out1[3] = 0;
616 break;
617 case 16: out0[0] = in1[0];
618 out0[1] = in1[1];
619 out0[2] = in1[2];
620 out0[3] = in1[3];
621 out1[0] = 0;
622 out1[1] = 0;
623 out1[2] = 0;
624 out1[3] = 0;
625 break;
626 case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1);
627 out0[1] = amd_bytealign (in1[2], in1[1], 1);
628 out0[2] = amd_bytealign (in1[3], in1[2], 1);
629 out0[3] = amd_bytealign ( 0, in1[3], 1);
630 out1[0] = 0;
631 out1[1] = 0;
632 out1[2] = 0;
633 out1[3] = 0;
634 break;
635 case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2);
636 out0[1] = amd_bytealign (in1[2], in1[1], 2);
637 out0[2] = amd_bytealign (in1[3], in1[2], 2);
638 out0[3] = amd_bytealign ( 0, in1[3], 2);
639 out1[0] = 0;
640 out1[1] = 0;
641 out1[2] = 0;
642 out1[3] = 0;
643 break;
644 case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3);
645 out0[1] = amd_bytealign (in1[2], in1[1], 3);
646 out0[2] = amd_bytealign (in1[3], in1[2], 3);
647 out0[3] = amd_bytealign ( 0, in1[3], 3);
648 out1[0] = 0;
649 out1[1] = 0;
650 out1[2] = 0;
651 out1[3] = 0;
652 break;
653 case 20: out0[0] = in1[1];
654 out0[1] = in1[2];
655 out0[2] = in1[3];
656 out0[3] = 0;
657 out1[0] = 0;
658 out1[1] = 0;
659 out1[2] = 0;
660 out1[3] = 0;
661 break;
662 case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1);
663 out0[1] = amd_bytealign (in1[3], in1[2], 1);
664 out0[2] = amd_bytealign ( 0, in1[3], 1);
665 out0[3] = 0;
666 out1[0] = 0;
667 out1[1] = 0;
668 out1[2] = 0;
669 out1[3] = 0;
670 break;
671 case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2);
672 out0[1] = amd_bytealign (in1[3], in1[2], 2);
673 out0[2] = amd_bytealign ( 0, in1[3], 2);
674 out0[3] = 0;
675 out1[0] = 0;
676 out1[1] = 0;
677 out1[2] = 0;
678 out1[3] = 0;
679 break;
680 case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3);
681 out0[1] = amd_bytealign (in1[3], in1[2], 3);
682 out0[2] = amd_bytealign ( 0, in1[3], 3);
683 out0[3] = 0;
684 out1[0] = 0;
685 out1[1] = 0;
686 out1[2] = 0;
687 out1[3] = 0;
688 break;
689 case 24: out0[0] = in1[2];
690 out0[1] = in1[3];
691 out0[2] = 0;
692 out0[3] = 0;
693 out1[0] = 0;
694 out1[1] = 0;
695 out1[2] = 0;
696 out1[3] = 0;
697 break;
698 case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1);
699 out0[1] = amd_bytealign ( 0, in1[3], 1);
700 out0[2] = 0;
701 out0[3] = 0;
702 out1[0] = 0;
703 out1[1] = 0;
704 out1[2] = 0;
705 out1[3] = 0;
706 break;
707 case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2);
708 out0[1] = amd_bytealign ( 0, in1[3], 2);
709 out0[2] = 0;
710 out0[3] = 0;
711 out1[0] = 0;
712 out1[1] = 0;
713 out1[2] = 0;
714 out1[3] = 0;
715 break;
716 case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3);
717 out0[1] = amd_bytealign ( 0, in1[3], 3);
718 out0[2] = 0;
719 out0[3] = 0;
720 out1[0] = 0;
721 out1[1] = 0;
722 out1[2] = 0;
723 out1[3] = 0;
724 break;
725 case 28: out0[0] = in1[3];
726 out0[1] = 0;
727 out0[2] = 0;
728 out0[3] = 0;
729 out1[0] = 0;
730 out1[1] = 0;
731 out1[2] = 0;
732 out1[3] = 0;
733 break;
734 case 29: out0[0] = amd_bytealign ( 0, in1[3], 1);
735 out0[1] = 0;
736 out0[2] = 0;
737 out0[3] = 0;
738 out1[0] = 0;
739 out1[1] = 0;
740 out1[2] = 0;
741 out1[3] = 0;
742 break;
743 case 30: out0[0] = amd_bytealign ( 0, in1[3], 2);
744 out0[1] = 0;
745 out0[2] = 0;
746 out0[3] = 0;
747 out1[0] = 0;
748 out1[1] = 0;
749 out1[2] = 0;
750 out1[3] = 0;
751 break;
752 case 31: out0[0] = amd_bytealign ( 0, in1[3], 3);
753 out0[1] = 0;
754 out0[2] = 0;
755 out0[3] = 0;
756 out1[0] = 0;
757 out1[1] = 0;
758 out1[2] = 0;
759 out1[3] = 0;
760 break;
761 }
762 #endif
763 }
764
765 static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
766 {
767 #ifdef IS_NV
768 switch (num)
769 {
770 case 0: out1[3] = in1[3];
771 out1[2] = in1[2];
772 out1[1] = in1[1];
773 out1[0] = in1[0];
774 out0[3] = in0[3];
775 out0[2] = in0[2];
776 out0[1] = in0[1];
777 out0[0] = in0[0];
778 break;
779 case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
780 out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
781 out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
782 out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
783 out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
784 out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
785 out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
786 out0[0] = __byte_perm ( 0, in0[0], 0x6543);
787 break;
788 case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432);
789 out1[2] = __byte_perm (in1[1], in1[2], 0x5432);
790 out1[1] = __byte_perm (in1[0], in1[1], 0x5432);
791 out1[0] = __byte_perm (in0[3], in1[0], 0x5432);
792 out0[3] = __byte_perm (in0[2], in0[3], 0x5432);
793 out0[2] = __byte_perm (in0[1], in0[2], 0x5432);
794 out0[1] = __byte_perm (in0[0], in0[1], 0x5432);
795 out0[0] = __byte_perm ( 0, in0[0], 0x5432);
796 break;
797 case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321);
798 out1[2] = __byte_perm (in1[1], in1[2], 0x4321);
799 out1[1] = __byte_perm (in1[0], in1[1], 0x4321);
800 out1[0] = __byte_perm (in0[3], in1[0], 0x4321);
801 out0[3] = __byte_perm (in0[2], in0[3], 0x4321);
802 out0[2] = __byte_perm (in0[1], in0[2], 0x4321);
803 out0[1] = __byte_perm (in0[0], in0[1], 0x4321);
804 out0[0] = __byte_perm ( 0, in0[0], 0x4321);
805 break;
806 case 4: out1[3] = in1[2];
807 out1[2] = in1[1];
808 out1[1] = in1[0];
809 out1[0] = in0[3];
810 out0[3] = in0[2];
811 out0[2] = in0[1];
812 out0[1] = in0[0];
813 out0[0] = 0;
814 break;
815 case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543);
816 out1[2] = __byte_perm (in1[0], in1[1], 0x6543);
817 out1[1] = __byte_perm (in0[3], in1[0], 0x6543);
818 out1[0] = __byte_perm (in0[2], in0[3], 0x6543);
819 out0[3] = __byte_perm (in0[1], in0[2], 0x6543);
820 out0[2] = __byte_perm (in0[0], in0[1], 0x6543);
821 out0[1] = __byte_perm ( 0, in0[0], 0x6543);
822 out0[0] = 0;
823 break;
824 case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432);
825 out1[2] = __byte_perm (in1[0], in1[1], 0x5432);
826 out1[1] = __byte_perm (in0[3], in1[0], 0x5432);
827 out1[0] = __byte_perm (in0[2], in0[3], 0x5432);
828 out0[3] = __byte_perm (in0[1], in0[2], 0x5432);
829 out0[2] = __byte_perm (in0[0], in0[1], 0x5432);
830 out0[1] = __byte_perm ( 0, in0[0], 0x5432);
831 out0[0] = 0;
832 break;
833 case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321);
834 out1[2] = __byte_perm (in1[0], in1[1], 0x4321);
835 out1[1] = __byte_perm (in0[3], in1[0], 0x4321);
836 out1[0] = __byte_perm (in0[2], in0[3], 0x4321);
837 out0[3] = __byte_perm (in0[1], in0[2], 0x4321);
838 out0[2] = __byte_perm (in0[0], in0[1], 0x4321);
839 out0[1] = __byte_perm ( 0, in0[0], 0x4321);
840 out0[0] = 0;
841 break;
842 case 8: out1[3] = in1[1];
843 out1[2] = in1[0];
844 out1[1] = in0[3];
845 out1[0] = in0[2];
846 out0[3] = in0[1];
847 out0[2] = in0[0];
848 out0[1] = 0;
849 out0[0] = 0;
850 break;
851 case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543);
852 out1[2] = __byte_perm (in0[3], in1[0], 0x6543);
853 out1[1] = __byte_perm (in0[2], in0[3], 0x6543);
854 out1[0] = __byte_perm (in0[1], in0[2], 0x6543);
855 out0[3] = __byte_perm (in0[0], in0[1], 0x6543);
856 out0[2] = __byte_perm ( 0, in0[0], 0x6543);
857 out0[1] = 0;
858 out0[0] = 0;
859 break;
860 case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432);
861 out1[2] = __byte_perm (in0[3], in1[0], 0x5432);
862 out1[1] = __byte_perm (in0[2], in0[3], 0x5432);
863 out1[0] = __byte_perm (in0[1], in0[2], 0x5432);
864 out0[3] = __byte_perm (in0[0], in0[1], 0x5432);
865 out0[2] = __byte_perm ( 0, in0[0], 0x5432);
866 out0[1] = 0;
867 out0[0] = 0;
868 break;
869 case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321);
870 out1[2] = __byte_perm (in0[3], in1[0], 0x4321);
871 out1[1] = __byte_perm (in0[2], in0[3], 0x4321);
872 out1[0] = __byte_perm (in0[1], in0[2], 0x4321);
873 out0[3] = __byte_perm (in0[0], in0[1], 0x4321);
874 out0[2] = __byte_perm ( 0, in0[0], 0x4321);
875 out0[1] = 0;
876 out0[0] = 0;
877 break;
878 case 12: out1[3] = in1[0];
879 out1[2] = in0[3];
880 out1[1] = in0[2];
881 out1[0] = in0[1];
882 out0[3] = in0[0];
883 out0[2] = 0;
884 out0[1] = 0;
885 out0[0] = 0;
886 break;
887 case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543);
888 out1[2] = __byte_perm (in0[2], in0[3], 0x6543);
889 out1[1] = __byte_perm (in0[1], in0[2], 0x6543);
890 out1[0] = __byte_perm (in0[0], in0[1], 0x6543);
891 out0[3] = __byte_perm ( 0, in0[0], 0x6543);
892 out0[2] = 0;
893 out0[1] = 0;
894 out0[0] = 0;
895 break;
896 case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432);
897 out1[2] = __byte_perm (in0[2], in0[3], 0x5432);
898 out1[1] = __byte_perm (in0[1], in0[2], 0x5432);
899 out1[0] = __byte_perm (in0[0], in0[1], 0x5432);
900 out0[3] = __byte_perm ( 0, in0[0], 0x5432);
901 out0[2] = 0;
902 out0[1] = 0;
903 out0[0] = 0;
904 break;
905 case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321);
906 out1[2] = __byte_perm (in0[2], in0[3], 0x4321);
907 out1[1] = __byte_perm (in0[1], in0[2], 0x4321);
908 out1[0] = __byte_perm (in0[0], in0[1], 0x4321);
909 out0[3] = __byte_perm ( 0, in0[0], 0x4321);
910 out0[2] = 0;
911 out0[1] = 0;
912 out0[0] = 0;
913 break;
914 case 16: out1[3] = in0[3];
915 out1[2] = in0[2];
916 out1[1] = in0[1];
917 out1[0] = in0[0];
918 out0[3] = 0;
919 out0[2] = 0;
920 out0[1] = 0;
921 out0[0] = 0;
922 break;
923 case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543);
924 out1[2] = __byte_perm (in0[1], in0[2], 0x6543);
925 out1[1] = __byte_perm (in0[0], in0[1], 0x6543);
926 out1[0] = __byte_perm ( 0, in0[0], 0x6543);
927 out0[3] = 0;
928 out0[2] = 0;
929 out0[1] = 0;
930 out0[0] = 0;
931 break;
932 case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432);
933 out1[2] = __byte_perm (in0[1], in0[2], 0x5432);
934 out1[1] = __byte_perm (in0[0], in0[1], 0x5432);
935 out1[0] = __byte_perm ( 0, in0[0], 0x5432);
936 out0[3] = 0;
937 out0[2] = 0;
938 out0[1] = 0;
939 out0[0] = 0;
940 break;
941 case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321);
942 out1[2] = __byte_perm (in0[1], in0[2], 0x4321);
943 out1[1] = __byte_perm (in0[0], in0[1], 0x4321);
944 out1[0] = __byte_perm ( 0, in0[0], 0x4321);
945 out0[3] = 0;
946 out0[2] = 0;
947 out0[1] = 0;
948 out0[0] = 0;
949 break;
950 case 20: out1[3] = in0[2];
951 out1[2] = in0[1];
952 out1[1] = in0[0];
953 out1[0] = 0;
954 out0[3] = 0;
955 out0[2] = 0;
956 out0[1] = 0;
957 out0[0] = 0;
958 break;
959 case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543);
960 out1[2] = __byte_perm (in0[0], in0[1], 0x6543);
961 out1[1] = __byte_perm ( 0, in0[0], 0x6543);
962 out1[0] = 0;
963 out0[3] = 0;
964 out0[2] = 0;
965 out0[1] = 0;
966 out0[0] = 0;
967 break;
968 case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432);
969 out1[2] = __byte_perm (in0[0], in0[1], 0x5432);
970 out1[1] = __byte_perm ( 0, in0[0], 0x5432);
971 out1[0] = 0;
972 out0[3] = 0;
973 out0[2] = 0;
974 out0[1] = 0;
975 out0[0] = 0;
976 break;
977 case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321);
978 out1[2] = __byte_perm (in0[0], in0[1], 0x4321);
979 out1[1] = __byte_perm ( 0, in0[0], 0x4321);
980 out1[0] = 0;
981 out0[3] = 0;
982 out0[2] = 0;
983 out0[1] = 0;
984 out0[0] = 0;
985 break;
986 case 24: out1[3] = in0[1];
987 out1[2] = in0[0];
988 out1[1] = 0;
989 out1[0] = 0;
990 out0[3] = 0;
991 out0[2] = 0;
992 out0[1] = 0;
993 out0[0] = 0;
994 break;
995 case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543);
996 out1[2] = __byte_perm ( 0, in0[0], 0x6543);
997 out1[1] = 0;
998 out1[0] = 0;
999 out0[3] = 0;
1000 out0[2] = 0;
1001 out0[1] = 0;
1002 out0[0] = 0;
1003 break;
1004 case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432);
1005 out1[2] = __byte_perm ( 0, in0[0], 0x5432);
1006 out1[1] = 0;
1007 out1[0] = 0;
1008 out0[3] = 0;
1009 out0[2] = 0;
1010 out0[1] = 0;
1011 out0[0] = 0;
1012 break;
1013 case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321);
1014 out1[2] = __byte_perm ( 0, in0[0], 0x4321);
1015 out1[1] = 0;
1016 out1[0] = 0;
1017 out0[3] = 0;
1018 out0[2] = 0;
1019 out0[1] = 0;
1020 out0[0] = 0;
1021 break;
1022 case 28: out1[3] = in0[0];
1023 out1[2] = 0;
1024 out1[1] = 0;
1025 out1[0] = 0;
1026 out0[3] = 0;
1027 out0[2] = 0;
1028 out0[1] = 0;
1029 out0[0] = 0;
1030 break;
1031 case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543);
1032 out1[2] = 0;
1033 out1[1] = 0;
1034 out1[0] = 0;
1035 out0[3] = 0;
1036 out0[2] = 0;
1037 out0[1] = 0;
1038 out0[0] = 0;
1039 break;
1040 case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432);
1041 out1[2] = 0;
1042 out1[1] = 0;
1043 out1[0] = 0;
1044 out0[3] = 0;
1045 out0[2] = 0;
1046 out0[1] = 0;
1047 out0[0] = 0;
1048 break;
1049 case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321);
1050 out1[2] = 0;
1051 out1[1] = 0;
1052 out1[0] = 0;
1053 out0[3] = 0;
1054 out0[2] = 0;
1055 out0[1] = 0;
1056 out0[0] = 0;
1057 break;
1058 }
1059 #endif
1060
1061 #if defined IS_AMD || defined IS_GENERIC
1062 switch (num)
1063 {
1064 case 0: out1[3] = in1[3];
1065 out1[2] = in1[2];
1066 out1[1] = in1[1];
1067 out1[0] = in1[0];
1068 out0[3] = in0[3];
1069 out0[2] = in0[2];
1070 out0[1] = in0[1];
1071 out0[0] = in0[0];
1072 break;
1073 case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3);
1074 out1[2] = amd_bytealign (in1[2], in1[1], 3);
1075 out1[1] = amd_bytealign (in1[1], in1[0], 3);
1076 out1[0] = amd_bytealign (in1[0], in0[3], 3);
1077 out0[3] = amd_bytealign (in0[3], in0[2], 3);
1078 out0[2] = amd_bytealign (in0[2], in0[1], 3);
1079 out0[1] = amd_bytealign (in0[1], in0[0], 3);
1080 out0[0] = amd_bytealign (in0[0], 0, 3);
1081 break;
1082 case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2);
1083 out1[2] = amd_bytealign (in1[2], in1[1], 2);
1084 out1[1] = amd_bytealign (in1[1], in1[0], 2);
1085 out1[0] = amd_bytealign (in1[0], in0[3], 2);
1086 out0[3] = amd_bytealign (in0[3], in0[2], 2);
1087 out0[2] = amd_bytealign (in0[2], in0[1], 2);
1088 out0[1] = amd_bytealign (in0[1], in0[0], 2);
1089 out0[0] = amd_bytealign (in0[0], 0, 2);
1090 break;
1091 case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1);
1092 out1[2] = amd_bytealign (in1[2], in1[1], 1);
1093 out1[1] = amd_bytealign (in1[1], in1[0], 1);
1094 out1[0] = amd_bytealign (in1[0], in0[3], 1);
1095 out0[3] = amd_bytealign (in0[3], in0[2], 1);
1096 out0[2] = amd_bytealign (in0[2], in0[1], 1);
1097 out0[1] = amd_bytealign (in0[1], in0[0], 1);
1098 out0[0] = amd_bytealign (in0[0], 0, 1);
1099 break;
1100 case 4: out1[3] = in1[2];
1101 out1[2] = in1[1];
1102 out1[1] = in1[0];
1103 out1[0] = in0[3];
1104 out0[3] = in0[2];
1105 out0[2] = in0[1];
1106 out0[1] = in0[0];
1107 out0[0] = 0;
1108 break;
1109 case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3);
1110 out1[2] = amd_bytealign (in1[1], in1[0], 3);
1111 out1[1] = amd_bytealign (in1[0], in0[3], 3);
1112 out1[0] = amd_bytealign (in0[3], in0[2], 3);
1113 out0[3] = amd_bytealign (in0[2], in0[1], 3);
1114 out0[2] = amd_bytealign (in0[1], in0[0], 3);
1115 out0[1] = amd_bytealign (in0[0], 0, 3);
1116 out0[0] = 0;
1117 break;
1118 case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2);
1119 out1[2] = amd_bytealign (in1[1], in1[0], 2);
1120 out1[1] = amd_bytealign (in1[0], in0[3], 2);
1121 out1[0] = amd_bytealign (in0[3], in0[2], 2);
1122 out0[3] = amd_bytealign (in0[2], in0[1], 2);
1123 out0[2] = amd_bytealign (in0[1], in0[0], 2);
1124 out0[1] = amd_bytealign (in0[0], 0, 2);
1125 out0[0] = 0;
1126 break;
1127 case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1);
1128 out1[2] = amd_bytealign (in1[1], in1[0], 1);
1129 out1[1] = amd_bytealign (in1[0], in0[3], 1);
1130 out1[0] = amd_bytealign (in0[3], in0[2], 1);
1131 out0[3] = amd_bytealign (in0[2], in0[1], 1);
1132 out0[2] = amd_bytealign (in0[1], in0[0], 1);
1133 out0[1] = amd_bytealign (in0[0], 0, 1);
1134 out0[0] = 0;
1135 break;
1136 case 8: out1[3] = in1[1];
1137 out1[2] = in1[0];
1138 out1[1] = in0[3];
1139 out1[0] = in0[2];
1140 out0[3] = in0[1];
1141 out0[2] = in0[0];
1142 out0[1] = 0;
1143 out0[0] = 0;
1144 break;
1145 case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3);
1146 out1[2] = amd_bytealign (in1[0], in0[3], 3);
1147 out1[1] = amd_bytealign (in0[3], in0[2], 3);
1148 out1[0] = amd_bytealign (in0[2], in0[1], 3);
1149 out0[3] = amd_bytealign (in0[1], in0[0], 3);
1150 out0[2] = amd_bytealign (in0[0], 0, 3);
1151 out0[1] = 0;
1152 out0[0] = 0;
1153 break;
1154 case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2);
1155 out1[2] = amd_bytealign (in1[0], in0[3], 2);
1156 out1[1] = amd_bytealign (in0[3], in0[2], 2);
1157 out1[0] = amd_bytealign (in0[2], in0[1], 2);
1158 out0[3] = amd_bytealign (in0[1], in0[0], 2);
1159 out0[2] = amd_bytealign (in0[0], 0, 2);
1160 out0[1] = 0;
1161 out0[0] = 0;
1162 break;
1163 case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1);
1164 out1[2] = amd_bytealign (in1[0], in0[3], 1);
1165 out1[1] = amd_bytealign (in0[3], in0[2], 1);
1166 out1[0] = amd_bytealign (in0[2], in0[1], 1);
1167 out0[3] = amd_bytealign (in0[1], in0[0], 1);
1168 out0[2] = amd_bytealign (in0[0], 0, 1);
1169 out0[1] = 0;
1170 out0[0] = 0;
1171 break;
1172 case 12: out1[3] = in1[0];
1173 out1[2] = in0[3];
1174 out1[1] = in0[2];
1175 out1[0] = in0[1];
1176 out0[3] = in0[0];
1177 out0[2] = 0;
1178 out0[1] = 0;
1179 out0[0] = 0;
1180 break;
1181 case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3);
1182 out1[2] = amd_bytealign (in0[3], in0[2], 3);
1183 out1[1] = amd_bytealign (in0[2], in0[1], 3);
1184 out1[0] = amd_bytealign (in0[1], in0[0], 3);
1185 out0[3] = amd_bytealign (in0[0], 0, 3);
1186 out0[2] = 0;
1187 out0[1] = 0;
1188 out0[0] = 0;
1189 break;
1190 case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2);
1191 out1[2] = amd_bytealign (in0[3], in0[2], 2);
1192 out1[1] = amd_bytealign (in0[2], in0[1], 2);
1193 out1[0] = amd_bytealign (in0[1], in0[0], 2);
1194 out0[3] = amd_bytealign (in0[0], 0, 2);
1195 out0[2] = 0;
1196 out0[1] = 0;
1197 out0[0] = 0;
1198 break;
1199 case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1);
1200 out1[2] = amd_bytealign (in0[3], in0[2], 1);
1201 out1[1] = amd_bytealign (in0[2], in0[1], 1);
1202 out1[0] = amd_bytealign (in0[1], in0[0], 1);
1203 out0[3] = amd_bytealign (in0[0], 0, 1);
1204 out0[2] = 0;
1205 out0[1] = 0;
1206 out0[0] = 0;
1207 break;
1208 case 16: out1[3] = in0[3];
1209 out1[2] = in0[2];
1210 out1[1] = in0[1];
1211 out1[0] = in0[0];
1212 out0[3] = 0;
1213 out0[2] = 0;
1214 out0[1] = 0;
1215 out0[0] = 0;
1216 break;
1217 case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3);
1218 out1[2] = amd_bytealign (in0[2], in0[1], 3);
1219 out1[1] = amd_bytealign (in0[1], in0[0], 3);
1220 out1[0] = amd_bytealign (in0[0], 0, 3);
1221 out0[3] = 0;
1222 out0[2] = 0;
1223 out0[1] = 0;
1224 out0[0] = 0;
1225 break;
1226 case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2);
1227 out1[2] = amd_bytealign (in0[2], in0[1], 2);
1228 out1[1] = amd_bytealign (in0[1], in0[0], 2);
1229 out1[0] = amd_bytealign (in0[0], 0, 2);
1230 out0[3] = 0;
1231 out0[2] = 0;
1232 out0[1] = 0;
1233 out0[0] = 0;
1234 break;
1235 case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1);
1236 out1[2] = amd_bytealign (in0[2], in0[1], 1);
1237 out1[1] = amd_bytealign (in0[1], in0[0], 1);
1238 out1[0] = amd_bytealign (in0[0], 0, 1);
1239 out0[3] = 0;
1240 out0[2] = 0;
1241 out0[1] = 0;
1242 out0[0] = 0;
1243 break;
1244 case 20: out1[3] = in0[2];
1245 out1[2] = in0[1];
1246 out1[1] = in0[0];
1247 out1[0] = 0;
1248 out0[3] = 0;
1249 out0[2] = 0;
1250 out0[1] = 0;
1251 out0[0] = 0;
1252 break;
1253 case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3);
1254 out1[2] = amd_bytealign (in0[1], in0[0], 3);
1255 out1[1] = amd_bytealign (in0[0], 0, 3);
1256 out1[0] = 0;
1257 out0[3] = 0;
1258 out0[2] = 0;
1259 out0[1] = 0;
1260 out0[0] = 0;
1261 break;
1262 case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2);
1263 out1[2] = amd_bytealign (in0[1], in0[0], 2);
1264 out1[1] = amd_bytealign (in0[0], 0, 2);
1265 out1[0] = 0;
1266 out0[3] = 0;
1267 out0[2] = 0;
1268 out0[1] = 0;
1269 out0[0] = 0;
1270 break;
1271 case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1);
1272 out1[2] = amd_bytealign (in0[1], in0[0], 1);
1273 out1[1] = amd_bytealign (in0[0], 0, 1);
1274 out1[0] = 0;
1275 out0[3] = 0;
1276 out0[2] = 0;
1277 out0[1] = 0;
1278 out0[0] = 0;
1279 break;
1280 case 24: out1[3] = in0[1];
1281 out1[2] = in0[0];
1282 out1[1] = 0;
1283 out1[0] = 0;
1284 out0[3] = 0;
1285 out0[2] = 0;
1286 out0[1] = 0;
1287 out0[0] = 0;
1288 break;
1289 case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3);
1290 out1[2] = amd_bytealign (in0[0], 0, 3);
1291 out1[1] = 0;
1292 out1[0] = 0;
1293 out0[3] = 0;
1294 out0[2] = 0;
1295 out0[1] = 0;
1296 out0[0] = 0;
1297 break;
1298 case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2);
1299 out1[2] = amd_bytealign (in0[0], 0, 2);
1300 out1[1] = 0;
1301 out1[0] = 0;
1302 out0[3] = 0;
1303 out0[2] = 0;
1304 out0[1] = 0;
1305 out0[0] = 0;
1306 break;
1307 case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1);
1308 out1[2] = amd_bytealign (in0[0], 0, 1);
1309 out1[1] = 0;
1310 out1[0] = 0;
1311 out0[3] = 0;
1312 out0[2] = 0;
1313 out0[1] = 0;
1314 out0[0] = 0;
1315 break;
1316 case 28: out1[3] = in0[0];
1317 out1[2] = 0;
1318 out1[1] = 0;
1319 out1[0] = 0;
1320 out0[3] = 0;
1321 out0[2] = 0;
1322 out0[1] = 0;
1323 out0[0] = 0;
1324 break;
1325 case 29: out1[3] = amd_bytealign (in0[0], 0, 3);
1326 out1[2] = 0;
1327 out1[1] = 0;
1328 out1[0] = 0;
1329 out0[3] = 0;
1330 out0[2] = 0;
1331 out0[1] = 0;
1332 out0[0] = 0;
1333 break;
1334 case 30: out1[3] = amd_bytealign (in0[0], 0, 2);
1335 out1[2] = 0;
1336 out1[1] = 0;
1337 out1[0] = 0;
1338 out0[3] = 0;
1339 out0[2] = 0;
1340 out0[1] = 0;
1341 out0[0] = 0;
1342 break;
1343 case 31: out1[3] = amd_bytealign (in0[0], 0, 1);
1344 out1[2] = 0;
1345 out1[1] = 0;
1346 out1[0] = 0;
1347 out0[3] = 0;
1348 out0[2] = 0;
1349 out0[1] = 0;
1350 out0[0] = 0;
1351 break;
1352 }
1353 #endif
1354 }
1355
1356 static void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
1357 {
1358 u32 tmp[2];
1359
1360 switch (offset & 3)
1361 {
1362 case 0: tmp[0] = src_r0;
1363 tmp[1] = 0;
1364 break;
1365 case 1: tmp[0] = src_r0 << 8;
1366 tmp[1] = src_r0 >> 24;
1367 break;
1368 case 2: tmp[0] = src_r0 << 16;
1369 tmp[1] = src_r0 >> 16;
1370 break;
1371 case 3: tmp[0] = src_r0 << 24;
1372 tmp[1] = src_r0 >> 8;
1373 break;
1374 }
1375
1376 switch (offset / 4)
1377 {
1378 case 0: dst0[0] |= tmp[0];
1379 dst0[1] = tmp[1];
1380 break;
1381 case 1: dst0[1] |= tmp[0];
1382 dst0[2] = tmp[1];
1383 break;
1384 case 2: dst0[2] |= tmp[0];
1385 dst0[3] = tmp[1];
1386 break;
1387 case 3: dst0[3] |= tmp[0];
1388 dst1[0] = tmp[1];
1389 break;
1390 case 4: dst1[0] |= tmp[0];
1391 dst1[1] = tmp[1];
1392 break;
1393 case 5: dst1[1] |= tmp[0];
1394 dst1[2] = tmp[1];
1395 break;
1396 case 6: dst1[2] |= tmp[0];
1397 dst1[3] = tmp[1];
1398 break;
1399 case 7: dst1[3] |= tmp[0];
1400 break;
1401 }
1402 }
1403
1404 static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
1405 {
1406 #ifdef IS_NV
1407 switch (offset)
1408 {
1409 case 0:
1410 dst0[0] = src_r0[0];
1411 dst0[1] = src_r0[1];
1412 dst0[2] = src_r0[2];
1413 dst0[3] = src_r0[3];
1414 dst1[0] = src_r1[0];
1415 dst1[1] = src_r1[1];
1416 dst1[2] = src_r1[2];
1417 dst1[3] = src_r1[3];
1418 break;
1419
1420 case 1:
1421 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x6540);
1422 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1423 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1424 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1425 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1426 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1427 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
1428 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x6543);
1429 break;
1430
1431 case 2:
1432 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x5410);
1433 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1434 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1435 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1436 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1437 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1438 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
1439 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x5432);
1440 break;
1441
1442 case 3:
1443 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x4210);
1444 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1445 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1446 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1447 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1448 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1449 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
1450 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x4321);
1451 break;
1452
1453 case 4:
1454 dst0[1] = src_r0[0];
1455 dst0[2] = src_r0[1];
1456 dst0[3] = src_r0[2];
1457 dst1[0] = src_r0[3];
1458 dst1[1] = src_r1[0];
1459 dst1[2] = src_r1[1];
1460 dst1[3] = src_r1[2];
1461 break;
1462
1463 case 5:
1464 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x6540);
1465 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1466 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1467 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1468 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1469 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1470 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
1471 break;
1472
1473 case 6:
1474 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x5410);
1475 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1476 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1477 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1478 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1479 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1480 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
1481 break;
1482
1483 case 7:
1484 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x4210);
1485 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1486 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1487 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1488 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1489 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1490 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
1491 break;
1492
1493 case 8:
1494 dst0[2] = src_r0[0];
1495 dst0[3] = src_r0[1];
1496 dst1[0] = src_r0[2];
1497 dst1[1] = src_r0[3];
1498 dst1[2] = src_r1[0];
1499 dst1[3] = src_r1[1];
1500 break;
1501
1502 case 9:
1503 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x6540);
1504 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1505 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1506 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1507 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1508 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1509 break;
1510
1511 case 10:
1512 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x5410);
1513 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1514 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1515 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1516 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1517 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1518 break;
1519
1520 case 11:
1521 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x4210);
1522 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1523 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1524 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1525 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1526 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1527 break;
1528
1529 case 12:
1530 dst0[3] = src_r0[0];
1531 dst1[0] = src_r0[1];
1532 dst1[1] = src_r0[2];
1533 dst1[2] = src_r0[3];
1534 dst1[3] = src_r1[0];
1535 break;
1536
1537 case 13:
1538 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x6540);
1539 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1540 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1541 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1542 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1543 break;
1544
1545 case 14:
1546 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x5410);
1547 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1548 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1549 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1550 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1551 break;
1552
1553 case 15:
1554 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x4210);
1555 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1556 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1557 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1558 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1559 break;
1560
1561 case 16:
1562 dst1[0] = src_r0[0];
1563 dst1[1] = src_r0[1];
1564 dst1[2] = src_r0[2];
1565 dst1[3] = src_r0[3];
1566 break;
1567
1568 case 17:
1569 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x6540);
1570 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1571 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1572 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1573 break;
1574
1575 case 18:
1576 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x5410);
1577 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1578 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1579 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1580 break;
1581
1582 case 19:
1583 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x4210);
1584 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1585 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1586 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1587 break;
1588
1589 case 20:
1590 dst1[1] = src_r0[0];
1591 dst1[2] = src_r0[1];
1592 dst1[3] = src_r0[2];
1593 break;
1594
1595 case 21:
1596 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x6540);
1597 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1598 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1599 break;
1600
1601 case 22:
1602 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x5410);
1603 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1604 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1605 break;
1606
1607 case 23:
1608 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x4210);
1609 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1610 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1611 break;
1612
1613 case 24:
1614 dst1[2] = src_r0[0];
1615 dst1[3] = src_r0[1];
1616 break;
1617
1618 case 25:
1619 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x6540);
1620 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1621 break;
1622
1623 case 26:
1624 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x5410);
1625 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1626 break;
1627
1628 case 27:
1629 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x4210);
1630 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1631 break;
1632
1633 case 28:
1634 dst1[3] = src_r0[0];
1635 break;
1636
1637 case 29:
1638 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x6540);
1639 break;
1640
1641 case 30:
1642 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x5410);
1643 break;
1644
1645 case 31:
1646 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x4210);
1647 break;
1648 }
1649 #endif
1650
1651 #if defined IS_AMD || defined IS_GENERIC
1652 switch (offset)
1653 {
1654 case 0:
1655 dst0[0] = src_r0[0];
1656 dst0[1] = src_r0[1];
1657 dst0[2] = src_r0[2];
1658 dst0[3] = src_r0[3];
1659 dst1[0] = src_r1[0];
1660 dst1[1] = src_r1[1];
1661 dst1[2] = src_r1[2];
1662 dst1[3] = src_r1[3];
1663 break;
1664
1665 case 1:
1666 dst0[0] = src_l0[0]
1667 | src_r0[0] << 8;
1668 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
1669 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
1670 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
1671 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3);
1672 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3);
1673 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3);
1674 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3);
1675 break;
1676
1677 case 2:
1678 dst0[0] = src_l0[0]
1679 | src_r0[0] << 16;
1680 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
1681 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
1682 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
1683 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2);
1684 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2);
1685 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2);
1686 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2);
1687 break;
1688
1689 case 3:
1690 dst0[0] = src_l0[0]
1691 | src_r0[0] << 24;
1692 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1693 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1694 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1695 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1);
1696 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1);
1697 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1);
1698 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1);
1699 break;
1700
1701 case 4:
1702 dst0[1] = src_r0[0];
1703 dst0[2] = src_r0[1];
1704 dst0[3] = src_r0[2];
1705 dst1[0] = src_r0[3];
1706 dst1[1] = src_r1[0];
1707 dst1[2] = src_r1[1];
1708 dst1[3] = src_r1[2];
1709 break;
1710
1711 case 5:
1712 dst0[1] = src_l0[1]
1713 | src_r0[0] << 8;
1714 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1715 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1716 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3);
1717 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3);
1718 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3);
1719 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3);
1720 break;
1721
1722 case 6:
1723 dst0[1] = src_l0[1]
1724 | src_r0[0] << 16;
1725 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1726 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1727 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2);
1728 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2);
1729 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2);
1730 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2);
1731 break;
1732
1733 case 7:
1734 dst0[1] = src_l0[1]
1735 | src_r0[0] << 24;
1736 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1737 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1738 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1);
1739 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1);
1740 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1);
1741 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1);
1742 break;
1743
1744 case 8:
1745 dst0[2] = src_r0[0];
1746 dst0[3] = src_r0[1];
1747 dst1[0] = src_r0[2];
1748 dst1[1] = src_r0[3];
1749 dst1[2] = src_r1[0];
1750 dst1[3] = src_r1[1];
1751 break;
1752
1753 case 9:
1754 dst0[2] = src_l0[2]
1755 | src_r0[0] << 8;
1756 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1757 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3);
1758 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3);
1759 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3);
1760 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3);
1761 break;
1762
1763 case 10:
1764 dst0[2] = src_l0[2]
1765 | src_r0[0] << 16;
1766 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1767 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2);
1768 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2);
1769 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2);
1770 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2);
1771 break;
1772
1773 case 11:
1774 dst0[2] = src_l0[2]
1775 | src_r0[0] << 24;
1776 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1777 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1);
1778 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1);
1779 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1);
1780 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1);
1781 break;
1782
1783 case 12:
1784 dst0[3] = src_r0[0];
1785 dst1[0] = src_r0[1];
1786 dst1[1] = src_r0[2];
1787 dst1[2] = src_r0[3];
1788 dst1[3] = src_r1[0];
1789 break;
1790
1791 case 13:
1792 dst0[3] = src_l0[3]
1793 | src_r0[0] << 8;
1794 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3);
1795 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3);
1796 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3);
1797 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3);
1798 break;
1799
1800 case 14:
1801 dst0[3] = src_l0[3]
1802 | src_r0[0] << 16;
1803 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2);
1804 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2);
1805 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2);
1806 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2);
1807 break;
1808
1809 case 15:
1810 dst0[3] = src_l0[3]
1811 | src_r0[0] << 24;
1812 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1);
1813 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1);
1814 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1);
1815 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1);
1816 break;
1817
1818 case 16:
1819 dst1[0] = src_r0[0];
1820 dst1[1] = src_r0[1];
1821 dst1[2] = src_r0[2];
1822 dst1[3] = src_r0[3];
1823 break;
1824
1825 case 17:
1826 dst1[0] = src_l1[0]
1827 | src_r0[0] << 8;
1828 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
1829 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
1830 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
1831 break;
1832
1833 case 18:
1834 dst1[0] = src_l1[0]
1835 | src_r0[0] << 16;
1836 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
1837 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
1838 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
1839 break;
1840
1841 case 19:
1842 dst1[0] = src_l1[0]
1843 | src_r0[0] << 24;
1844 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1845 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1846 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1847 break;
1848
1849 case 20:
1850 dst1[1] = src_r0[0];
1851 dst1[2] = src_r0[1];
1852 dst1[3] = src_r0[2];
1853 break;
1854
1855 case 21:
1856 dst1[1] = src_l1[1]
1857 | src_r0[0] << 8;
1858 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1859 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1860 break;
1861
1862 case 22:
1863 dst1[1] = src_l1[1]
1864 | src_r0[0] << 16;
1865 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1866 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1867 break;
1868
1869 case 23:
1870 dst1[1] = src_l1[1]
1871 | src_r0[0] << 24;
1872 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1873 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1874 break;
1875
1876 case 24:
1877 dst1[2] = src_r0[0];
1878 dst1[3] = src_r0[1];
1879 break;
1880
1881 case 25:
1882 dst1[2] = src_l1[2]
1883 | src_r0[0] << 8;
1884 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1885 break;
1886
1887 case 26:
1888 dst1[2] = src_l1[2]
1889 | src_r0[0] << 16;
1890 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1891 break;
1892
1893 case 27:
1894 dst1[2] = src_l1[2]
1895 | src_r0[0] << 24;
1896 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1897 break;
1898
1899 case 28:
1900 dst1[3] = src_r0[0];
1901 break;
1902
1903 case 29:
1904 dst1[3] = src_l1[3]
1905 | src_r0[0] << 8;
1906 break;
1907
1908 case 30:
1909 dst1[3] = src_l1[3]
1910 | src_r0[0] << 16;
1911 break;
1912
1913 case 31:
1914 dst1[3] = src_l1[3]
1915 | src_r0[0] << 24;
1916 break;
1917 }
1918 #endif
1919 }
1920
1921 static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
1922 {
1923 rshift_block_N (in0, in1, out0, out1, 32 - len);
1924
1925 u32 tib40[4];
1926 u32 tib41[4];
1927
1928 tib40[0] = out1[3];
1929 tib40[1] = out1[2];
1930 tib40[2] = out1[1];
1931 tib40[3] = out1[0];
1932 tib41[0] = out0[3];
1933 tib41[1] = out0[2];
1934 tib41[2] = out0[1];
1935 tib41[3] = out0[0];
1936
1937 out0[0] = swap32 (tib40[0]);
1938 out0[1] = swap32 (tib40[1]);
1939 out0[2] = swap32 (tib40[2]);
1940 out0[3] = swap32 (tib40[3]);
1941 out1[0] = swap32 (tib41[0]);
1942 out1[1] = swap32 (tib41[1]);
1943 out1[2] = swap32 (tib41[2]);
1944 out1[3] = swap32 (tib41[3]);
1945 }
1946
1947 static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1948 {
1949 buf0[0] |= (generate_cmask (buf0[0]));
1950 buf0[1] |= (generate_cmask (buf0[1]));
1951 buf0[2] |= (generate_cmask (buf0[2]));
1952 buf0[3] |= (generate_cmask (buf0[3]));
1953 buf1[0] |= (generate_cmask (buf1[0]));
1954 buf1[1] |= (generate_cmask (buf1[1]));
1955 buf1[2] |= (generate_cmask (buf1[2]));
1956 buf1[3] |= (generate_cmask (buf1[3]));
1957
1958 return in_len;
1959 }
1960
1961 static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1962 {
1963 buf0[0] &= ~(generate_cmask (buf0[0]));
1964 buf0[1] &= ~(generate_cmask (buf0[1]));
1965 buf0[2] &= ~(generate_cmask (buf0[2]));
1966 buf0[3] &= ~(generate_cmask (buf0[3]));
1967 buf1[0] &= ~(generate_cmask (buf1[0]));
1968 buf1[1] &= ~(generate_cmask (buf1[1]));
1969 buf1[2] &= ~(generate_cmask (buf1[2]));
1970 buf1[3] &= ~(generate_cmask (buf1[3]));
1971
1972 return in_len;
1973 }
1974
1975 static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1976 {
1977 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
1978
1979 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
1980
1981 return in_len;
1982 }
1983
1984 static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1985 {
1986 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
1987
1988 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
1989
1990 return in_len;
1991 }
1992
1993 static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1994 {
1995 buf0[0] ^= (generate_cmask (buf0[0]));
1996 buf0[1] ^= (generate_cmask (buf0[1]));
1997 buf0[2] ^= (generate_cmask (buf0[2]));
1998 buf0[3] ^= (generate_cmask (buf0[3]));
1999 buf1[0] ^= (generate_cmask (buf1[0]));
2000 buf1[1] ^= (generate_cmask (buf1[1]));
2001 buf1[2] ^= (generate_cmask (buf1[2]));
2002 buf1[3] ^= (generate_cmask (buf1[3]));
2003
2004 return in_len;
2005 }
2006
2007 static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2008 {
2009 if (p0 >= in_len) return (in_len);
2010
2011 const u32 tmp = 0x20u << ((p0 & 3) * 8);
2012
2013 switch (p0 / 4)
2014 {
2015 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
2016 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
2017 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
2018 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
2019 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
2020 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
2021 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
2022 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
2023 }
2024
2025 return in_len;
2026 }
2027
2028 static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2029 {
2030 reverse_block (buf0, buf1, buf0, buf1, in_len);
2031
2032 return in_len;
2033 }
2034
2035 static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2036 {
2037 if ((in_len + in_len) >= 32) return (in_len);
2038
2039 u32 out_len = in_len;
2040
2041 u32 tib40[4];
2042 u32 tib41[4];
2043
2044 tib40[0] = buf0[0];
2045 tib40[1] = buf0[1];
2046 tib40[2] = buf0[2];
2047 tib40[3] = buf0[3];
2048 tib41[0] = buf1[0];
2049 tib41[1] = buf1[1];
2050 tib41[2] = buf1[2];
2051 tib41[3] = buf1[3];
2052
2053 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2054
2055 out_len += in_len;
2056
2057 return out_len;
2058 }
2059
2060 static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2061 {
2062 if (((in_len * p0) + in_len) >= 32) return (in_len);
2063
2064 u32 out_len = in_len;
2065
2066 u32 tib40[4];
2067 u32 tib41[4];
2068
2069 tib40[0] = buf0[0];
2070 tib40[1] = buf0[1];
2071 tib40[2] = buf0[2];
2072 tib40[3] = buf0[3];
2073 tib41[0] = buf1[0];
2074 tib41[1] = buf1[1];
2075 tib41[2] = buf1[2];
2076 tib41[3] = buf1[3];
2077
2078 for (u32 i = 0; i < p0; i++)
2079 {
2080 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2081
2082 out_len += in_len;
2083 }
2084
2085 return out_len;
2086 }
2087
2088 static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2089 {
2090 if ((in_len + in_len) >= 32) return (in_len);
2091
2092 u32 out_len = in_len;
2093
2094 u32 tib40[4];
2095 u32 tib41[4];
2096
2097 reverse_block (buf0, buf1, tib40, tib41, out_len);
2098
2099 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2100
2101 out_len += in_len;
2102
2103 return out_len;
2104 }
2105
2106 static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2107 {
2108 if ((in_len + 1) >= 32) return (in_len);
2109
2110 u32 out_len = in_len;
2111
2112 append_block1 (out_len, buf0, buf1, p0);
2113
2114 out_len++;
2115
2116 return out_len;
2117 }
2118
2119 static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2120 {
2121 if ((in_len + 1) >= 32) return (in_len);
2122
2123 u32 out_len = in_len;
2124
2125 rshift_block (buf0, buf1, buf0, buf1);
2126
2127 buf0[0] = buf0[0] | p0;
2128
2129 out_len++;
2130
2131 return out_len;
2132 }
2133
2134 static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2135 {
2136 if (in_len == 0) return (in_len);
2137
2138 const u32 in_len1 = in_len - 1;
2139
2140 const u32 sh = (in_len1 & 3) * 8;
2141
2142 const u32 tmp = (buf0[0] & 0xff) << sh;
2143
2144 lshift_block (buf0, buf1, buf0, buf1);
2145
2146 switch (in_len1 / 4)
2147 {
2148 case 0: buf0[0] |= tmp; break;
2149 case 1: buf0[1] |= tmp; break;
2150 case 2: buf0[2] |= tmp; break;
2151 case 3: buf0[3] |= tmp; break;
2152 case 4: buf1[0] |= tmp; break;
2153 case 5: buf1[1] |= tmp; break;
2154 case 6: buf1[2] |= tmp; break;
2155 case 7: buf1[3] |= tmp; break;
2156 }
2157
2158 return in_len;
2159 }
2160
2161 static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2162 {
2163 if (in_len == 0) return (in_len);
2164
2165 const u32 in_len1 = in_len - 1;
2166
2167 const u32 sh = (in_len1 & 3) * 8;
2168
2169 u32 tmp = 0;
2170
2171 switch (in_len1 / 4)
2172 {
2173 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2174 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2175 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2176 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2177 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2178 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2179 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2180 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2181 }
2182
2183 rshift_block (buf0, buf1, buf0, buf1);
2184
2185 buf0[0] |= tmp;
2186
2187 truncate_right (buf0, buf1, in_len);
2188
2189 return in_len;
2190 }
2191
2192 static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2193 {
2194 if (in_len == 0) return (in_len);
2195
2196 const u32 in_len1 = in_len - 1;
2197
2198 lshift_block (buf0, buf1, buf0, buf1);
2199
2200 return in_len1;
2201 }
2202
/**
 * Delete-last rule: clears the last character and every higher byte of
 * the word that holds it, then reports the length reduced by one.
 *
 * Character i lives in word i / 4 (buf0 = words 0..3, buf1 = words 4..7)
 * at bit offset (i % 4) * 8. p0 and p1 are unused. An empty candidate is
 * returned untouched.
 */
static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (in_len == 0) return in_len;

  const u32 out_len = in_len - 1;

  // keeps only the bytes below the removed character inside its word
  const u32 keep = (1 << ((out_len % 4) * 8)) - 1;
  const u32 word = out_len / 4;

  for (u32 w = 0; w < 4; w++)
  {
    if ( w      == word) buf0[w] &= keep;
    if ((w + 4) == word) buf1[w] &= keep;
  }

  return out_len;
}
2225
2226 static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2227 {
2228 if (p0 >= in_len) return (in_len);
2229
2230 u32 out_len = in_len;
2231
2232 u32 tib40[4];
2233 u32 tib41[4];
2234
2235 lshift_block (buf0, buf1, tib40, tib41);
2236
2237 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2238 const u32 mr = ~ml;
2239
2240 switch (p0 / 4)
2241 {
2242 case 0: buf0[0] = (buf0[0] & ml)
2243 | (tib40[0] & mr);
2244 buf0[1] = tib40[1];
2245 buf0[2] = tib40[2];
2246 buf0[3] = tib40[3];
2247 buf1[0] = tib41[0];
2248 buf1[1] = tib41[1];
2249 buf1[2] = tib41[2];
2250 buf1[3] = tib41[3];
2251 break;
2252 case 1: buf0[1] = (buf0[1] & ml)
2253 | (tib40[1] & mr);
2254 buf0[2] = tib40[2];
2255 buf0[3] = tib40[3];
2256 buf1[0] = tib41[0];
2257 buf1[1] = tib41[1];
2258 buf1[2] = tib41[2];
2259 buf1[3] = tib41[3];
2260 break;
2261 case 2: buf0[2] = (buf0[2] & ml)
2262 | (tib40[2] & mr);
2263 buf0[3] = tib40[3];
2264 buf1[0] = tib41[0];
2265 buf1[1] = tib41[1];
2266 buf1[2] = tib41[2];
2267 buf1[3] = tib41[3];
2268 break;
2269 case 3: buf0[3] = (buf0[3] & ml)
2270 | (tib40[3] & mr);
2271 buf1[0] = tib41[0];
2272 buf1[1] = tib41[1];
2273 buf1[2] = tib41[2];
2274 buf1[3] = tib41[3];
2275 break;
2276 case 4: buf1[0] = (buf1[0] & ml)
2277 | (tib41[0] & mr);
2278 buf1[1] = tib41[1];
2279 buf1[2] = tib41[2];
2280 buf1[3] = tib41[3];
2281 break;
2282 case 5: buf1[1] = (buf1[1] & ml)
2283 | (tib41[1] & mr);
2284 buf1[2] = tib41[2];
2285 buf1[3] = tib41[3];
2286 break;
2287 case 6: buf1[2] = (buf1[2] & ml)
2288 | (tib41[2] & mr);
2289 buf1[3] = tib41[3];
2290 break;
2291 case 7: buf1[3] = (buf1[3] & ml)
2292 | (tib41[3] & mr);
2293 break;
2294 }
2295
2296 out_len--;
2297
2298 return out_len;
2299 }
2300
2301 static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2302 {
2303 if (p0 >= in_len) return (in_len);
2304
2305 if ((p0 + p1) > in_len) return (in_len);
2306
2307 u32 out_len = p1;
2308
2309 lshift_block_N (buf0, buf1, buf0, buf1, p0);
2310
2311 truncate_right (buf0, buf1, out_len);
2312
2313 return out_len;
2314 }
2315
2316 static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2317 {
2318 if (p0 >= in_len) return (in_len);
2319
2320 if ((p0 + p1) > in_len) return (in_len);
2321
2322 u32 out_len = in_len;
2323
2324 u32 tib40[4];
2325 u32 tib41[4];
2326
2327 tib40[0] = 0;
2328 tib40[1] = 0;
2329 tib40[2] = 0;
2330 tib40[3] = 0;
2331 tib41[0] = 0;
2332 tib41[1] = 0;
2333 tib41[2] = 0;
2334 tib41[3] = 0;
2335
2336 lshift_block_N (buf0, buf1, tib40, tib41, p1);
2337
2338 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2339 const u32 mr = ~ml;
2340
2341 switch (p0 / 4)
2342 {
2343 case 0: buf0[0] = (buf0[0] & ml)
2344 | (tib40[0] & mr);
2345 buf0[1] = tib40[1];
2346 buf0[2] = tib40[2];
2347 buf0[3] = tib40[3];
2348 buf1[0] = tib41[0];
2349 buf1[1] = tib41[1];
2350 buf1[2] = tib41[2];
2351 buf1[3] = tib41[3];
2352 break;
2353 case 1: buf0[1] = (buf0[1] & ml)
2354 | (tib40[1] & mr);
2355 buf0[2] = tib40[2];
2356 buf0[3] = tib40[3];
2357 buf1[0] = tib41[0];
2358 buf1[1] = tib41[1];
2359 buf1[2] = tib41[2];
2360 buf1[3] = tib41[3];
2361 break;
2362 case 2: buf0[2] = (buf0[2] & ml)
2363 | (tib40[2] & mr);
2364 buf0[3] = tib40[3];
2365 buf1[0] = tib41[0];
2366 buf1[1] = tib41[1];
2367 buf1[2] = tib41[2];
2368 buf1[3] = tib41[3];
2369 break;
2370 case 3: buf0[3] = (buf0[3] & ml)
2371 | (tib40[3] & mr);
2372 buf1[0] = tib41[0];
2373 buf1[1] = tib41[1];
2374 buf1[2] = tib41[2];
2375 buf1[3] = tib41[3];
2376 break;
2377 case 4: buf1[0] = (buf1[0] & ml)
2378 | (tib41[0] & mr);
2379 buf1[1] = tib41[1];
2380 buf1[2] = tib41[2];
2381 buf1[3] = tib41[3];
2382 break;
2383 case 5: buf1[1] = (buf1[1] & ml)
2384 | (tib41[1] & mr);
2385 buf1[2] = tib41[2];
2386 buf1[3] = tib41[3];
2387 break;
2388 case 6: buf1[2] = (buf1[2] & ml)
2389 | (tib41[2] & mr);
2390 buf1[3] = tib41[3];
2391 break;
2392 case 7: buf1[3] = (buf1[3] & ml)
2393 | (tib41[3] & mr);
2394 break;
2395 }
2396
2397 out_len -= p1;
2398
2399 return out_len;
2400 }
2401
2402 static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2403 {
2404 if (p0 > in_len) return (in_len);
2405
2406 if ((in_len + 1) >= 32) return (in_len);
2407
2408 u32 out_len = in_len;
2409
2410 u32 tib40[4];
2411 u32 tib41[4];
2412
2413 rshift_block (buf0, buf1, tib40, tib41);
2414
2415 const u32 p1n = p1 << ((p0 & 3) * 8);
2416
2417 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2418
2419 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
2420
2421 switch (p0 / 4)
2422 {
2423 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
2424 buf0[1] = tib40[1];
2425 buf0[2] = tib40[2];
2426 buf0[3] = tib40[3];
2427 buf1[0] = tib41[0];
2428 buf1[1] = tib41[1];
2429 buf1[2] = tib41[2];
2430 buf1[3] = tib41[3];
2431 break;
2432 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
2433 buf0[2] = tib40[2];
2434 buf0[3] = tib40[3];
2435 buf1[0] = tib41[0];
2436 buf1[1] = tib41[1];
2437 buf1[2] = tib41[2];
2438 buf1[3] = tib41[3];
2439 break;
2440 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
2441 buf0[3] = tib40[3];
2442 buf1[0] = tib41[0];
2443 buf1[1] = tib41[1];
2444 buf1[2] = tib41[2];
2445 buf1[3] = tib41[3];
2446 break;
2447 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
2448 buf1[0] = tib41[0];
2449 buf1[1] = tib41[1];
2450 buf1[2] = tib41[2];
2451 buf1[3] = tib41[3];
2452 break;
2453 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
2454 buf1[1] = tib41[1];
2455 buf1[2] = tib41[2];
2456 buf1[3] = tib41[3];
2457 break;
2458 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
2459 buf1[2] = tib41[2];
2460 buf1[3] = tib41[3];
2461 break;
2462 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
2463 buf1[3] = tib41[3];
2464 break;
2465 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
2466 break;
2467 }
2468
2469 out_len++;
2470
2471 return out_len;
2472 }
2473
/**
 * Overstrike rule: overwrites the character at offset p0 with p1.
 *
 * Character i lives in word i / 4 (buf0 = words 0..3, buf1 = words 4..7)
 * at bit offset (i % 4) * 8. The length never changes; nothing happens
 * when p0 is not inside the candidate.
 */
static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return in_len;

  const u32 bit_off = (p0 & 3) * 8;
  const u32 glyph   = p1 << bit_off;        // replacement in its lane
  const u32 hole    = ~(0xffu << bit_off);  // everything but that lane
  const u32 word    = p0 / 4;

  for (u32 w = 0; w < 4; w++)
  {
    if ( w      == word) buf0[w] = (buf0[w] & hole) | glyph;
    if ((w + 4) == word) buf1[w] = (buf1[w] & hole) | glyph;
  }

  return in_len;
}
2496
2497 static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2498 {
2499 if (p0 >= in_len) return (in_len);
2500
2501 truncate_right (buf0, buf1, p0);
2502
2503 return p0;
2504 }
2505
/**
 * Replace rule: every character equal to p0 becomes p1. The length never
 * changes.
 *
 * Two platform paths exist and are selected by build macros:
 *  - IS_NV walks the first in_len positions and uses __byte_perm both to
 *    pick a single character out of its word and to patch byte 0 of p1
 *    back into that lane (assuming the usual CUDA/PTX prmt selector
 *    semantics -- the helper itself is not visible here).
 *  - IS_AMD / IS_GENERIC compares all four lanes of every one of the
 *    eight words at once, regardless of in_len.
 *
 * With none of the macros defined the candidate is returned unchanged.
 */
static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  #ifdef IS_NV
  // positions 0..15 live in buf0
  for (u32 w = 0; w < 4; w++)
  {
    for (u32 lane = 0; lane < 4; lane++)
    {
      const u32 pos = (w * 4) + lane;

      if (pos >= in_len) continue;

      const u32 pick  = 0x6540 | lane;                    // 0x6540 .. 0x6543
      const u32 patch = 0x7654 & ~(0xfu << (lane * 4));   // 0x7650, 0x7604, 0x7054, 0x0654

      if (__byte_perm (buf0[w], 0, pick) == p0) buf0[w] = __byte_perm (p1, buf0[w], patch);
    }
  }

  // positions 16..31 live in buf1
  for (u32 w = 0; w < 4; w++)
  {
    for (u32 lane = 0; lane < 4; lane++)
    {
      const u32 pos = 16 + (w * 4) + lane;

      if (pos >= in_len) continue;

      const u32 pick  = 0x6540 | lane;
      const u32 patch = 0x7654 & ~(0xfu << (lane * 4));

      if (__byte_perm (buf1[w], 0, pick) == p0) buf1[w] = __byte_perm (p1, buf1[w], patch);
    }
  }
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  const uchar4 needle = (uchar4) (p0);
  const uchar4 subst  = (uchar4) (p1);

  for (u32 w = 0; w < 4; w++)
  {
    const uchar4 lanes = as_uchar4 (buf0[w]);

    buf0[w] = as_uint (select (lanes, subst, lanes == needle));
  }

  for (u32 w = 0; w < 4; w++)
  {
    const uchar4 lanes = as_uchar4 (buf1[w]);

    buf1[w] = as_uint (select (lanes, subst, lanes == needle));
  }
  #endif

  return in_len;
}
2599
/**
 * Purge rule: removes every occurrence of character p0 from the
 * candidate and closes the gaps.
 *
 * Character i lives in word i / 4 (buf0 = words 0..3, buf1 = words 4..7)
 * at bit offset (i % 4) * 8. Only the low byte of p0 is compared. The
 * surviving characters are packed from offset 0 and every byte behind
 * them is zero afterwards. p1 is unused.
 *
 * Returns the number of characters that survived.
 */
static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  const u32 needle = p0 & 0xff;

  // the two blocks hold 32 bytes; never scan beyond them
  const u32 scan_len = (in_len < 32) ? in_len : 32;

  u32 src[8];

  src[0] = buf0[0];
  src[1] = buf0[1];
  src[2] = buf0[2];
  src[3] = buf0[3];
  src[4] = buf1[0];
  src[5] = buf1[1];
  src[6] = buf1[2];
  src[7] = buf1[3];

  u32 dst[8];

  dst[0] = 0;
  dst[1] = 0;
  dst[2] = 0;
  dst[3] = 0;
  dst[4] = 0;
  dst[5] = 0;
  dst[6] = 0;
  dst[7] = 0;

  u32 out_len = 0;

  for (u32 pos = 0; pos < scan_len; pos++)
  {
    const u32 c = (src[pos / 4] >> ((pos & 3) * 8)) & 0xff;

    if (c == needle) continue;

    // out_len never exceeds pos, so the write stays inside dst
    dst[out_len / 4] |= c << ((out_len & 3) * 8);

    out_len++;
  }

  buf0[0] = dst[0];
  buf0[1] = dst[1];
  buf0[2] = dst[2];
  buf0[3] = dst[3];
  buf1[0] = dst[4];
  buf1[1] = dst[5];
  buf1[2] = dst[6];
  buf1[3] = dst[7];

  return out_len;
}
2605
/**
 * Placeholder for the recursive toggle-case rule.
 *
 * Not implemented: the candidate buffers are left untouched and the
 * incoming length is handed straight back. p0 and p1 are unused.
 */
static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  // TODO

  const u32 out_len = in_len;

  return out_len;
}
2611
2612 static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2613 {
2614 if ( in_len == 0) return (in_len);
2615 if ((in_len + p0) >= 32) return (in_len);
2616
2617 u32 out_len = in_len;
2618
2619 const u32 tmp = buf0[0] & 0xFF;
2620
2621 rshift_block_N (buf0, buf1, buf0, buf1, p0);
2622
2623 #ifdef IS_NV
2624 switch (p0)
2625 {
2626 case 1: buf0[0] |= tmp;
2627 break;
2628 case 2: buf0[0] |= __byte_perm (tmp, 0, 0x5400);
2629 break;
2630 case 3: buf0[0] |= __byte_perm (tmp, 0, 0x4000);
2631 break;
2632 case 4: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2633 break;
2634 case 5: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2635 buf0[1] |= tmp;
2636 break;
2637 case 6: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2638 buf0[1] |= __byte_perm (tmp, 0, 0x5400);
2639 break;
2640 case 7: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2641 buf0[1] |= __byte_perm (tmp, 0, 0x4000);
2642 break;
2643 case 8: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2644 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2645 break;
2646 case 9: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2647 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2648 buf0[2] |= tmp;
2649 break;
2650 case 10: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2651 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2652 buf0[2] |= __byte_perm (tmp, 0, 0x5400);
2653 break;
2654 case 11: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2655 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2656 buf0[2] |= __byte_perm (tmp, 0, 0x4000);
2657 break;
2658 case 12: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2659 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2660 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2661 break;
2662 case 13: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2663 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2664 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2665 buf0[3] |= tmp;
2666 break;
2667 case 14: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2668 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2669 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2670 buf0[3] |= __byte_perm (tmp, 0, 0x5400);
2671 break;
2672 case 15: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2673 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2674 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2675 buf0[3] |= __byte_perm (tmp, 0, 0x4000);
2676 break;
2677 case 16: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2678 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2679 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2680 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2681 break;
2682 case 17: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2683 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2684 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2685 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2686 buf1[0] |= tmp;
2687 break;
2688 case 18: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2689 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2690 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2691 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2692 buf1[0] |= __byte_perm (tmp, 0, 0x5400);
2693 break;
2694 case 19: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2695 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2696 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2697 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2698 buf1[0] |= __byte_perm (tmp, 0, 0x4000);
2699 break;
2700 case 20: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2701 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2702 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2703 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2704 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2705 break;
2706 case 21: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2707 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2708 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2709 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2710 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2711 buf1[1] |= tmp;
2712 break;
2713 case 22: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2714 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2715 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2716 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2717 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2718 buf1[1] |= __byte_perm (tmp, 0, 0x5400);
2719 break;
2720 case 23: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2721 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2722 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2723 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2724 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2725 buf1[1] |= __byte_perm (tmp, 0, 0x4000);
2726 break;
2727 case 24: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2728 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2729 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2730 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2731 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2732 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2733 break;
2734 case 25: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2735 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2736 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2737 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2738 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2739 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2740 buf1[2] |= tmp;
2741 break;
2742 case 26: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2743 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2744 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2745 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2746 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2747 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2748 buf1[2] |= __byte_perm (tmp, 0, 0x5400);
2749 break;
2750 case 27: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2751 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2752 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2753 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2754 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2755 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2756 buf1[2] |= __byte_perm (tmp, 0, 0x4000);
2757 break;
2758 case 28: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2759 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2760 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2761 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2762 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2763 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2764 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2765 break;
2766 case 29: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2767 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2768 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2769 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2770 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2771 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2772 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2773 buf1[3] |= tmp;
2774 break;
2775 case 30: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2776 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2777 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2778 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2779 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2780 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2781 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2782 buf1[3] |= __byte_perm (tmp, 0, 0x5400);
2783 break;
2784 case 31: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2785 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2786 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2787 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2788 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2789 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2790 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2791 buf1[3] |= __byte_perm (tmp, 0, 0x4000);
2792 break;
2793 }
2794 #endif
2795
2796 #if defined IS_AMD || defined IS_GENERIC
2797 switch (p0)
2798 {
2799 case 1: buf0[0] |= tmp << 0;
2800 break;
2801 case 2: buf0[0] |= tmp << 0 | tmp << 8;
2802 break;
2803 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2804 break;
2805 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2806 break;
2807 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2808 buf0[1] |= tmp << 0;
2809 break;
2810 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2811 buf0[1] |= tmp << 0 | tmp << 8;
2812 break;
2813 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2814 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2815 break;
2816 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2817 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2818 break;
2819 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2820 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2821 buf0[2] |= tmp << 0;
2822 break;
2823 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2824 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2825 buf0[2] |= tmp << 0 | tmp << 8;
2826 break;
2827 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2828 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2829 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2830 break;
2831 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2832 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2833 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2834 break;
2835 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2836 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2837 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2838 buf0[3] |= tmp << 0;
2839 break;
2840 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2841 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2842 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2843 buf0[3] |= tmp << 0 | tmp << 8;
2844 break;
2845 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2846 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2847 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2848 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2849 break;
2850 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2851 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2852 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2853 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2854 break;
2855 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2856 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2857 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2858 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2859 buf1[0] |= tmp << 0;
2860 break;
2861 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2862 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2863 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2864 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2865 buf1[0] |= tmp << 0 | tmp << 8;
2866 break;
2867 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2868 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2869 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2870 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2871 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2872 break;
2873 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2874 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2875 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2876 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2877 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2878 break;
2879 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2880 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2881 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2882 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2883 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2884 buf1[1] |= tmp << 0;
2885 break;
2886 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2887 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2888 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2889 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2890 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2891 buf1[1] |= tmp << 0 | tmp << 8;
2892 break;
2893 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2894 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2895 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2896 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2897 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2898 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2899 break;
2900 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2901 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2902 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2903 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2904 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2905 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2906 break;
2907 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2908 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2909 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2910 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2911 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2912 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2913 buf1[2] |= tmp << 0;
2914 break;
2915 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2916 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2917 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2918 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2919 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2920 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2921 buf1[2] |= tmp << 0 | tmp << 8;
2922 break;
2923 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2924 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2925 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2926 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2927 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2928 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2929 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2930 break;
2931 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2932 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2933 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2934 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2935 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2936 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2937 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2938 break;
2939 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2940 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2941 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2942 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2943 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2944 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2945 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2946 buf1[3] |= tmp << 0;
2947 break;
2948 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2949 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2950 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2951 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2952 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2953 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2954 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2955 buf1[3] |= tmp << 0 | tmp << 8;
2956 break;
2957 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2958 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2959 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2960 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2961 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2962 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2963 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2964 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2965 break;
2966 }
2967 #endif
2968
2969 out_len += p0;
2970
2971 return out_len;
2972 }
2973
2974 static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2975 {
2976 if ( in_len == 0) return (in_len);
2977 if ((in_len + p0) >= 32) return (in_len);
2978
2979 const u32 in_len1 = in_len - 1;
2980
2981 const u32 sh = (in_len1 & 3) * 8;
2982
2983 u32 tmp = 0;
2984
2985 switch (in_len1 / 4)
2986 {
2987 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2988 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2989 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2990 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2991 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2992 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2993 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2994 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2995 }
2996
2997 u32 out_len = in_len;
2998
2999 for (u32 i = 0; i < p0; i++)
3000 {
3001 append_block1 (out_len, buf0, buf1, tmp);
3002
3003 out_len++;
3004 }
3005
3006 return out_len;
3007 }
3008
/**
 * Duplicate-all rule: every character is written twice in a row.
 *
 * Only buf0 is read: each of its four words is expanded into two output
 * words, so buf0[0..1] feed the new buf0 and buf0[2..3] feed the new
 * buf1. The expansion itself is only compiled in for IS_NV (through
 * __byte_perm) or IS_AMD / IS_GENERIC (through shifts and masks).
 * p0 and p1 are unused.
 *
 * Returns in_len untouched for an empty candidate or when the doubled
 * length would reach 32, otherwise in_len * 2.
 */
static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (in_len == 0) return in_len;

  const u32 doubled_len = in_len + in_len;

  if (doubled_len >= 32) return in_len;

  #ifdef IS_NV
  {
    // snapshot first: buf0 is both source and destination
    const u32 s0 = buf0[0];
    const u32 s1 = buf0[1];
    const u32 s2 = buf0[2];
    const u32 s3 = buf0[3];

    buf0[0] = __byte_perm (s0, 0, 0x1100);
    buf0[1] = __byte_perm (s0, 0, 0x3322);
    buf0[2] = __byte_perm (s1, 0, 0x1100);
    buf0[3] = __byte_perm (s1, 0, 0x3322);
    buf1[0] = __byte_perm (s2, 0, 0x1100);
    buf1[1] = __byte_perm (s2, 0, 0x3322);
    buf1[2] = __byte_perm (s3, 0, 0x1100);
    buf1[3] = __byte_perm (s3, 0, 0x3322);
  }
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  {
    // snapshot first: buf0 is both source and destination
    u32 src[4];

    src[0] = buf0[0];
    src[1] = buf0[1];
    src[2] = buf0[2];
    src[3] = buf0[3];

    u32 spread[8];

    for (u32 w = 0; w < 4; w++)
    {
      const u32 x = src[w];

      // characters 0/1 resp. 2/3 of the word, parked in byte lanes 0 and 2
      const u32 low_pair  = ( x        & 0x000000FF) | ((x & 0x0000FF00) << 8);
      const u32 high_pair = ((x >> 16) & 0x000000FF) | ((x >> 8) & 0x00FF0000);

      // copy lanes 0 and 2 into the empty lanes 1 and 3
      spread[(w * 2) + 0] = low_pair  | (low_pair  << 8);
      spread[(w * 2) + 1] = high_pair | (high_pair << 8);
    }

    buf0[0] = spread[0];
    buf0[1] = spread[1];
    buf0[2] = spread[2];
    buf0[3] = spread[3];
    buf1[0] = spread[4];
    buf1[1] = spread[5];
    buf1[2] = spread[6];
    buf1[3] = spread[7];
  }
  #endif

  return doubled_len;
}
3063
3064 static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3065 {
3066 if (in_len < 2) return (in_len);
3067
3068 #ifdef IS_NV
3069 buf0[0] = __byte_perm (buf0[0], 0, 0x3201);
3070 #endif
3071
3072 #if defined IS_AMD || defined IS_GENERIC
3073 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
3074 #endif
3075
3076 return in_len;
3077 }
3078
3079 static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3080 {
3081 if (in_len < 2) return (in_len);
3082
3083 #ifdef IS_NV
3084 switch (in_len)
3085 {
3086 case 2: buf0[0] = __byte_perm (buf0[0], 0, 0x5401);
3087 break;
3088 case 3: buf0[0] = __byte_perm (buf0[0], 0, 0x4120);
3089 break;
3090 case 4: buf0[0] = __byte_perm (buf0[0], 0, 0x2310);
3091 break;
3092 case 5: buf0[1] = __byte_perm (buf0[1], buf0[0], 0x7210);
3093 buf0[0] = __byte_perm (buf0[0], buf0[1], 0x4210);
3094 buf0[1] = __byte_perm (buf0[1], 0, 0x6543);
3095 break;
3096 case 6: buf0[1] = __byte_perm (buf0[1], 0, 0x5401);
3097 break;
3098 case 7: buf0[1] = __byte_perm (buf0[1], 0, 0x4120);
3099 break;
3100 case 8: buf0[1] = __byte_perm (buf0[1], 0, 0x2310);
3101 break;
3102 case 9: buf0[2] = __byte_perm (buf0[2], buf0[1], 0x7210);
3103 buf0[1] = __byte_perm (buf0[1], buf0[2], 0x4210);
3104 buf0[2] = __byte_perm (buf0[2], 0, 0x6543);
3105 break;
3106 case 10: buf0[2] = __byte_perm (buf0[2], 0, 0x5401);
3107 break;
3108 case 11: buf0[2] = __byte_perm (buf0[2], 0, 0x4120);
3109 break;
3110 case 12: buf0[2] = __byte_perm (buf0[2], 0, 0x2310);
3111 break;
3112 case 13: buf0[3] = __byte_perm (buf0[3], buf0[2], 0x7210);
3113 buf0[2] = __byte_perm (buf0[2], buf0[3], 0x4210);
3114 buf0[3] = __byte_perm (buf0[3], 0, 0x6543);
3115 break;
3116 case 14: buf0[3] = __byte_perm (buf0[3], 0, 0x5401);
3117 break;
3118 case 15: buf0[3] = __byte_perm (buf0[3], 0, 0x4120);
3119 break;
3120 case 16: buf0[3] = __byte_perm (buf0[3], 0, 0x2310);
3121 break;
3122 case 17: buf1[0] = __byte_perm (buf1[0], buf0[3], 0x7210);
3123 buf0[3] = __byte_perm (buf0[3], buf1[0], 0x4210);
3124 buf1[0] = __byte_perm (buf1[0], 0, 0x6543);
3125 break;
3126 case 18: buf1[0] = __byte_perm (buf1[0], 0, 0x5401);
3127 break;
3128 case 19: buf1[0] = __byte_perm (buf1[0], 0, 0x4120);
3129 break;
3130 case 20: buf1[0] = __byte_perm (buf1[0], 0, 0x2310);
3131 break;
3132 case 21: buf1[1] = __byte_perm (buf1[1], buf1[0], 0x7210);
3133 buf1[0] = __byte_perm (buf1[0], buf1[1], 0x4210);
3134 buf1[1] = __byte_perm (buf1[1], 0, 0x6543);
3135 break;
3136 case 22: buf1[1] = __byte_perm (buf1[1], 0, 0x5401);
3137 break;
3138 case 23: buf1[1] = __byte_perm (buf1[1], 0, 0x4120);
3139 break;
3140 case 24: buf1[1] = __byte_perm (buf1[1], 0, 0x2310);
3141 break;
3142 case 25: buf1[2] = __byte_perm (buf1[2], buf1[1], 0x7210);
3143 buf1[1] = __byte_perm (buf1[1], buf1[2], 0x4210);
3144 buf1[2] = __byte_perm (buf1[2], 0, 0x6543);
3145 break;
3146 case 26: buf1[2] = __byte_perm (buf1[2], 0, 0x5401);
3147 break;
3148 case 27: buf1[2] = __byte_perm (buf1[2], 0, 0x4120);
3149 break;
3150 case 28: buf1[2] = __byte_perm (buf1[2], 0, 0x2310);
3151 break;
3152 case 29: buf1[3] = __byte_perm (buf1[3], buf1[2], 0x7210);
3153 buf1[2] = __byte_perm (buf1[2], buf1[3], 0x4210);
3154 buf1[3] = __byte_perm (buf1[3], 0, 0x6543);
3155 break;
3156 case 30: buf1[3] = __byte_perm (buf1[3], 0, 0x5401);
3157 break;
3158 case 31: buf1[3] = __byte_perm (buf1[3], 0, 0x4120);
3159 break;
3160 }
3161 #endif
3162
3163 #if defined IS_AMD || defined IS_GENERIC
3164 switch (in_len)
3165 {
3166 case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
3167 break;
3168 case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00);
3169 break;
3170 case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000);
3171 break;
3172 case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1];
3173 buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24);
3174 buf0[1] = (buf0[1] >> 24);
3175 break;
3176 case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF);
3177 break;
3178 case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00);
3179 break;
3180 case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000);
3181 break;
3182 case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2];
3183 buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24);
3184 buf0[2] = (buf0[2] >> 24);
3185 break;
3186 case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF);
3187 break;
3188 case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00);
3189 break;
3190 case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000);
3191 break;
3192 case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3];
3193 buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24);
3194 buf0[3] = (buf0[3] >> 24);
3195 break;
3196 case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF);
3197 break;
3198 case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00);
3199 break;
3200 case 16: buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000);
3201 break;
3202 case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0];
3203 buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24);
3204 buf1[0] = (buf1[0] >> 24);
3205 break;
3206 case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF);
3207 break;
3208 case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00);
3209 break;
3210 case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000);
3211 break;
3212 case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1];
3213 buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24);
3214 buf1[1] = (buf1[1] >> 24);
3215 break;
3216 case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF);
3217 break;
3218 case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00);
3219 break;
3220 case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000);
3221 break;
3222 case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2];
3223 buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24);
3224 buf1[2] = (buf1[2] >> 24);
3225 break;
3226 case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF);
3227 break;
3228 case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00);
3229 break;
3230 case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000);
3231 break;
3232 case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3];
3233 buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24);
3234 buf1[3] = (buf1[3] >> 24);
3235 break;
3236 case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF);
3237 break;
3238 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00);
3239 break;
3240 }
3241 #endif
3242
3243 return in_len;
3244 }
3245
3246 static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3247 {
3248 if (p0 >= in_len) return (in_len);
3249 if (p1 >= in_len) return (in_len);
3250
3251 u32 tmp0 = 0;
3252 u32 tmp1 = 0;
3253
3254 #ifdef IS_NV
3255 switch (p0)
3256 {
3257 case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
3258 break;
3259 case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
3260 break;
3261 case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
3262 break;
3263 case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
3264 break;
3265 case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
3266 break;
3267 case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
3268 break;
3269 case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
3270 break;
3271 case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
3272 break;
3273 case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
3274 break;
3275 case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
3276 break;
3277 case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
3278 break;
3279 case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
3280 break;
3281 case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
3282 break;
3283 case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
3284 break;
3285 case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
3286 break;
3287 case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
3288 break;
3289 case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
3290 break;
3291 case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
3292 break;
3293 case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
3294 break;
3295 case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
3296 break;
3297 case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
3298 break;
3299 case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
3300 break;
3301 case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
3302 break;
3303 case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
3304 break;
3305 case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
3306 break;
3307 case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
3308 break;
3309 case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
3310 break;
3311 case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
3312 break;
3313 case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
3314 break;
3315 case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
3316 break;
3317 case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
3318 break;
3319 case 31: tmp0 = __byte_perm (buf1[3], 0, 0x6543);
3320 break;
3321 }
3322
3323 switch (p1)
3324 {
3325 case 0: tmp1 = __byte_perm (buf0[0], 0, 0x6540);
3326 buf0[0] = __byte_perm (tmp0, buf0[0], 0x7650);
3327 break;
3328 case 1: tmp1 = __byte_perm (buf0[0], 0, 0x6541);
3329 buf0[0] = __byte_perm (tmp0, buf0[0], 0x7604);
3330 break;
3331 case 2: tmp1 = __byte_perm (buf0[0], 0, 0x6542);
3332 buf0[0] = __byte_perm (tmp0, buf0[0], 0x7054);
3333 break;
3334 case 3: tmp1 = __byte_perm (buf0[0], 0, 0x6543);
3335 buf0[0] = __byte_perm (tmp0, buf0[0], 0x0654);
3336 break;
3337 case 4: tmp1 = __byte_perm (buf0[1], 0, 0x6540);
3338 buf0[1] = __byte_perm (tmp0, buf0[1], 0x7650);
3339 break;
3340 case 5: tmp1 = __byte_perm (buf0[1], 0, 0x6541);
3341 buf0[1] = __byte_perm (tmp0, buf0[1], 0x7604);
3342 break;
3343 case 6: tmp1 = __byte_perm (buf0[1], 0, 0x6542);
3344 buf0[1] = __byte_perm (tmp0, buf0[1], 0x7054);
3345 break;
3346 case 7: tmp1 = __byte_perm (buf0[1], 0, 0x6543);
3347 buf0[1] = __byte_perm (tmp0, buf0[1], 0x0654);
3348 break;
3349 case 8: tmp1 = __byte_perm (buf0[2], 0, 0x6540);
3350 buf0[2] = __byte_perm (tmp0, buf0[2], 0x7650);
3351 break;
3352 case 9: tmp1 = __byte_perm (buf0[2], 0, 0x6541);
3353 buf0[2] = __byte_perm (tmp0, buf0[2], 0x7604);
3354 break;
3355 case 10: tmp1 = __byte_perm (buf0[2], 0, 0x6542);
3356 buf0[2] = __byte_perm (tmp0, buf0[2], 0x7054);
3357 break;
3358 case 11: tmp1 = __byte_perm (buf0[2], 0, 0x6543);
3359 buf0[2] = __byte_perm (tmp0, buf0[2], 0x0654);
3360 break;
3361 case 12: tmp1 = __byte_perm (buf0[3], 0, 0x6540);
3362 buf0[3] = __byte_perm (tmp0, buf0[3], 0x7650);
3363 break;
3364 case 13: tmp1 = __byte_perm (buf0[3], 0, 0x6541);
3365 buf0[3] = __byte_perm (tmp0, buf0[3], 0x7604);
3366 break;
3367 case 14: tmp1 = __byte_perm (buf0[3], 0, 0x6542);
3368 buf0[3] = __byte_perm (tmp0, buf0[3], 0x7054);
3369 break;
3370 case 15: tmp1 = __byte_perm (buf0[3], 0, 0x6543);
3371 buf0[3] = __byte_perm (tmp0, buf0[3], 0x0654);
3372 break;
3373 case 16: tmp1 = __byte_perm (buf1[0], 0, 0x6540);
3374 buf1[0] = __byte_perm (tmp0, buf1[0], 0x7650);
3375 break;
3376 case 17: tmp1 = __byte_perm (buf1[0], 0, 0x6541);
3377 buf1[0] = __byte_perm (tmp0, buf1[0], 0x7604);
3378 break;
3379 case 18: tmp1 = __byte_perm (buf1[0], 0, 0x6542);
3380 buf1[0] = __byte_perm (tmp0, buf1[0], 0x7054);
3381 break;
3382 case 19: tmp1 = __byte_perm (buf1[0], 0, 0x6543);
3383 buf1[0] = __byte_perm (tmp0, buf1[0], 0x0654);
3384 break;
3385 case 20: tmp1 = __byte_perm (buf1[1], 0, 0x6540);
3386 buf1[1] = __byte_perm (tmp0, buf1[1], 0x7650);
3387 break;
3388 case 21: tmp1 = __byte_perm (buf1[1], 0, 0x6541);
3389 buf1[1] = __byte_perm (tmp0, buf1[1], 0x7604);
3390 break;
3391 case 22: tmp1 = __byte_perm (buf1[1], 0, 0x6542);
3392 buf1[1] = __byte_perm (tmp0, buf1[1], 0x7054);
3393 break;
3394 case 23: tmp1 = __byte_perm (buf1[1], 0, 0x6543);
3395 buf1[1] = __byte_perm (tmp0, buf1[1], 0x0654);
3396 break;
3397 case 24: tmp1 = __byte_perm (buf1[2], 0, 0x6540);
3398 buf1[2] = __byte_perm (tmp0, buf1[2], 0x7650);
3399 break;
3400 case 25: tmp1 = __byte_perm (buf1[2], 0, 0x6541);
3401 buf1[2] = __byte_perm (tmp0, buf1[2], 0x7604);
3402 break;
3403 case 26: tmp1 = __byte_perm (buf1[2], 0, 0x6542);
3404 buf1[2] = __byte_perm (tmp0, buf1[2], 0x7054);
3405 break;
3406 case 27: tmp1 = __byte_perm (buf1[2], 0, 0x6543);
3407 buf1[2] = __byte_perm (tmp0, buf1[2], 0x0654);
3408 break;
3409 case 28: tmp1 = __byte_perm (buf1[3], 0, 0x6540);
3410 buf1[3] = __byte_perm (tmp0, buf1[3], 0x7650);
3411 break;
3412 case 29: tmp1 = __byte_perm (buf1[3], 0, 0x6541);
3413 buf1[3] = __byte_perm (tmp0, buf1[3], 0x7604);
3414 break;
3415 case 30: tmp1 = __byte_perm (buf1[3], 0, 0x6542);
3416 buf1[3] = __byte_perm (tmp0, buf1[3], 0x7054);
3417 break;
3418 case 31: tmp1 = __byte_perm (buf1[3], 0, 0x6543);
3419 buf1[3] = __byte_perm (tmp0, buf1[3], 0x0654);
3420 break;
3421 }
3422
3423 switch (p0)
3424 {
3425 case 0: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7650);
3426 break;
3427 case 1: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7604);
3428 break;
3429 case 2: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7054);
3430 break;
3431 case 3: buf0[0] = __byte_perm (tmp1, buf0[0], 0x0654);
3432 break;
3433 case 4: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7650);
3434 break;
3435 case 5: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7604);
3436 break;
3437 case 6: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7054);
3438 break;
3439 case 7: buf0[1] = __byte_perm (tmp1, buf0[1], 0x0654);
3440 break;
3441 case 8: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7650);
3442 break;
3443 case 9: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7604);
3444 break;
3445 case 10: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7054);
3446 break;
3447 case 11: buf0[2] = __byte_perm (tmp1, buf0[2], 0x0654);
3448 break;
3449 case 12: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7650);
3450 break;
3451 case 13: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7604);
3452 break;
3453 case 14: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7054);
3454 break;
3455 case 15: buf0[3] = __byte_perm (tmp1, buf0[3], 0x0654);
3456 break;
3457 case 16: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7650);
3458 break;
3459 case 17: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7604);
3460 break;
3461 case 18: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7054);
3462 break;
3463 case 19: buf1[0] = __byte_perm (tmp1, buf1[0], 0x0654);
3464 break;
3465 case 20: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7650);
3466 break;
3467 case 21: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7604);
3468 break;
3469 case 22: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7054);
3470 break;
3471 case 23: buf1[1] = __byte_perm (tmp1, buf1[1], 0x0654);
3472 break;
3473 case 24: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7650);
3474 break;
3475 case 25: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7604);
3476 break;
3477 case 26: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7054);
3478 break;
3479 case 27: buf1[2] = __byte_perm (tmp1, buf1[2], 0x0654);
3480 break;
3481 case 28: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7650);
3482 break;
3483 case 29: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7604);
3484 break;
3485 case 30: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7054);
3486 break;
3487 case 31: buf1[3] = __byte_perm (tmp1, buf1[3], 0x0654);
3488 break;
3489 }
3490 #endif
3491
3492 #if defined IS_AMD || defined IS_GENERIC
3493 switch (p0)
3494 {
3495 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
3496 break;
3497 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
3498 break;
3499 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
3500 break;
3501 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
3502 break;
3503 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
3504 break;
3505 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
3506 break;
3507 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
3508 break;
3509 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
3510 break;
3511 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
3512 break;
3513 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
3514 break;
3515 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
3516 break;
3517 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
3518 break;
3519 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
3520 break;
3521 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
3522 break;
3523 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
3524 break;
3525 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
3526 break;
3527 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
3528 break;
3529 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
3530 break;
3531 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
3532 break;
3533 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
3534 break;
3535 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
3536 break;
3537 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
3538 break;
3539 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
3540 break;
3541 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
3542 break;
3543 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
3544 break;
3545 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
3546 break;
3547 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
3548 break;
3549 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
3550 break;
3551 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
3552 break;
3553 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
3554 break;
3555 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
3556 break;
3557 case 31: tmp0 = (buf1[3] >> 24) & 0xFF;
3558 break;
3559 }
3560
3561 switch (p1)
3562 {
3563 case 0: tmp1 = (buf0[0] >> 0) & 0xff;
3564 buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0;
3565 break;
3566 case 1: tmp1 = (buf0[0] >> 8) & 0xff;
3567 buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8;
3568 break;
3569 case 2: tmp1 = (buf0[0] >> 16) & 0xff;
3570 buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16;
3571 break;
3572 case 3: tmp1 = (buf0[0] >> 24) & 0xff;
3573 buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24;
3574 break;
3575 case 4: tmp1 = (buf0[1] >> 0) & 0xff;
3576 buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0;
3577 break;
3578 case 5: tmp1 = (buf0[1] >> 8) & 0xff;
3579 buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8;
3580 break;
3581 case 6: tmp1 = (buf0[1] >> 16) & 0xff;
3582 buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16;
3583 break;
3584 case 7: tmp1 = (buf0[1] >> 24) & 0xff;
3585 buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24;
3586 break;
3587 case 8: tmp1 = (buf0[2] >> 0) & 0xff;
3588 buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0;
3589 break;
3590 case 9: tmp1 = (buf0[2] >> 8) & 0xff;
3591 buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8;
3592 break;
3593 case 10: tmp1 = (buf0[2] >> 16) & 0xff;
3594 buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16;
3595 break;
3596 case 11: tmp1 = (buf0[2] >> 24) & 0xff;
3597 buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24;
3598 break;
3599 case 12: tmp1 = (buf0[3] >> 0) & 0xff;
3600 buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0;
3601 break;
3602 case 13: tmp1 = (buf0[3] >> 8) & 0xff;
3603 buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8;
3604 break;
3605 case 14: tmp1 = (buf0[3] >> 16) & 0xff;
3606 buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16;
3607 break;
3608 case 15: tmp1 = (buf0[3] >> 24) & 0xff;
3609 buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24;
3610 break;
3611 case 16: tmp1 = (buf1[0] >> 0) & 0xff;
3612 buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0;
3613 break;
3614 case 17: tmp1 = (buf1[0] >> 8) & 0xff;
3615 buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8;
3616 break;
3617 case 18: tmp1 = (buf1[0] >> 16) & 0xff;
3618 buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16;
3619 break;
3620 case 19: tmp1 = (buf1[0] >> 24) & 0xff;
3621 buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24;
3622 break;
3623 case 20: tmp1 = (buf1[1] >> 0) & 0xff;
3624 buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0;
3625 break;
3626 case 21: tmp1 = (buf1[1] >> 8) & 0xff;
3627 buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8;
3628 break;
3629 case 22: tmp1 = (buf1[1] >> 16) & 0xff;
3630 buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16;
3631 break;
3632 case 23: tmp1 = (buf1[1] >> 24) & 0xff;
3633 buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24;
3634 break;
3635 case 24: tmp1 = (buf1[2] >> 0) & 0xff;
3636 buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0;
3637 break;
3638 case 25: tmp1 = (buf1[2] >> 8) & 0xff;
3639 buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8;
3640 break;
3641 case 26: tmp1 = (buf1[2] >> 16) & 0xff;
3642 buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16;
3643 break;
3644 case 27: tmp1 = (buf1[2] >> 24) & 0xff;
3645 buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24;
3646 break;
3647 case 28: tmp1 = (buf1[3] >> 0) & 0xff;
3648 buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0;
3649 break;
3650 case 29: tmp1 = (buf1[3] >> 8) & 0xff;
3651 buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8;
3652 break;
3653 case 30: tmp1 = (buf1[3] >> 16) & 0xff;
3654 buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16;
3655 break;
3656 case 31: tmp1 = (buf1[3] >> 24) & 0xff;
3657 buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24;
3658 break;
3659 }
3660
3661 switch (p0)
3662 {
3663 case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0;
3664 break;
3665 case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8;
3666 break;
3667 case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16;
3668 break;
3669 case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24;
3670 break;
3671 case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0;
3672 break;
3673 case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8;
3674 break;
3675 case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16;
3676 break;
3677 case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24;
3678 break;
3679 case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0;
3680 break;
3681 case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8;
3682 break;
3683 case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16;
3684 break;
3685 case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24;
3686 break;
3687 case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0;
3688 break;
3689 case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8;
3690 break;
3691 case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16;
3692 break;
3693 case 15: buf0[3] = (buf0[3] & 0x00ffffff) | tmp1 << 24;
3694 break;
3695 case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0;
3696 break;
3697 case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8;
3698 break;
3699 case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16;
3700 break;
3701 case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24;
3702 break;
3703 case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0;
3704 break;
3705 case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8;
3706 break;
3707 case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16;
3708 break;
3709 case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24;
3710 break;
3711 case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0;
3712 break;
3713 case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8;
3714 break;
3715 case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16;
3716 break;
3717 case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24;
3718 break;
3719 case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0;
3720 break;
3721 case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8;
3722 break;
3723 case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16;
3724 break;
3725 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24;
3726 break;
3727 }
3728 #endif
3729
3730 return in_len;
3731 }
3732
3733 static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3734 {
3735 if (p0 >= in_len) return (in_len);
3736
3737 const u32 mr = 0xffu << ((p0 & 3) * 8);
3738 const u32 ml = ~mr;
3739
3740 switch (p0 / 4)
3741 {
3742 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
3743 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
3744 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
3745 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
3746 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
3747 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
3748 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
3749 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
3750 }
3751
3752 return in_len;
3753 }
3754
3755 static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3756 {
3757 if (p0 >= in_len) return (in_len);
3758
3759 const u32 mr = 0xffu << ((p0 & 3) * 8);
3760 const u32 ml = ~mr;
3761
3762 switch (p0 / 4)
3763 {
3764 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
3765 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
3766 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
3767 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
3768 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
3769 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
3770 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
3771 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
3772 }
3773
3774 return in_len;
3775 }
3776
3777 static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3778 {
3779 if (p0 >= in_len) return (in_len);
3780
3781 const u32 mr = 0xffu << ((p0 & 3) * 8);
3782 const u32 ml = ~mr;
3783
3784 const u32 n = 0x01010101 & mr;
3785
3786 switch (p0 / 4)
3787 {
3788 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
3789 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
3790 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
3791 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
3792 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
3793 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
3794 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
3795 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
3796 }
3797
3798 return in_len;
3799 }
3800
3801 static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3802 {
3803 if (p0 >= in_len) return (in_len);
3804
3805 const u32 mr = 0xffu << ((p0 & 3) * 8);
3806 const u32 ml = ~mr;
3807
3808 const u32 n = 0x01010101 & mr;
3809
3810 switch (p0 / 4)
3811 {
3812 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
3813 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
3814 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
3815 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
3816 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
3817 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
3818 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
3819 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
3820 }
3821
3822 return in_len;
3823 }
3824
3825 static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3826 {
3827 if ((p0 + 1) >= in_len) return (in_len);
3828
3829 u32 tib40[4];
3830 u32 tib41[4];
3831
3832 lshift_block (buf0, buf1, tib40, tib41);
3833
3834 const u32 mr = 0xffu << ((p0 & 3) * 8);
3835 const u32 ml = ~mr;
3836
3837 switch (p0 / 4)
3838 {
3839 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3840 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3841 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3842 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3843 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3844 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3845 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3846 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
3847 }
3848
3849 return in_len;
3850 }
3851
3852 static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3853 {
3854 if (p0 == 0) return (in_len);
3855
3856 if (p0 >= in_len) return (in_len);
3857
3858 u32 tib40[4];
3859 u32 tib41[4];
3860
3861 rshift_block (buf0, buf1, tib40, tib41);
3862
3863 const u32 mr = 0xffu << ((p0 & 3) * 8);
3864 const u32 ml = ~mr;
3865
3866 switch (p0 / 4)
3867 {
3868 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3869 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3870 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3871 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3872 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3873 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3874 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3875 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
3876 }
3877
3878 return in_len;
3879 }
3880
3881 static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3882 {
3883 if (p0 > in_len) return (in_len);
3884
3885 if ((in_len + p0) >= 32) return (in_len);
3886
3887 u32 out_len = in_len;
3888
3889 u32 tib40[4];
3890 u32 tib41[4];
3891
3892 tib40[0] = buf0[0];
3893 tib40[1] = buf0[1];
3894 tib40[2] = buf0[2];
3895 tib40[3] = buf0[3];
3896 tib41[0] = buf1[0];
3897 tib41[1] = buf1[1];
3898 tib41[2] = buf1[2];
3899 tib41[3] = buf1[3];
3900
3901 truncate_right (tib40, tib41, p0);
3902
3903 rshift_block_N (buf0, buf1, buf0, buf1, p0);
3904
3905 buf0[0] |= tib40[0];
3906 buf0[1] |= tib40[1];
3907 buf0[2] |= tib40[2];
3908 buf0[3] |= tib40[3];
3909 buf1[0] |= tib41[0];
3910 buf1[1] |= tib41[1];
3911 buf1[2] |= tib41[2];
3912 buf1[3] |= tib41[3];
3913
3914 out_len += p0;
3915
3916 return out_len;
3917 }
3918
3919 static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3920 {
3921 if (p0 > in_len) return (in_len);
3922
3923 if ((in_len + p0) >= 32) return (in_len);
3924
3925 u32 out_len = in_len;
3926
3927 u32 tib40[4];
3928 u32 tib41[4];
3929
3930 rshift_block_N (buf0, buf1, tib40, tib41, p0);
3931
3932 truncate_left (tib40, tib41, out_len);
3933
3934 buf0[0] |= tib40[0];
3935 buf0[1] |= tib40[1];
3936 buf0[2] |= tib40[2];
3937 buf0[3] |= tib40[3];
3938 buf1[0] |= tib41[0];
3939 buf1[1] |= tib41[1];
3940 buf1[2] |= tib41[2];
3941 buf1[3] |= tib41[3];
3942
3943 out_len += p0;
3944
3945 return out_len;
3946 }
3947
3948 static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3949 {
3950 buf0[0] |= (generate_cmask (buf0[0]));
3951 buf0[1] |= (generate_cmask (buf0[1]));
3952 buf0[2] |= (generate_cmask (buf0[2]));
3953 buf0[3] |= (generate_cmask (buf0[3]));
3954 buf1[0] |= (generate_cmask (buf1[0]));
3955 buf1[1] |= (generate_cmask (buf1[1]));
3956 buf1[2] |= (generate_cmask (buf1[2]));
3957 buf1[3] |= (generate_cmask (buf1[3]));
3958
3959 #ifdef IS_NV
3960 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
3961
3962 for (u32 i = 0; i < in_len; i++)
3963 {
3964 u32 tmp0;
3965 u32 tmp1;
3966
3967 switch (i)
3968 {
3969 case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
3970 tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
3971 case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
3972 tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
3973 case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
3974 tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
3975 case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
3976 tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
3977 case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
3978 tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
3979 case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
3980 tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
3981 case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
3982 tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
3983 case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
3984 tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
3985 case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
3986 tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
3987 case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
3988 tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
3989 case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
3990 tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
3991 case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
3992 tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
3993 case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
3994 tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
3995 case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
3996 tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
3997 case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
3998 tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
3999 case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
4000 tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
4001 case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
4002 tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
4003 case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
4004 tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
4005 case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
4006 tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
4007 case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
4008 tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
4009 case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
4010 tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
4011 case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
4012 tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
4013 case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
4014 tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
4015 case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
4016 tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
4017 case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
4018 tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
4019 case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
4020 tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
4021 case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
4022 tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
4023 case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
4024 tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
4025 case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
4026 tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
4027 case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
4028 tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
4029 case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
4030 tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
4031 }
4032
4033 if (i < 3)
4034 {
4035 if (tmp0 == ' ') buf0[0] &= tmp1 ;
4036 }
4037 else if (i < 7)
4038 {
4039 if (tmp0 == ' ') buf0[1] &= tmp1 ;
4040 }
4041 else if (i < 11)
4042 {
4043 if (tmp0 == ' ') buf0[2] &= tmp1 ;
4044 }
4045 else if (i < 15)
4046 {
4047 if (tmp0 == ' ') buf0[3] &= tmp1 ;
4048 }
4049 else if (i < 19)
4050 {
4051 if (tmp0 == ' ') buf1[0] &= tmp1 ;
4052 }
4053 else if (i < 23)
4054 {
4055 if (tmp0 == ' ') buf1[1] &= tmp1 ;
4056 }
4057 else if (i < 27)
4058 {
4059 if (tmp0 == ' ') buf1[2] &= tmp1 ;
4060 }
4061 else if (i < 31)
4062 {
4063 if (tmp0 == ' ') buf1[3] &= tmp1 ;
4064 }
4065 }
4066 #endif
4067
4068 #if defined IS_AMD || defined IS_GENERIC
4069 u32 tib40[4];
4070 u32 tib41[4];
4071
4072 const uchar4 tmp0 = (uchar4) (' ');
4073 const uchar4 tmp1 = (uchar4) (0x00);
4074 const uchar4 tmp2 = (uchar4) (0xff);
4075
4076 uchar4 tmp;
4077
4078 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
4079 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
4080 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
4081 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
4082 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
4083 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
4084 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
4085 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
4086
4087 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
4088
4089 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
4090 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
4091 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
4092 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
4093 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
4094 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
4095 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
4096 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
4097 #endif
4098
4099 return in_len;
4100 }
4101
4102 u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
4103 {
4104 u32 out_len = in_len;
4105
4106 switch (name)
4107 {
4108 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
4109 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
4110 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
4111 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
4112 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
4113 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
4114 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
4115 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
4116 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
4117 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
4118 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
4119 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
4120 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
4121 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
4122 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
4123 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
4124 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
4125 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
4126 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
4127 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
4128 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
4129 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
4130 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
4131 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
4132 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
4133 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
4134 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
4135 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
4136 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
4137 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
4138 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
4139 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
4140 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
4141 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
4142 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
4143 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
4144 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
4145 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
4146 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
4147 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
4148 }
4149
4150 return out_len;
4151 }
4152
4153 u32 apply_rules (__global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
4154 {
4155 u32 out_len = len;
4156
4157 for (u32 i = 0; cmds[i] != 0; i++)
4158 {
4159 const u32 cmd = cmds[i];
4160
4161 const u32 name = (cmd >> 0) & 0xff;
4162 const u32 p0 = (cmd >> 8) & 0xff;
4163 const u32 p1 = (cmd >> 16) & 0xff;
4164
4165 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
4166 }
4167
4168 return out_len;
4169 }