23580ab4f8fe48f3b2ec6cdadf87343ce79a046b
[hashcat.git] / OpenCL / common.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 static int device_memcmp (const u32 d1[4], __global u32 *d2)
7 {
8 if (d1[3] > d2[DGST_R3]) return ( 1);
9 if (d1[3] < d2[DGST_R3]) return (-1);
10 if (d1[2] > d2[DGST_R2]) return ( 1);
11 if (d1[2] < d2[DGST_R2]) return (-1);
12 if (d1[1] > d2[DGST_R1]) return ( 1);
13 if (d1[1] < d2[DGST_R1]) return (-1);
14 if (d1[0] > d2[DGST_R0]) return ( 1);
15 if (d1[0] < d2[DGST_R0]) return (-1);
16
17 return (0);
18 }
19
20 static int find_hash (const u32 digest[4], const u32 digests_cnt, __global digest_t *digests_buf)
21 {
22 for (u32 l = 0, r = digests_cnt; r; r >>= 1)
23 {
24 const u32 m = r >> 1;
25
26 const u32 c = l + m;
27
28 const int cmp = device_memcmp (digest, digests_buf[c].digest_buf);
29
30 if (cmp > 0)
31 {
32 l += m + 1;
33
34 r--;
35 }
36
37 if (cmp == 0) return (c);
38 }
39
40 return (-1);
41 }
42
/**
 * Tests a single bit of a bitmap held in global memory.
 *
 * The word is selected by (digest >> bitmap_shift) & bitmap_mask and the bit
 * inside that word by the low five bits of digest.
 *
 * Returns the selected word masked down to that one bit: zero when the bit is
 * clear, the bit's own value (non-zero) when it is set.
 */
static u32 check_bitmap (__global u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
{
  const u32 word = bitmap[(digest >> bitmap_shift) & bitmap_mask];

  // unsigned literal so that bit index 31 does not shift into the sign bit of an int
  const u32 bit = 1u << (digest & 0x1f);

  return (word & bit);
}
47
/**
 * Runs a 4-word digest through two sets of four bitmaps.
 *
 * digest[0..3] are looked up in bitmap_s1_a..bitmap_s1_d using bitmap_shift1
 * and then in bitmap_s2_a..bitmap_s2_d using bitmap_shift2; all lookups share
 * bitmap_mask. The first lookup that finds its bit clear ends the check.
 *
 * The digest parameter is declared with four elements because the body reads
 * digest[2] and digest[3]; the parameter is still a plain pointer for callers.
 *
 * Returns 1 when all eight lookups found their bit set, 0 otherwise.
 */
static u32 check (const u32 digest[4], __global u32 *bitmap_s1_a, __global u32 *bitmap_s1_b, __global u32 *bitmap_s1_c, __global u32 *bitmap_s1_d, __global u32 *bitmap_s2_a, __global u32 *bitmap_s2_b, __global u32 *bitmap_s2_c, __global u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
{
  // first bitmap set
  if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0);
  if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0);
  if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0);
  if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0);

  // second bitmap set
  if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0);
  if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0);
  if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0);
  if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0);

  return (1);
}
62
63 static void mark_hash (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos)
64 {
65 hashes_shown[hash_pos] = 1;
66
67 plains_buf[hash_pos].gidvid = (gid * 1) + 0;
68 plains_buf[hash_pos].il_pos = il_pos;
69 }
70
/**
 * Zeroes every byte of the 16-byte block w whose byte index is >= len.
 *
 * Byte 0 is the least significant byte of w[0], byte 15 the most significant
 * byte of w[3]. A len of 16 or more leaves the block unchanged.
 */
static void truncate_block (u32 w[4], const u32 len)
{
  for (u32 i = 0; i < 4; i++)
  {
    const u32 first = i * 4; // index of the lowest byte held by w[i]

    if (len >= first + 4) continue; // word lies completely below the cut

    if (len <= first)
    {
      // word lies completely at or above the cut
      w[i] = 0;

      continue;
    }

    // the cut is inside this word: keep its low (len - first) bytes
    w[i] &= 0xFFFFFFFF >> (32 - ((len - first) * 8));
  }
}
133
#ifdef IS_AMD
// Bytes 0 and 1 of v moved to bytes 0 and 2 of the result; bytes 1 and 3 of the result are zero.
static u32 make_unicode_lo (const u32 v)
{
  return ((v & 0x0000FF00) << 8) | (v & 0x000000FF);
}

// Bytes 2 and 3 of v moved to bytes 0 and 2 of the result; bytes 1 and 3 of the result are zero.
static u32 make_unicode_hi (const u32 v)
{
  return ((v & 0xFF000000) >> 8) | ((v & 0x00FF0000) >> 16);
}
#endif

/**
 * Widens the 16 bytes of `in` to 32 bytes: every input byte ends up in the
 * low byte of its own 16-bit lane, with the high byte of that lane zero.
 *
 * out1 receives the expansion of in[0] and in[1], out2 that of in[2] and
 * in[3]. The stores run from out2[3] down to out1[0], so every word of `in`
 * has been read before the matching word of out1 is written.
 *
 * The body is selected by IS_NV or IS_AMD; when neither is defined nothing is
 * written. The IS_NV variant uses __byte_perm () with selectors 0x7170 and
 * 0x7372 - presumably the same byte placement as the IS_AMD arithmetic,
 * confirm against the __byte_perm () documentation.
 */
static void make_unicode (const u32 in[4], u32 out1[4], u32 out2[4])
{
  #ifdef IS_NV
  out2[3] = __byte_perm (in[3], 0, 0x7372);
  out2[2] = __byte_perm (in[3], 0, 0x7170);
  out2[1] = __byte_perm (in[2], 0, 0x7372);
  out2[0] = __byte_perm (in[2], 0, 0x7170);
  out1[3] = __byte_perm (in[1], 0, 0x7372);
  out1[2] = __byte_perm (in[1], 0, 0x7170);
  out1[1] = __byte_perm (in[0], 0, 0x7372);
  out1[0] = __byte_perm (in[0], 0, 0x7170);
  #endif

  #ifdef IS_AMD
  out2[3] = make_unicode_hi (in[3]);
  out2[2] = make_unicode_lo (in[3]);
  out2[1] = make_unicode_hi (in[2]);
  out2[0] = make_unicode_lo (in[2]);
  out1[3] = make_unicode_hi (in[1]);
  out1[2] = make_unicode_lo (in[1]);
  out1[1] = make_unicode_hi (in[0]);
  out1[0] = make_unicode_lo (in[0]);
  #endif
}
158
#ifdef IS_AMD
// Packs bytes 0 and 2 of lo into result bytes 0 and 1, and bytes 0 and 2 of hi into result bytes 2 and 3.
static u32 undo_unicode_pack (const u32 lo, const u32 hi)
{
  return  (lo & 0x000000ff)
       | ((lo & 0x00ff0000) >>  8)
       | ((hi & 0x000000ff) << 16)
       | ((hi & 0x00ff0000) <<  8);
}
#endif

/**
 * Narrows 32 bytes to 16 bytes by keeping only the low byte of every 16-bit
 * lane of in1 and in2; bytes 1 and 3 of each input word are ignored.
 *
 * out[0] and out[1] are built from in1, out[2] and out[3] from in2. The
 * stores run from out[0] up to out[3], each one after the two input words it
 * depends on have been read.
 *
 * The body is selected by IS_NV or IS_AMD; when neither is defined nothing is
 * written. The IS_NV variant uses __byte_perm () with selector 0x6420 -
 * presumably the same byte selection as the IS_AMD arithmetic, confirm
 * against the __byte_perm () documentation.
 */
static void undo_unicode (const u32 in1[4], const u32 in2[4], u32 out[4])
{
  #ifdef IS_NV
  out[0] = __byte_perm (in1[0], in1[1], 0x6420);
  out[1] = __byte_perm (in1[2], in1[3], 0x6420);
  out[2] = __byte_perm (in2[0], in2[1], 0x6420);
  out[3] = __byte_perm (in2[2], in2[3], 0x6420);
  #endif

  #ifdef IS_AMD
  out[0] = undo_unicode_pack (in1[0], in1[1]);
  out[1] = undo_unicode_pack (in1[2], in1[3]);
  out[2] = undo_unicode_pack (in2[0], in2[1]);
  out[3] = undo_unicode_pack (in2[2], in2[3]);
  #endif
}
179
180 // before: append_0x01_1
/**
 * Places the byte 0x01 at byte position `offset` of the 16-byte block w0
 * (byte 0 is the least significant byte of w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x01;
 * otherwise 0x01 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 16 or more leave the block untouched.
 */
static void append_0x01_1x4 (u32 w0[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x01 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case 0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case 1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case 2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case 3: w0[3] = aligned ? marker : (w0[3] | marker); break;
  }
}
250
251 // before: append_0x01_2
/**
 * Places the byte 0x01 at byte position `offset` of the 32-byte buffer formed
 * by w0 followed by w1 (byte 0 is the least significant byte of w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x01;
 * otherwise 0x01 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 32 or more leave both blocks untouched.
 */
static void append_0x01_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x01 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case 0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case 1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case 2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case 3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case 4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case 5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case 6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case 7: w1[3] = aligned ? marker : (w1[3] | marker); break;
  }
}
385
386 // before: append_0x01_3
/**
 * Places the byte 0x01 at byte position `offset` of the 48-byte buffer formed
 * by w0, w1 and w2 in that order (byte 0 is the least significant byte of
 * w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x01;
 * otherwise 0x01 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 48 or more leave all blocks untouched.
 */
static void append_0x01_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x01 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case  0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case  1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case  2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case  3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case  4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case  5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case  6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case  7: w1[3] = aligned ? marker : (w1[3] | marker); break;
    case  8: w2[0] = aligned ? marker : (w2[0] | marker); break;
    case  9: w2[1] = aligned ? marker : (w2[1] | marker); break;
    case 10: w2[2] = aligned ? marker : (w2[2] | marker); break;
    case 11: w2[3] = aligned ? marker : (w2[3] | marker); break;
  }
}
584
585 // before: append_0x01_4
/**
 * Places the byte 0x01 at byte position `offset` of the 64-byte buffer formed
 * by w0, w1, w2 and w3 in that order (byte 0 is the least significant byte of
 * w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x01;
 * otherwise 0x01 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 64 or more leave all blocks untouched.
 */
static void append_0x01_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x01 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case  0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case  1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case  2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case  3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case  4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case  5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case  6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case  7: w1[3] = aligned ? marker : (w1[3] | marker); break;
    case  8: w2[0] = aligned ? marker : (w2[0] | marker); break;
    case  9: w2[1] = aligned ? marker : (w2[1] | marker); break;
    case 10: w2[2] = aligned ? marker : (w2[2] | marker); break;
    case 11: w2[3] = aligned ? marker : (w2[3] | marker); break;
    case 12: w3[0] = aligned ? marker : (w3[0] | marker); break;
    case 13: w3[1] = aligned ? marker : (w3[1] | marker); break;
    case 14: w3[2] = aligned ? marker : (w3[2] | marker); break;
    case 15: w3[3] = aligned ? marker : (w3[3] | marker); break;
  }
}
847
848 // before: append_0x01_8
/**
 * Places the byte 0x01 at byte position `offset` of the 128-byte buffer
 * formed by w0 .. w7 in that order (byte 0 is the least significant byte of
 * w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x01;
 * otherwise 0x01 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 128 or more leave all blocks untouched.
 */
static void append_0x01_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x01 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case  0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case  1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case  2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case  3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case  4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case  5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case  6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case  7: w1[3] = aligned ? marker : (w1[3] | marker); break;
    case  8: w2[0] = aligned ? marker : (w2[0] | marker); break;
    case  9: w2[1] = aligned ? marker : (w2[1] | marker); break;
    case 10: w2[2] = aligned ? marker : (w2[2] | marker); break;
    case 11: w2[3] = aligned ? marker : (w2[3] | marker); break;
    case 12: w3[0] = aligned ? marker : (w3[0] | marker); break;
    case 13: w3[1] = aligned ? marker : (w3[1] | marker); break;
    case 14: w3[2] = aligned ? marker : (w3[2] | marker); break;
    case 15: w3[3] = aligned ? marker : (w3[3] | marker); break;
    case 16: w4[0] = aligned ? marker : (w4[0] | marker); break;
    case 17: w4[1] = aligned ? marker : (w4[1] | marker); break;
    case 18: w4[2] = aligned ? marker : (w4[2] | marker); break;
    case 19: w4[3] = aligned ? marker : (w4[3] | marker); break;
    case 20: w5[0] = aligned ? marker : (w5[0] | marker); break;
    case 21: w5[1] = aligned ? marker : (w5[1] | marker); break;
    case 22: w5[2] = aligned ? marker : (w5[2] | marker); break;
    case 23: w5[3] = aligned ? marker : (w5[3] | marker); break;
    case 24: w6[0] = aligned ? marker : (w6[0] | marker); break;
    case 25: w6[1] = aligned ? marker : (w6[1] | marker); break;
    case 26: w6[2] = aligned ? marker : (w6[2] | marker); break;
    case 27: w6[3] = aligned ? marker : (w6[3] | marker); break;
    case 28: w7[0] = aligned ? marker : (w7[0] | marker); break;
    case 29: w7[1] = aligned ? marker : (w7[1] | marker); break;
    case 30: w7[2] = aligned ? marker : (w7[2] | marker); break;
    case 31: w7[3] = aligned ? marker : (w7[3] | marker); break;
  }
}
1366
1367 // before: append_0x02_1
/**
 * Places the byte 0x02 at byte position `offset` of the 16-byte block w0
 * (byte 0 is the least significant byte of w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x02;
 * otherwise 0x02 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 16 or more leave the block untouched.
 */
static void append_0x02_1x4 (u32 w0[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x02 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case 0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case 1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case 2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case 3: w0[3] = aligned ? marker : (w0[3] | marker); break;
  }
}
1437
1438 // before: append_0x02_2
/**
 * Places the byte 0x02 at byte position `offset` of the 32-byte buffer formed
 * by w0 followed by w1 (byte 0 is the least significant byte of w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x02;
 * otherwise 0x02 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 32 or more leave both blocks untouched.
 */
static void append_0x02_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x02 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case 0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case 1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case 2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case 3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case 4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case 5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case 6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case 7: w1[3] = aligned ? marker : (w1[3] | marker); break;
  }
}
1572
1573 // before: append_0x02_3
/**
 * Places the byte 0x02 at byte position `offset` of the 48-byte buffer formed
 * by w0, w1 and w2 in that order (byte 0 is the least significant byte of
 * w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x02;
 * otherwise 0x02 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 48 or more leave all blocks untouched.
 */
static void append_0x02_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x02 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case  0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case  1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case  2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case  3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case  4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case  5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case  6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case  7: w1[3] = aligned ? marker : (w1[3] | marker); break;
    case  8: w2[0] = aligned ? marker : (w2[0] | marker); break;
    case  9: w2[1] = aligned ? marker : (w2[1] | marker); break;
    case 10: w2[2] = aligned ? marker : (w2[2] | marker); break;
    case 11: w2[3] = aligned ? marker : (w2[3] | marker); break;
  }
}
1771
1772 // before: append_0x02_4
/**
 * Places the byte 0x02 at byte position `offset` of the 64-byte buffer formed
 * by w0, w1, w2 and w3 in that order (byte 0 is the least significant byte of
 * w0[0]).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with 0x02;
 * otherwise 0x02 is OR-ed into the selected byte and the rest of the word is
 * kept. Offsets of 64 or more leave all blocks untouched.
 */
static void append_0x02_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 shift   = (offset & 3) * 8;
  const u32 marker  = (u32) 0x02 << shift;
  const int aligned = (shift == 0);

  // explicit cases keep every array index a compile-time constant
  switch (offset / 4)
  {
    case  0: w0[0] = aligned ? marker : (w0[0] | marker); break;
    case  1: w0[1] = aligned ? marker : (w0[1] | marker); break;
    case  2: w0[2] = aligned ? marker : (w0[2] | marker); break;
    case  3: w0[3] = aligned ? marker : (w0[3] | marker); break;
    case  4: w1[0] = aligned ? marker : (w1[0] | marker); break;
    case  5: w1[1] = aligned ? marker : (w1[1] | marker); break;
    case  6: w1[2] = aligned ? marker : (w1[2] | marker); break;
    case  7: w1[3] = aligned ? marker : (w1[3] | marker); break;
    case  8: w2[0] = aligned ? marker : (w2[0] | marker); break;
    case  9: w2[1] = aligned ? marker : (w2[1] | marker); break;
    case 10: w2[2] = aligned ? marker : (w2[2] | marker); break;
    case 11: w2[3] = aligned ? marker : (w2[3] | marker); break;
    case 12: w3[0] = aligned ? marker : (w3[0] | marker); break;
    case 13: w3[1] = aligned ? marker : (w3[1] | marker); break;
    case 14: w3[2] = aligned ? marker : (w3[2] | marker); break;
    case 15: w3[3] = aligned ? marker : (w3[3] | marker); break;
  }
}
2034
2035 // before: append_0x02_8
/**
 * Place the byte 0x02 at byte position `offset` of the 128-byte buffer made
 * up of w0..w7 (four 32-bit words each; byte 0 of a word is its lowest 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x02, discarding whatever it held.
 * - any other offset: 0x02 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 128: no word is modified.
 *
 * Every case uses a constant array index, so the arrays are never indexed
 * with a runtime value.
 */
static void append_0x02_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x02u << (lane * 8); // 0x02 moved into that byte position

  switch (offset / 4)
  {
    case  0: w0[0] = (lane == 0) ? mark : (w0[0] | mark); break;
    case  1: w0[1] = (lane == 0) ? mark : (w0[1] | mark); break;
    case  2: w0[2] = (lane == 0) ? mark : (w0[2] | mark); break;
    case  3: w0[3] = (lane == 0) ? mark : (w0[3] | mark); break;

    case  4: w1[0] = (lane == 0) ? mark : (w1[0] | mark); break;
    case  5: w1[1] = (lane == 0) ? mark : (w1[1] | mark); break;
    case  6: w1[2] = (lane == 0) ? mark : (w1[2] | mark); break;
    case  7: w1[3] = (lane == 0) ? mark : (w1[3] | mark); break;

    case  8: w2[0] = (lane == 0) ? mark : (w2[0] | mark); break;
    case  9: w2[1] = (lane == 0) ? mark : (w2[1] | mark); break;
    case 10: w2[2] = (lane == 0) ? mark : (w2[2] | mark); break;
    case 11: w2[3] = (lane == 0) ? mark : (w2[3] | mark); break;

    case 12: w3[0] = (lane == 0) ? mark : (w3[0] | mark); break;
    case 13: w3[1] = (lane == 0) ? mark : (w3[1] | mark); break;
    case 14: w3[2] = (lane == 0) ? mark : (w3[2] | mark); break;
    case 15: w3[3] = (lane == 0) ? mark : (w3[3] | mark); break;

    case 16: w4[0] = (lane == 0) ? mark : (w4[0] | mark); break;
    case 17: w4[1] = (lane == 0) ? mark : (w4[1] | mark); break;
    case 18: w4[2] = (lane == 0) ? mark : (w4[2] | mark); break;
    case 19: w4[3] = (lane == 0) ? mark : (w4[3] | mark); break;

    case 20: w5[0] = (lane == 0) ? mark : (w5[0] | mark); break;
    case 21: w5[1] = (lane == 0) ? mark : (w5[1] | mark); break;
    case 22: w5[2] = (lane == 0) ? mark : (w5[2] | mark); break;
    case 23: w5[3] = (lane == 0) ? mark : (w5[3] | mark); break;

    case 24: w6[0] = (lane == 0) ? mark : (w6[0] | mark); break;
    case 25: w6[1] = (lane == 0) ? mark : (w6[1] | mark); break;
    case 26: w6[2] = (lane == 0) ? mark : (w6[2] | mark); break;
    case 27: w6[3] = (lane == 0) ? mark : (w6[3] | mark); break;

    case 28: w7[0] = (lane == 0) ? mark : (w7[0] | mark); break;
    case 29: w7[1] = (lane == 0) ? mark : (w7[1] | mark); break;
    case 30: w7[2] = (lane == 0) ? mark : (w7[2] | mark); break;
    case 31: w7[3] = (lane == 0) ? mark : (w7[3] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
2553
2554 // before: append_0x80_1
/**
 * Place the byte 0x80 at byte position `offset` of the 16-byte buffer w0
 * (four 32-bit words; byte 0 of a word is its lowest 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x80, discarding whatever it held.
 * - any other offset: 0x80 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 16: no word is modified.
 */
static void append_0x80_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x80u << (lane * 8); // 0x80 moved into that byte position

  switch (offset / 4)
  {
    case 0: w0[0] = (lane == 0) ? mark : (w0[0] | mark); break;
    case 1: w0[1] = (lane == 0) ? mark : (w0[1] | mark); break;
    case 2: w0[2] = (lane == 0) ? mark : (w0[2] | mark); break;
    case 3: w0[3] = (lane == 0) ? mark : (w0[3] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
2624
2625 // before: append_0x80_2
/**
 * Place the byte 0x80 at byte position `offset` of the 32-byte buffer made
 * up of w0 and w1 (four 32-bit words each; byte 0 of a word is its lowest
 * 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x80, discarding whatever it held.
 * - any other offset: 0x80 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 32: no word is modified.
 */
static void append_0x80_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x80u << (lane * 8); // 0x80 moved into that byte position

  switch (offset / 4)
  {
    case 0: w0[0] = (lane == 0) ? mark : (w0[0] | mark); break;
    case 1: w0[1] = (lane == 0) ? mark : (w0[1] | mark); break;
    case 2: w0[2] = (lane == 0) ? mark : (w0[2] | mark); break;
    case 3: w0[3] = (lane == 0) ? mark : (w0[3] | mark); break;

    case 4: w1[0] = (lane == 0) ? mark : (w1[0] | mark); break;
    case 5: w1[1] = (lane == 0) ? mark : (w1[1] | mark); break;
    case 6: w1[2] = (lane == 0) ? mark : (w1[2] | mark); break;
    case 7: w1[3] = (lane == 0) ? mark : (w1[3] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
2759
2760 // before: append_0x80_2_be
/**
 * Byte-swapped counterpart for the 32-byte buffer made up of w0 and w1:
 * OR the byte 0x80 into byte position `offset`, where byte 0 of a word is
 * its HIGHEST 8 bits.
 *
 * The marker is always OR-ed in, also when offset is a multiple of 4, so
 * bits already present in the target word are kept.
 * For offset >= 32 no word is modified.
 */
static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
{
  // 0x80 in the top byte, moved down one byte per position inside the word
  const u32 mark = 0x80000000u >> ((offset & 3) * 8);

  switch (offset / 4)
  {
    case 0: w0[0] |= mark; break;
    case 1: w0[1] |= mark; break;
    case 2: w0[2] |= mark; break;
    case 3: w0[3] |= mark; break;

    case 4: w1[0] |= mark; break;
    case 5: w1[1] |= mark; break;
    case 6: w1[2] |= mark; break;
    case 7: w1[3] |= mark; break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
2894
2895 // before: append_0x80_3
/**
 * Place the byte 0x80 at byte position `offset` of the 48-byte buffer made
 * up of w0, w1 and w2 (four 32-bit words each; byte 0 of a word is its
 * lowest 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x80, discarding whatever it held.
 * - any other offset: 0x80 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 48: no word is modified.
 */
static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x80u << (lane * 8); // 0x80 moved into that byte position

  switch (offset / 4)
  {
    case  0: w0[0] = (lane == 0) ? mark : (w0[0] | mark); break;
    case  1: w0[1] = (lane == 0) ? mark : (w0[1] | mark); break;
    case  2: w0[2] = (lane == 0) ? mark : (w0[2] | mark); break;
    case  3: w0[3] = (lane == 0) ? mark : (w0[3] | mark); break;

    case  4: w1[0] = (lane == 0) ? mark : (w1[0] | mark); break;
    case  5: w1[1] = (lane == 0) ? mark : (w1[1] | mark); break;
    case  6: w1[2] = (lane == 0) ? mark : (w1[2] | mark); break;
    case  7: w1[3] = (lane == 0) ? mark : (w1[3] | mark); break;

    case  8: w2[0] = (lane == 0) ? mark : (w2[0] | mark); break;
    case  9: w2[1] = (lane == 0) ? mark : (w2[1] | mark); break;
    case 10: w2[2] = (lane == 0) ? mark : (w2[2] | mark); break;
    case 11: w2[3] = (lane == 0) ? mark : (w2[3] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
3093
3094 // before: append_0x80_4
/**
 * Place the byte 0x80 at byte position `offset` of the 64-byte buffer made
 * up of w0..w3 (four 32-bit words each; byte 0 of a word is its lowest
 * 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x80, discarding whatever it held.
 * - any other offset: 0x80 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 64: no word is modified.
 */
static void append_0x80_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x80u << (lane * 8); // 0x80 moved into that byte position

  switch (offset / 4)
  {
    case  0: w0[0] = (lane == 0) ? mark : (w0[0] | mark); break;
    case  1: w0[1] = (lane == 0) ? mark : (w0[1] | mark); break;
    case  2: w0[2] = (lane == 0) ? mark : (w0[2] | mark); break;
    case  3: w0[3] = (lane == 0) ? mark : (w0[3] | mark); break;

    case  4: w1[0] = (lane == 0) ? mark : (w1[0] | mark); break;
    case  5: w1[1] = (lane == 0) ? mark : (w1[1] | mark); break;
    case  6: w1[2] = (lane == 0) ? mark : (w1[2] | mark); break;
    case  7: w1[3] = (lane == 0) ? mark : (w1[3] | mark); break;

    case  8: w2[0] = (lane == 0) ? mark : (w2[0] | mark); break;
    case  9: w2[1] = (lane == 0) ? mark : (w2[1] | mark); break;
    case 10: w2[2] = (lane == 0) ? mark : (w2[2] | mark); break;
    case 11: w2[3] = (lane == 0) ? mark : (w2[3] | mark); break;

    case 12: w3[0] = (lane == 0) ? mark : (w3[0] | mark); break;
    case 13: w3[1] = (lane == 0) ? mark : (w3[1] | mark); break;
    case 14: w3[2] = (lane == 0) ? mark : (w3[2] | mark); break;
    case 15: w3[3] = (lane == 0) ? mark : (w3[3] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
3356
3357 // before: append_0x80_8
/**
 * Place the byte 0x80 at byte position `offset` of the 128-byte buffer made
 * up of w0..w7 (four 32-bit words each; byte 0 of a word is its lowest
 * 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x80, discarding whatever it held.
 * - any other offset: 0x80 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 128: no word is modified.
 *
 * Every case uses a constant array index, so the arrays are never indexed
 * with a runtime value.
 */
static void append_0x80_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x80u << (lane * 8); // 0x80 moved into that byte position

  switch (offset / 4)
  {
    case  0: w0[0] = (lane == 0) ? mark : (w0[0] | mark); break;
    case  1: w0[1] = (lane == 0) ? mark : (w0[1] | mark); break;
    case  2: w0[2] = (lane == 0) ? mark : (w0[2] | mark); break;
    case  3: w0[3] = (lane == 0) ? mark : (w0[3] | mark); break;

    case  4: w1[0] = (lane == 0) ? mark : (w1[0] | mark); break;
    case  5: w1[1] = (lane == 0) ? mark : (w1[1] | mark); break;
    case  6: w1[2] = (lane == 0) ? mark : (w1[2] | mark); break;
    case  7: w1[3] = (lane == 0) ? mark : (w1[3] | mark); break;

    case  8: w2[0] = (lane == 0) ? mark : (w2[0] | mark); break;
    case  9: w2[1] = (lane == 0) ? mark : (w2[1] | mark); break;
    case 10: w2[2] = (lane == 0) ? mark : (w2[2] | mark); break;
    case 11: w2[3] = (lane == 0) ? mark : (w2[3] | mark); break;

    case 12: w3[0] = (lane == 0) ? mark : (w3[0] | mark); break;
    case 13: w3[1] = (lane == 0) ? mark : (w3[1] | mark); break;
    case 14: w3[2] = (lane == 0) ? mark : (w3[2] | mark); break;
    case 15: w3[3] = (lane == 0) ? mark : (w3[3] | mark); break;

    case 16: w4[0] = (lane == 0) ? mark : (w4[0] | mark); break;
    case 17: w4[1] = (lane == 0) ? mark : (w4[1] | mark); break;
    case 18: w4[2] = (lane == 0) ? mark : (w4[2] | mark); break;
    case 19: w4[3] = (lane == 0) ? mark : (w4[3] | mark); break;

    case 20: w5[0] = (lane == 0) ? mark : (w5[0] | mark); break;
    case 21: w5[1] = (lane == 0) ? mark : (w5[1] | mark); break;
    case 22: w5[2] = (lane == 0) ? mark : (w5[2] | mark); break;
    case 23: w5[3] = (lane == 0) ? mark : (w5[3] | mark); break;

    case 24: w6[0] = (lane == 0) ? mark : (w6[0] | mark); break;
    case 25: w6[1] = (lane == 0) ? mark : (w6[1] | mark); break;
    case 26: w6[2] = (lane == 0) ? mark : (w6[2] | mark); break;
    case 27: w6[3] = (lane == 0) ? mark : (w6[3] | mark); break;

    case 28: w7[0] = (lane == 0) ? mark : (w7[0] | mark); break;
    case 29: w7[1] = (lane == 0) ? mark : (w7[1] | mark); break;
    case 30: w7[2] = (lane == 0) ? mark : (w7[2] | mark); break;
    case 31: w7[3] = (lane == 0) ? mark : (w7[3] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
3875
3876 // before: append_0x80_4
/**
 * Place the byte 0x80 at byte position `offset` of the 64-byte buffer w
 * (sixteen 32-bit words; byte 0 of a word is its lowest 8 bits).
 *
 * - offset on a word boundary (offset % 4 == 0): the word is overwritten
 *   with 0x80, discarding whatever it held.
 * - any other offset: 0x80 is OR-ed into the matching byte lane and the
 *   remaining bytes of the word are kept.
 * - offset >= 64: no word is modified.
 *
 * Every case uses a constant array index, so w is never indexed with a
 * runtime value.
 */
static void append_0x80_1x16 (u32 w[16], const u32 offset)
{
  const u32 lane = offset & 3;          // byte position inside the target word
  const u32 mark = 0x80u << (lane * 8); // 0x80 moved into that byte position

  switch (offset / 4)
  {
    case  0: w[ 0] = (lane == 0) ? mark : (w[ 0] | mark); break;
    case  1: w[ 1] = (lane == 0) ? mark : (w[ 1] | mark); break;
    case  2: w[ 2] = (lane == 0) ? mark : (w[ 2] | mark); break;
    case  3: w[ 3] = (lane == 0) ? mark : (w[ 3] | mark); break;
    case  4: w[ 4] = (lane == 0) ? mark : (w[ 4] | mark); break;
    case  5: w[ 5] = (lane == 0) ? mark : (w[ 5] | mark); break;
    case  6: w[ 6] = (lane == 0) ? mark : (w[ 6] | mark); break;
    case  7: w[ 7] = (lane == 0) ? mark : (w[ 7] | mark); break;
    case  8: w[ 8] = (lane == 0) ? mark : (w[ 8] | mark); break;
    case  9: w[ 9] = (lane == 0) ? mark : (w[ 9] | mark); break;
    case 10: w[10] = (lane == 0) ? mark : (w[10] | mark); break;
    case 11: w[11] = (lane == 0) ? mark : (w[11] | mark); break;
    case 12: w[12] = (lane == 0) ? mark : (w[12] | mark); break;
    case 13: w[13] = (lane == 0) ? mark : (w[13] | mark); break;
    case 14: w[14] = (lane == 0) ? mark : (w[14] | mark); break;
    case 15: w[15] = (lane == 0) ? mark : (w[15] | mark); break;

    default: break; // offset past the last byte: leave everything untouched
  }
}
4138
4139 // before: append_0x80_8
/**
 * Store the 0x80 marker byte at byte position `offset` of the buffer `w`.
 *
 * Byte b (0..3) of word i is bits 8*b .. 8*b+7 of w[i], so position `offset`
 * is byte (offset & 3) of word (offset >> 2).
 *
 * - first byte of a word ((offset & 3) == 0): the whole word is replaced by
 *   0x80, its previous content is discarded
 * - any other byte: the marker is OR-ed into the word, the rest is kept
 * - offset > 127: nothing is written
 */
static void append_0x80_1x32 (u32 w[32], const u32 offset)
{
  const u32 byte_in_word = offset & 3;

  // 0x80, 0x8000, 0x800000 or 0x80000000
  const u32 marker = (u32) 0x80 << (byte_in_word * 8);

  // on a word boundary the old word is not read at all, it is simply replaced
  const u32 fresh = (byte_in_word == 0);

  // one case per word; every access names its word with a literal index
  switch (offset >> 2)
  {
    case  0: w[ 0] = fresh ? marker : (w[ 0] | marker); break;
    case  1: w[ 1] = fresh ? marker : (w[ 1] | marker); break;
    case  2: w[ 2] = fresh ? marker : (w[ 2] | marker); break;
    case  3: w[ 3] = fresh ? marker : (w[ 3] | marker); break;
    case  4: w[ 4] = fresh ? marker : (w[ 4] | marker); break;
    case  5: w[ 5] = fresh ? marker : (w[ 5] | marker); break;
    case  6: w[ 6] = fresh ? marker : (w[ 6] | marker); break;
    case  7: w[ 7] = fresh ? marker : (w[ 7] | marker); break;
    case  8: w[ 8] = fresh ? marker : (w[ 8] | marker); break;
    case  9: w[ 9] = fresh ? marker : (w[ 9] | marker); break;
    case 10: w[10] = fresh ? marker : (w[10] | marker); break;
    case 11: w[11] = fresh ? marker : (w[11] | marker); break;
    case 12: w[12] = fresh ? marker : (w[12] | marker); break;
    case 13: w[13] = fresh ? marker : (w[13] | marker); break;
    case 14: w[14] = fresh ? marker : (w[14] | marker); break;
    case 15: w[15] = fresh ? marker : (w[15] | marker); break;
    case 16: w[16] = fresh ? marker : (w[16] | marker); break;
    case 17: w[17] = fresh ? marker : (w[17] | marker); break;
    case 18: w[18] = fresh ? marker : (w[18] | marker); break;
    case 19: w[19] = fresh ? marker : (w[19] | marker); break;
    case 20: w[20] = fresh ? marker : (w[20] | marker); break;
    case 21: w[21] = fresh ? marker : (w[21] | marker); break;
    case 22: w[22] = fresh ? marker : (w[22] | marker); break;
    case 23: w[23] = fresh ? marker : (w[23] | marker); break;
    case 24: w[24] = fresh ? marker : (w[24] | marker); break;
    case 25: w[25] = fresh ? marker : (w[25] | marker); break;
    case 26: w[26] = fresh ? marker : (w[26] | marker); break;
    case 27: w[27] = fresh ? marker : (w[27] | marker); break;
    case 28: w[28] = fresh ? marker : (w[28] | marker); break;
    case 29: w[29] = fresh ? marker : (w[29] | marker); break;
    case 30: w[30] = fresh ? marker : (w[30] | marker); break;
    case 31: w[31] = fresh ? marker : (w[31] | marker); break;
  }
}
4657
4658 // before: device_memcat2L
/**
 * Splice the words of src_r0 into dst0 starting at byte position `offset`.
 *
 * Byte b of a word is bits 8*b .. 8*b+7, so the splice begins in word
 * (offset >> 2) at bit (offset & 3) * 8.
 *
 * offset 1..3 : dst0[0] = src_l0[0] OR-ed with the shifted src_r0[0],
 *               dst0[1] = rest of src_r0[0] joined with the start of src_r0[1]
 * offset 4    : dst0[1] = src_r0[0], dst0[0] is not written
 * offset 5..7 : dst0[1] = src_l0[1] OR-ed with the shifted src_r0[0],
 *               dst0[0] is not written
 * otherwise   : nothing is written (this includes offset 0)
 *
 * src_l0 bits at or above the splice position are OR-ed with the incoming
 * data, they are not masked off. Data of src_r0 that does not fit is dropped.
 *
 * NOTE(review): dst0 words in front of the splice are left alone, presumably
 * because the caller already stored the left-hand data there - confirm at the
 * call sites.
 */
static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
{
  if (offset == 0) return;
  if (offset >  7) return;

  const u32 lo = (offset & 3) * 8;

  if (lo == 0)
  {
    // offset == 4: plain word copy, no shifting involved

    dst0[1] = src_r0[0];

    return;
  }

  const u32 hi = 32 - lo;

  if (offset < 4)
  {
    dst0[0] = src_l0[0] | (src_r0[0] << lo);
    dst0[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
  }
  else
  {
    dst0[1] = src_l0[1] | (src_r0[0] << lo);
  }
}
4695
4696 // before: device_memcat4L
/**
 * Splice the four words of src_r0 into dst0 starting at byte position
 * `offset`.
 *
 * Byte b of a word is bits 8*b .. 8*b+7, so the splice begins in word
 * (offset >> 2) at bit (offset & 3) * 8.
 *
 * offset 4, 8, 12      : src_r0 words are copied unchanged into
 *                        dst0[offset / 4 .. 3]
 * other offsets 1..15  : the first written word is src_l0[offset / 4] OR-ed
 *                        with the shifted src_r0[0]; each following word up
 *                        to dst0[3] is joined from two neighbouring src_r0
 *                        words
 * offset 0 or above 15 : nothing is written
 *
 * src_l0 bits at or above the splice position are OR-ed with the incoming
 * data, they are not masked off. Data of src_r0 that does not fit into dst0
 * is dropped.
 *
 * NOTE(review): dst0 words in front of the splice are left alone, presumably
 * because the caller already stored the left-hand data there - confirm at the
 * call sites.
 */
static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
{
  const u32 first = offset >> 2;      // first destination word that gets written
  const u32 lo    = (offset & 3) * 8; // bit position of the splice in that word

  if (lo == 0)
  {
    // word-aligned: straight copies. There is deliberately no case 0.

    switch (first)
    {
      case 1:
        dst0[1] = src_r0[0];
        dst0[2] = src_r0[1];
        dst0[3] = src_r0[2];
        break;

      case 2:
        dst0[2] = src_r0[0];
        dst0[3] = src_r0[1];
        break;

      case 3:
        dst0[3] = src_r0[0];
        break;
    }

    return;
  }

  const u32 hi = 32 - lo;

  switch (first)
  {
    case 0:
      dst0[0] = src_l0[0] | (src_r0[0] << lo);
      dst0[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst0[2] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst0[3] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      break;

    case 1:
      dst0[1] = src_l0[1] | (src_r0[0] << lo);
      dst0[2] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst0[3] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      break;

    case 2:
      dst0[2] = src_l0[2] | (src_r0[0] << lo);
      dst0[3] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      break;

    case 3:
      dst0[3] = src_l0[3] | (src_r0[0] << lo);
      break;
  }
}
4783
4784 // before: device_memcat8L
/**
 * Splice the four words of src_r0 into the eight-word destination
 * dst0[0..3], dst1[0..3] starting at byte position `offset`.
 *
 * The destination is addressed as words 0..7 (0..3 = dst0, 4..7 = dst1) and
 * the left operand the same way (src_l0, src_l1). Byte b of a word is bits
 * 8*b .. 8*b+7, so the splice begins in word (offset >> 2) at bit
 * (offset & 3) * 8.
 *
 * offset 4, 8, .. 28   : src_r0[0..3] are copied unchanged into the words
 *                        starting at offset / 4, as far as they fit
 * other offsets 1..31  : the first written word is the matching src_l word
 *                        OR-ed with the shifted src_r0[0]; the next three
 *                        words are joined from neighbouring src_r0 words; a
 *                        fifth word receives the left-over high bytes of
 *                        src_r0[3] - each of them only if it is still inside
 *                        the eight destination words
 * offset 0 or above 31 : nothing is written
 *
 * src_l bits at or above the splice position are OR-ed with the incoming
 * data, they are not masked off.
 *
 * NOTE(review): destination words in front of the splice and behind the last
 * written word are left alone, presumably because the caller prepared them -
 * confirm at the call sites.
 */
static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
{
  const u32 first = offset >> 2;      // first destination word that gets written
  const u32 lo    = (offset & 3) * 8; // bit position of the splice in that word

  if (lo == 0)
  {
    // word-aligned: straight copies. There is deliberately no case 0.

    switch (first)
    {
      case 1:
        dst0[1] = src_r0[0];
        dst0[2] = src_r0[1];
        dst0[3] = src_r0[2];
        dst1[0] = src_r0[3];
        break;

      case 2:
        dst0[2] = src_r0[0];
        dst0[3] = src_r0[1];
        dst1[0] = src_r0[2];
        dst1[1] = src_r0[3];
        break;

      case 3:
        dst0[3] = src_r0[0];
        dst1[0] = src_r0[1];
        dst1[1] = src_r0[2];
        dst1[2] = src_r0[3];
        break;

      case 4:
        dst1[0] = src_r0[0];
        dst1[1] = src_r0[1];
        dst1[2] = src_r0[2];
        dst1[3] = src_r0[3];
        break;

      case 5:
        dst1[1] = src_r0[0];
        dst1[2] = src_r0[1];
        dst1[3] = src_r0[2];
        break;

      case 6:
        dst1[2] = src_r0[0];
        dst1[3] = src_r0[1];
        break;

      case 7:
        dst1[3] = src_r0[0];
        break;
    }

    return;
  }

  const u32 hi = 32 - lo;

  switch (first)
  {
    case 0:
      dst0[0] = src_l0[0] | (src_r0[0] << lo);
      dst0[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst0[2] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst0[3] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[0] = src_r0[3] >> hi;
      break;

    case 1:
      dst0[1] = src_l0[1] | (src_r0[0] << lo);
      dst0[2] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst0[3] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[0] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[1] = src_r0[3] >> hi;
      break;

    case 2:
      dst0[2] = src_l0[2] | (src_r0[0] << lo);
      dst0[3] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[0] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[1] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[2] = src_r0[3] >> hi;
      break;

    case 3:
      dst0[3] = src_l0[3] | (src_r0[0] << lo);
      dst1[0] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[1] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[2] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[3] = src_r0[3] >> hi;
      break;

    case 4:
      dst1[0] = src_l1[0] | (src_r0[0] << lo);
      dst1[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[2] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[3] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      break;

    case 5:
      dst1[1] = src_l1[1] | (src_r0[0] << lo);
      dst1[2] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[3] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      break;

    case 6:
      dst1[2] = src_l1[2] | (src_r0[0] << lo);
      dst1[3] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      break;

    case 7:
      dst1[3] = src_l1[3] | (src_r0[0] << lo);
      break;
  }
}
4995
4996 // before: device_memcat12L
/**
 * Splice the four words of src_r0 into the twelve-word destination
 * dst0[0..3], dst1[0..3], dst2[0..3] starting at byte position `offset`.
 *
 * The destination is addressed as words 0..11 (0..3 = dst0, 4..7 = dst1,
 * 8..11 = dst2) and the left operand the same way (src_l0, src_l1, src_l2).
 * Byte b of a word is bits 8*b .. 8*b+7, so the splice begins in word
 * (offset >> 2) at bit (offset & 3) * 8.
 *
 * offset 4, 8, .. 44   : src_r0[0..3] are copied unchanged into the words
 *                        starting at offset / 4, as far as they fit
 * other offsets 1..47  : the first written word is the matching src_l word
 *                        OR-ed with the shifted src_r0[0]; the next three
 *                        words are joined from neighbouring src_r0 words; a
 *                        fifth word receives the left-over high bytes of
 *                        src_r0[3] - each of them only if it is still inside
 *                        the twelve destination words
 * offset 0 or above 47 : nothing is written
 *
 * src_l bits at or above the splice position are OR-ed with the incoming
 * data, they are not masked off.
 *
 * NOTE(review): destination words in front of the splice and behind the last
 * written word are left alone, presumably because the caller prepared them -
 * confirm at the call sites.
 */
static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
{
  const u32 first = offset >> 2;      // first destination word that gets written
  const u32 lo    = (offset & 3) * 8; // bit position of the splice in that word

  if (lo == 0)
  {
    // word-aligned: straight copies. There is deliberately no case 0.

    switch (first)
    {
      case 1:
        dst0[1] = src_r0[0];
        dst0[2] = src_r0[1];
        dst0[3] = src_r0[2];
        dst1[0] = src_r0[3];
        break;

      case 2:
        dst0[2] = src_r0[0];
        dst0[3] = src_r0[1];
        dst1[0] = src_r0[2];
        dst1[1] = src_r0[3];
        break;

      case 3:
        dst0[3] = src_r0[0];
        dst1[0] = src_r0[1];
        dst1[1] = src_r0[2];
        dst1[2] = src_r0[3];
        break;

      case 4:
        dst1[0] = src_r0[0];
        dst1[1] = src_r0[1];
        dst1[2] = src_r0[2];
        dst1[3] = src_r0[3];
        break;

      case 5:
        dst1[1] = src_r0[0];
        dst1[2] = src_r0[1];
        dst1[3] = src_r0[2];
        dst2[0] = src_r0[3];
        break;

      case 6:
        dst1[2] = src_r0[0];
        dst1[3] = src_r0[1];
        dst2[0] = src_r0[2];
        dst2[1] = src_r0[3];
        break;

      case 7:
        dst1[3] = src_r0[0];
        dst2[0] = src_r0[1];
        dst2[1] = src_r0[2];
        dst2[2] = src_r0[3];
        break;

      case 8:
        dst2[0] = src_r0[0];
        dst2[1] = src_r0[1];
        dst2[2] = src_r0[2];
        dst2[3] = src_r0[3];
        break;

      case 9:
        dst2[1] = src_r0[0];
        dst2[2] = src_r0[1];
        dst2[3] = src_r0[2];
        break;

      case 10:
        dst2[2] = src_r0[0];
        dst2[3] = src_r0[1];
        break;

      case 11:
        dst2[3] = src_r0[0];
        break;
    }

    return;
  }

  const u32 hi = 32 - lo;

  switch (first)
  {
    case 0:
      dst0[0] = src_l0[0] | (src_r0[0] << lo);
      dst0[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst0[2] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst0[3] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[0] = src_r0[3] >> hi;
      break;

    case 1:
      dst0[1] = src_l0[1] | (src_r0[0] << lo);
      dst0[2] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst0[3] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[0] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[1] = src_r0[3] >> hi;
      break;

    case 2:
      dst0[2] = src_l0[2] | (src_r0[0] << lo);
      dst0[3] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[0] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[1] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[2] = src_r0[3] >> hi;
      break;

    case 3:
      dst0[3] = src_l0[3] | (src_r0[0] << lo);
      dst1[0] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[1] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[2] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst1[3] = src_r0[3] >> hi;
      break;

    case 4:
      dst1[0] = src_l1[0] | (src_r0[0] << lo);
      dst1[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[2] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst1[3] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst2[0] = src_r0[3] >> hi;
      break;

    case 5:
      dst1[1] = src_l1[1] | (src_r0[0] << lo);
      dst1[2] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst1[3] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst2[0] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst2[1] = src_r0[3] >> hi;
      break;

    case 6:
      dst1[2] = src_l1[2] | (src_r0[0] << lo);
      dst1[3] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst2[0] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst2[1] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst2[2] = src_r0[3] >> hi;
      break;

    case 7:
      dst1[3] = src_l1[3] | (src_r0[0] << lo);
      dst2[0] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst2[1] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst2[2] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      dst2[3] = src_r0[3] >> hi;
      break;

    case 8:
      dst2[0] = src_l2[0] | (src_r0[0] << lo);
      dst2[1] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst2[2] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      dst2[3] = (src_r0[2] >> hi) | (src_r0[3] << lo);
      break;

    case 9:
      dst2[1] = src_l2[1] | (src_r0[0] << lo);
      dst2[2] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      dst2[3] = (src_r0[1] >> hi) | (src_r0[2] << lo);
      break;

    case 10:
      dst2[2] = src_l2[2] | (src_r0[0] << lo);
      dst2[3] = (src_r0[0] >> hi) | (src_r0[1] << lo);
      break;

    case 11:
      dst2[3] = src_l2[3] | (src_r0[0] << lo);
      break;
  }
}
5331
5332 // before: device_memcat12L
5333 static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
5334 {
5335 switch (offset)
5336 {
5337 case 0:
5338 dst0[0] = src_r0[0];
5339 dst0[1] = src_r0[1];
5340 dst0[2] = src_r0[2];
5341 dst0[3] = src_r0[3];
5342 dst1[0] = src_r1[0];
5343 dst1[1] = src_r1[1];
5344 dst1[2] = src_r1[2];
5345 dst1[3] = src_r1[3];
5346 break;
5347
5348 case 1:
5349 dst0[0] = src_l0[0] | src_r0[0] << 8;
5350 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
5351 dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
5352 dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
5353 dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
5354 dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
5355 dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
5356 dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
5357 dst2[0] = src_r1[3] >> 24;
5358 break;
5359
5360 case 2:
5361 dst0[0] = src_l0[0] | src_r0[0] << 16;
5362 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
5363 dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
5364 dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
5365 dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
5366 dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
5367 dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
5368 dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
5369 dst2[0] = src_r1[3] >> 16;
5370 break;
5371
5372 case 3:
5373 dst0[0] = src_l0[0] | src_r0[0] << 24;
5374 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
5375 dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
5376 dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
5377 dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
5378 dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
5379 dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
5380 dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
5381 dst2[0] = src_r1[3] >> 8;
5382 break;
5383
5384 case 4:
5385 dst0[1] = src_r0[0];
5386 dst0[2] = src_r0[1];
5387 dst0[3] = src_r0[2];
5388 dst1[0] = src_r0[3];
5389 dst1[1] = src_r1[0];
5390 dst1[2] = src_r1[1];
5391 dst1[3] = src_r1[2];
5392 dst2[0] = src_r1[3];
5393 break;
5394
5395 case 5:
5396 dst0[1] = src_l0[1] | src_r0[0] << 8;
5397 dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
5398 dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
5399 dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
5400 dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
5401 dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
5402 dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
5403 dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
5404 dst2[1] = src_r1[3] >> 24;
5405 break;
5406
5407 case 6:
5408 dst0[1] = src_l0[1] | src_r0[0] << 16;
5409 dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
5410 dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
5411 dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
5412 dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
5413 dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
5414 dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
5415 dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
5416 dst2[1] = src_r1[3] >> 16;
5417 break;
5418
5419 case 7:
5420 dst0[1] = src_l0[1] | src_r0[0] << 24;
5421 dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
5422 dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
5423 dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
5424 dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
5425 dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
5426 dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
5427 dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
5428 dst2[1] = src_r1[3] >> 8;
5429 break;
5430
5431 case 8:
5432 dst0[2] = src_r0[0];
5433 dst0[3] = src_r0[1];
5434 dst1[0] = src_r0[2];
5435 dst1[1] = src_r0[3];
5436 dst1[2] = src_r1[0];
5437 dst1[3] = src_r1[1];
5438 dst2[0] = src_r1[2];
5439 dst2[1] = src_r1[3];
5440 break;
5441
5442 case 9:
5443 dst0[2] = src_l0[2] | src_r0[0] << 8;
5444 dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
5445 dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
5446 dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
5447 dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
5448 dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
5449 dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
5450 dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
5451 dst2[2] = src_r1[3] >> 24;
5452 break;
5453
5454 case 10:
5455 dst0[2] = src_l0[2] | src_r0[0] << 16;
5456 dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
5457 dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
5458 dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
5459 dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
5460 dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
5461 dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
5462 dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
5463 dst2[2] = src_r1[3] >> 16;
5464 break;
5465
5466 case 11:
5467 dst0[2] = src_l0[2] | src_r0[0] << 24;
5468 dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
5469 dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
5470 dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
5471 dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
5472 dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
5473 dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
5474 dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
5475 dst2[2] = src_r1[3] >> 8;
5476 break;
5477
5478 case 12:
5479 dst0[3] = src_r0[0];
5480 dst1[0] = src_r0[1];
5481 dst1[1] = src_r0[2];
5482 dst1[2] = src_r0[3];
5483 dst1[3] = src_r1[0];
5484 dst2[0] = src_r1[1];
5485 dst2[1] = src_r1[2];
5486 dst2[2] = src_r1[3];
5487 break;
5488
5489 case 13:
5490 dst0[3] = src_l0[3] | src_r0[0] << 8;
5491 dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
5492 dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
5493 dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
5494 dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
5495 dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
5496 dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
5497 dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
5498 dst2[3] = src_r1[3] >> 24;
5499 break;
5500
5501 case 14:
5502 dst0[3] = src_l0[3] | src_r0[0] << 16;
5503 dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
5504 dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
5505 dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
5506 dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
5507 dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
5508 dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
5509 dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
5510 dst2[3] = src_r1[3] >> 16;
5511 break;
5512
5513 case 15:
5514 dst0[3] = src_l0[3] | src_r0[0] << 24;
5515 dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
5516 dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
5517 dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
5518 dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
5519 dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
5520 dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
5521 dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
5522 dst2[3] = src_r1[3] >> 8;
5523 break;
5524
5525 case 16:
5526 dst1[0] = src_r0[0];
5527 dst1[1] = src_r0[1];
5528 dst1[2] = src_r0[2];
5529 dst1[3] = src_r0[3];
5530 dst2[0] = src_r1[0];
5531 dst2[1] = src_r1[1];
5532 dst2[2] = src_r1[2];
5533 dst2[3] = src_r1[3];
5534 break;
5535
5536 case 17:
5537 dst1[0] = src_l1[0] | src_r0[0] << 8;
5538 dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
5539 dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
5540 dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
5541 dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
5542 dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
5543 dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
5544 dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
5545 break;
5546
5547 case 18:
5548 dst1[0] = src_l1[0] | src_r0[0] << 16;
5549 dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
5550 dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
5551 dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
5552 dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
5553 dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
5554 dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
5555 dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
5556 break;
5557
5558 case 19:
5559 dst1[0] = src_l1[0] | src_r0[0] << 24;
5560 dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
5561 dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
5562 dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
5563 dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
5564 dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
5565 dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
5566 dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
5567 break;
5568
5569 case 20:
5570 dst1[1] = src_r1[0];
5571 dst1[2] = src_r0[1];
5572 dst1[3] = src_r0[2];
5573 dst2[0] = src_r0[3];
5574 dst2[1] = src_r1[0];
5575 dst2[2] = src_r1[1];
5576 dst2[3] = src_r1[2];
5577 break;
5578
5579 case 21:
5580 dst1[1] = src_l1[1] | src_r0[0] << 8;
5581 dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
5582 dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
5583 dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
5584 dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
5585 dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
5586 dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
5587 break;
5588
5589 case 22:
5590 dst1[1] = src_l1[1] | src_r0[0] << 16;
5591 dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
5592 dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
5593 dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
5594 dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
5595 dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
5596 dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
5597 break;
5598
5599 case 23:
5600 dst1[1] = src_l1[1] | src_r0[0] << 24;
5601 dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
5602 dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
5603 dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
5604 dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
5605 dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
5606 dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
5607 break;
5608
5609 case 24:
5610 dst1[2] = src_r1[0];
5611 dst1[3] = src_r0[1];
5612 dst2[0] = src_r0[2];
5613 dst2[1] = src_r0[3];
5614 dst2[2] = src_r1[0];
5615 dst2[3] = src_r1[1];
5616 break;
5617
5618 case 25:
5619 dst1[2] = src_l1[2] | src_r0[0] << 8;
5620 dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
5621 dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
5622 dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
5623 dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
5624 dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
5625 break;
5626
5627 case 26:
5628 dst1[2] = src_l1[2] | src_r0[0] << 16;
5629 dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
5630 dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
5631 dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
5632 dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
5633 dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
5634 break;
5635
5636 case 27:
5637 dst1[2] = src_l1[2] | src_r0[0] << 24;
5638 dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
5639 dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
5640 dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
5641 dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
5642 dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
5643 break;
5644
5645 case 28:
5646 dst1[3] = src_r1[0];
5647 dst2[0] = src_r0[1];
5648 dst2[1] = src_r0[2];
5649 dst2[2] = src_r0[3];
5650 dst2[3] = src_r1[0];
5651 break;
5652
5653 case 29:
5654 dst1[3] = src_l1[3] | src_r0[0] << 8;
5655 dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
5656 dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
5657 dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
5658 dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
5659 break;
5660
5661 case 30:
5662 dst1[3] = src_l1[3] | src_r0[0] << 16;
5663 dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
5664 dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
5665 dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
5666 dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
5667 break;
5668
5669 case 31:
5670 dst1[3] = src_l1[3] | src_r0[0] << 24;
5671 dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
5672 dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
5673 dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
5674 dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
5675 break;
5676
5677 case 32:
5678 dst2[0] = src_r0[0];
5679 dst2[1] = src_r0[1];
5680 dst2[2] = src_r0[2];
5681 dst2[3] = src_r0[3];
5682 break;
5683
5684 case 33:
5685 dst2[0] = src_l2[0] | src_r0[0] << 8;
5686 dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
5687 dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
5688 dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
5689 break;
5690
5691 case 34:
5692 dst2[0] = src_l2[0] | src_r0[0] << 16;
5693 dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
5694 dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
5695 dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
5696 break;
5697
5698 case 35:
5699 dst2[0] = src_l2[0] | src_r0[0] << 24;
5700 dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
5701 dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
5702 dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
5703 break;
5704
5705 case 36:
5706 dst2[1] = src_r0[0];
5707 dst2[2] = src_r0[1];
5708 dst2[3] = src_r0[2];
5709 break;
5710
5711 case 37:
5712 dst2[1] = src_l2[1] | src_r0[0] << 8;
5713 dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
5714 dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
5715 break;
5716
5717 case 38:
5718 dst2[1] = src_l2[1] | src_r0[0] << 16;
5719 dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
5720 dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
5721 break;
5722
5723 case 39:
5724 dst2[1] = src_l2[1] | src_r0[0] << 24;
5725 dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
5726 dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
5727 break;
5728
5729 case 40:
5730 dst2[2] = src_r0[0];
5731 dst2[3] = src_r0[1];
5732 break;
5733
5734 case 41:
5735 dst2[2] = src_l2[2] | src_r0[0] << 8;
5736 dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
5737 break;
5738
5739 case 42:
5740 dst2[2] = src_l2[2] | src_r0[0] << 16;
5741 dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
5742 break;
5743
5744 case 43:
5745 dst2[2] = src_l2[2] | src_r0[0] << 24;
5746 dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
5747 break;
5748
5749 case 44:
5750 dst2[3] = src_r0[0];
5751 break;
5752
5753 case 45:
5754 dst2[3] = src_l2[3] | src_r0[0] << 8;
5755 break;
5756
5757 case 46:
5758 dst2[3] = src_l2[3] | src_r0[0] << 16;
5759 break;
5760
5761 case 47:
5762 dst2[3] = src_l2[3] | src_r0[0] << 24;
5763 break;
5764 }
5765 }
5766
5767 // before: memcat16_9
5768 static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
5769 {
5770 switch (offset)
5771 {
5772 case 0:
5773 w0[0] = append0[0];
5774 w0[1] = append0[1];
5775 w0[2] = append0[2];
5776 w0[3] = append0[3];
5777 w1[0] = append1[0];
5778 w1[1] = append1[1];
5779 w1[2] = append1[2];
5780 w1[3] = append1[3];
5781 w2[0] = append2[0];
5782 break;
5783
5784 case 1:
5785 w0[0] = w0[0] | append0[0] << 8;
5786 w0[1] = append0[0] >> 24 | append0[1] << 8;
5787 w0[2] = append0[1] >> 24 | append0[2] << 8;
5788 w0[3] = append0[2] >> 24 | append0[3] << 8;
5789 w1[0] = append0[3] >> 24 | append1[0] << 8;
5790 w1[1] = append1[0] >> 24 | append1[1] << 8;
5791 w1[2] = append1[1] >> 24 | append1[2] << 8;
5792 w1[3] = append1[2] >> 24 | append1[3] << 8;
5793 w2[0] = append1[3] >> 24 | append2[0] << 8;
5794 w2[1] = append2[0] >> 24;
5795 break;
5796
5797 case 2:
5798 w0[0] = w0[0] | append0[0] << 16;
5799 w0[1] = append0[0] >> 16 | append0[1] << 16;
5800 w0[2] = append0[1] >> 16 | append0[2] << 16;
5801 w0[3] = append0[2] >> 16 | append0[3] << 16;
5802 w1[0] = append0[3] >> 16 | append1[0] << 16;
5803 w1[1] = append1[0] >> 16 | append1[1] << 16;
5804 w1[2] = append1[1] >> 16 | append1[2] << 16;
5805 w1[3] = append1[2] >> 16 | append1[3] << 16;
5806 w2[0] = append1[3] >> 16 | append2[0] << 16;
5807 w2[1] = append2[0] >> 16;
5808 break;
5809
5810 case 3:
5811 w0[0] = w0[0] | append0[0] << 24;
5812 w0[1] = append0[0] >> 8 | append0[1] << 24;
5813 w0[2] = append0[1] >> 8 | append0[2] << 24;
5814 w0[3] = append0[2] >> 8 | append0[3] << 24;
5815 w1[0] = append0[3] >> 8 | append1[0] << 24;
5816 w1[1] = append1[0] >> 8 | append1[1] << 24;
5817 w1[2] = append1[1] >> 8 | append1[2] << 24;
5818 w1[3] = append1[2] >> 8 | append1[3] << 24;
5819 w2[0] = append1[3] >> 8 | append2[0] << 24;
5820 w2[1] = append2[0] >> 8;
5821 break;
5822
5823 case 4:
5824 w0[1] = append0[0];
5825 w0[2] = append0[1];
5826 w0[3] = append0[2];
5827 w1[0] = append0[3];
5828 w1[1] = append1[0];
5829 w1[2] = append1[1];
5830 w1[3] = append1[2];
5831 w2[0] = append1[3];
5832 w2[1] = append2[0];
5833 break;
5834
5835 case 5:
5836 w0[1] = w0[1] | append0[0] << 8;
5837 w0[2] = append0[0] >> 24 | append0[1] << 8;
5838 w0[3] = append0[1] >> 24 | append0[2] << 8;
5839 w1[0] = append0[2] >> 24 | append0[3] << 8;
5840 w1[1] = append0[3] >> 24 | append1[0] << 8;
5841 w1[2] = append1[0] >> 24 | append1[1] << 8;
5842 w1[3] = append1[1] >> 24 | append1[2] << 8;
5843 w2[0] = append1[2] >> 24 | append1[3] << 8;
5844 w2[1] = append1[3] >> 24 | append2[0] << 8;
5845 w2[2] = append2[0] >> 24;
5846 break;
5847
5848 case 6:
5849 w0[1] = w0[1] | append0[0] << 16;
5850 w0[2] = append0[0] >> 16 | append0[1] << 16;
5851 w0[3] = append0[1] >> 16 | append0[2] << 16;
5852 w1[0] = append0[2] >> 16 | append0[3] << 16;
5853 w1[1] = append0[3] >> 16 | append1[0] << 16;
5854 w1[2] = append1[0] >> 16 | append1[1] << 16;
5855 w1[3] = append1[1] >> 16 | append1[2] << 16;
5856 w2[0] = append1[2] >> 16 | append1[3] << 16;
5857 w2[1] = append1[3] >> 16 | append2[0] << 16;
5858 w2[2] = append2[0] >> 16;
5859 break;
5860
5861 case 7:
5862 w0[1] = w0[1] | append0[0] << 24;
5863 w0[2] = append0[0] >> 8 | append0[1] << 24;
5864 w0[3] = append0[1] >> 8 | append0[2] << 24;
5865 w1[0] = append0[2] >> 8 | append0[3] << 24;
5866 w1[1] = append0[3] >> 8 | append1[0] << 24;
5867 w1[2] = append1[0] >> 8 | append1[1] << 24;
5868 w1[3] = append1[1] >> 8 | append1[2] << 24;
5869 w2[0] = append1[2] >> 8 | append1[3] << 24;
5870 w2[1] = append1[3] >> 8 | append2[0] << 24;
5871 w2[2] = append2[0] >> 8;
5872 break;
5873
5874 case 8:
5875 w0[2] = append0[0];
5876 w0[3] = append0[1];
5877 w1[0] = append0[2];
5878 w1[1] = append0[3];
5879 w1[2] = append1[0];
5880 w1[3] = append1[1];
5881 w2[0] = append1[2];
5882 w2[1] = append1[3];
5883 w2[2] = append2[0];
5884 break;
5885
5886 case 9:
5887 w0[2] = w0[2] | append0[0] << 8;
5888 w0[3] = append0[0] >> 24 | append0[1] << 8;
5889 w1[0] = append0[1] >> 24 | append0[2] << 8;
5890 w1[1] = append0[2] >> 24 | append0[3] << 8;
5891 w1[2] = append0[3] >> 24 | append1[0] << 8;
5892 w1[3] = append1[0] >> 24 | append1[1] << 8;
5893 w2[0] = append1[1] >> 24 | append1[2] << 8;
5894 w2[1] = append1[2] >> 24 | append1[3] << 8;
5895 w2[2] = append1[3] >> 24 | append2[0] << 8;
5896 w2[3] = append2[0] >> 24;
5897 break;
5898
5899 case 10:
5900 w0[2] = w0[2] | append0[0] << 16;
5901 w0[3] = append0[0] >> 16 | append0[1] << 16;
5902 w1[0] = append0[1] >> 16 | append0[2] << 16;
5903 w1[1] = append0[2] >> 16 | append0[3] << 16;
5904 w1[2] = append0[3] >> 16 | append1[0] << 16;
5905 w1[3] = append1[0] >> 16 | append1[1] << 16;
5906 w2[0] = append1[1] >> 16 | append1[2] << 16;
5907 w2[1] = append1[2] >> 16 | append1[3] << 16;
5908 w2[2] = append1[3] >> 16 | append2[0] << 16;
5909 w2[3] = append2[0] >> 16;
5910 break;
5911
5912 case 11:
5913 w0[2] = w0[2] | append0[0] << 24;
5914 w0[3] = append0[0] >> 8 | append0[1] << 24;
5915 w1[0] = append0[1] >> 8 | append0[2] << 24;
5916 w1[1] = append0[2] >> 8 | append0[3] << 24;
5917 w1[2] = append0[3] >> 8 | append1[0] << 24;
5918 w1[3] = append1[0] >> 8 | append1[1] << 24;
5919 w2[0] = append1[1] >> 8 | append1[2] << 24;
5920 w2[1] = append1[2] >> 8 | append1[3] << 24;
5921 w2[2] = append1[3] >> 8 | append2[0] << 24;
5922 w2[3] = append2[0] >> 8;
5923 break;
5924
5925 case 12:
5926 w0[3] = append0[0];
5927 w1[0] = append0[1];
5928 w1[1] = append0[2];
5929 w1[2] = append0[3];
5930 w1[3] = append1[0];
5931 w2[0] = append1[1];
5932 w2[1] = append1[2];
5933 w2[2] = append1[3];
5934 w2[3] = append2[0];
5935 break;
5936
5937 case 13:
5938 w0[3] = w0[3] | append0[0] << 8;
5939 w1[0] = append0[0] >> 24 | append0[1] << 8;
5940 w1[1] = append0[1] >> 24 | append0[2] << 8;
5941 w1[2] = append0[2] >> 24 | append0[3] << 8;
5942 w1[3] = append0[3] >> 24 | append1[0] << 8;
5943 w2[0] = append1[0] >> 24 | append1[1] << 8;
5944 w2[1] = append1[1] >> 24 | append1[2] << 8;
5945 w2[2] = append1[2] >> 24 | append1[3] << 8;
5946 w2[3] = append1[3] >> 24 | append2[0] << 8;
5947 w3[0] = append2[0] >> 24;
5948 break;
5949
5950 case 14:
5951 w0[3] = w0[3] | append0[0] << 16;
5952 w1[0] = append0[0] >> 16 | append0[1] << 16;
5953 w1[1] = append0[1] >> 16 | append0[2] << 16;
5954 w1[2] = append0[2] >> 16 | append0[3] << 16;
5955 w1[3] = append0[3] >> 16 | append1[0] << 16;
5956 w2[0] = append1[0] >> 16 | append1[1] << 16;
5957 w2[1] = append1[1] >> 16 | append1[2] << 16;
5958 w2[2] = append1[2] >> 16 | append1[3] << 16;
5959 w2[3] = append1[3] >> 16 | append2[0] << 16;
5960 w3[0] = append2[0] >> 16;
5961 break;
5962
5963 case 15:
5964 w0[3] = w0[3] | append0[0] << 24;
5965 w1[0] = append0[0] >> 8 | append0[1] << 24;
5966 w1[1] = append0[1] >> 8 | append0[2] << 24;
5967 w1[2] = append0[2] >> 8 | append0[3] << 24;
5968 w1[3] = append0[3] >> 8 | append1[0] << 24;
5969 w2[0] = append1[0] >> 8 | append1[1] << 24;
5970 w2[1] = append1[1] >> 8 | append1[2] << 24;
5971 w2[2] = append1[2] >> 8 | append1[3] << 24;
5972 w2[3] = append1[3] >> 8 | append2[0] << 24;
5973 w3[0] = append2[0] >> 8;
5974 break;
5975 }
5976 }
5977
5978 // before: memcat32_8
5979 static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset)
5980 {
5981 switch (offset)
5982 {
5983 case 0:
5984 w0[0] = append0[0];
5985 w0[1] = append0[1];
5986 w0[2] = append0[2];
5987 w0[3] = append0[3];
5988 w1[0] = append1[0];
5989 w1[1] = append1[1];
5990 w1[2] = append1[2];
5991 w1[3] = append1[3];
5992 break;
5993
5994 case 1:
5995 w0[0] = w0[0] | append0[0] << 8;
5996 w0[1] = append0[0] >> 24 | append0[1] << 8;
5997 w0[2] = append0[1] >> 24 | append0[2] << 8;
5998 w0[3] = append0[2] >> 24 | append0[3] << 8;
5999 w1[0] = append0[3] >> 24 | append1[0] << 8;
6000 w1[1] = append1[0] >> 24 | append1[1] << 8;
6001 w1[2] = append1[1] >> 24 | append1[2] << 8;
6002 w1[3] = append1[2] >> 24 | append1[3] << 8;
6003 w2[0] = append1[3] >> 24;
6004 break;
6005
6006 case 2:
6007 w0[0] = w0[0] | append0[0] << 16;
6008 w0[1] = append0[0] >> 16 | append0[1] << 16;
6009 w0[2] = append0[1] >> 16 | append0[2] << 16;
6010 w0[3] = append0[2] >> 16 | append0[3] << 16;
6011 w1[0] = append0[3] >> 16 | append1[0] << 16;
6012 w1[1] = append1[0] >> 16 | append1[1] << 16;
6013 w1[2] = append1[1] >> 16 | append1[2] << 16;
6014 w1[3] = append1[2] >> 16 | append1[3] << 16;
6015 w2[0] = append1[3] >> 16;
6016 break;
6017
6018 case 3:
6019 w0[0] = w0[0] | append0[0] << 24;
6020 w0[1] = append0[0] >> 8 | append0[1] << 24;
6021 w0[2] = append0[1] >> 8 | append0[2] << 24;
6022 w0[3] = append0[2] >> 8 | append0[3] << 24;
6023 w1[0] = append0[3] >> 8 | append1[0] << 24;
6024 w1[1] = append1[0] >> 8 | append1[1] << 24;
6025 w1[2] = append1[1] >> 8 | append1[2] << 24;
6026 w1[3] = append1[2] >> 8 | append1[3] << 24;
6027 w2[0] = append1[3] >> 8;
6028 break;
6029
6030 case 4:
6031 w0[1] = append0[0];
6032 w0[2] = append0[1];
6033 w0[3] = append0[2];
6034 w1[0] = append0[3];
6035 w1[1] = append1[0];
6036 w1[2] = append1[1];
6037 w1[3] = append1[2];
6038 w2[0] = append1[3];
6039 break;
6040
6041 case 5:
6042 w0[1] = w0[1] | append0[0] << 8;
6043 w0[2] = append0[0] >> 24 | append0[1] << 8;
6044 w0[3] = append0[1] >> 24 | append0[2] << 8;
6045 w1[0] = append0[2] >> 24 | append0[3] << 8;
6046 w1[1] = append0[3] >> 24 | append1[0] << 8;
6047 w1[2] = append1[0] >> 24 | append1[1] << 8;
6048 w1[3] = append1[1] >> 24 | append1[2] << 8;
6049 w2[0] = append1[2] >> 24 | append1[3] << 8;
6050 w2[1] = append1[3] >> 24;
6051 break;
6052
6053 case 6:
6054 w0[1] = w0[1] | append0[0] << 16;
6055 w0[2] = append0[0] >> 16 | append0[1] << 16;
6056 w0[3] = append0[1] >> 16 | append0[2] << 16;
6057 w1[0] = append0[2] >> 16 | append0[3] << 16;
6058 w1[1] = append0[3] >> 16 | append1[0] << 16;
6059 w1[2] = append1[0] >> 16 | append1[1] << 16;
6060 w1[3] = append1[1] >> 16 | append1[2] << 16;
6061 w2[0] = append1[2] >> 16 | append1[3] << 16;
6062 w2[1] = append1[3] >> 16;
6063 break;
6064
6065 case 7:
6066 w0[1] = w0[1] | append0[0] << 24;
6067 w0[2] = append0[0] >> 8 | append0[1] << 24;
6068 w0[3] = append0[1] >> 8 | append0[2] << 24;
6069 w1[0] = append0[2] >> 8 | append0[3] << 24;
6070 w1[1] = append0[3] >> 8 | append1[0] << 24;
6071 w1[2] = append1[0] >> 8 | append1[1] << 24;
6072 w1[3] = append1[1] >> 8 | append1[2] << 24;
6073 w2[0] = append1[2] >> 8 | append1[3] << 24;
6074 w2[1] = append1[3] >> 8;
6075 break;
6076
6077 case 8:
6078 w0[2] = append0[0];
6079 w0[3] = append0[1];
6080 w1[0] = append0[2];
6081 w1[1] = append0[3];
6082 w1[2] = append1[0];
6083 w1[3] = append1[1];
6084 w2[0] = append1[2];
6085 w2[1] = append1[3];
6086 break;
6087
6088 case 9:
6089 w0[2] = w0[2] | append0[0] << 8;
6090 w0[3] = append0[0] >> 24 | append0[1] << 8;
6091 w1[0] = append0[1] >> 24 | append0[2] << 8;
6092 w1[1] = append0[2] >> 24 | append0[3] << 8;
6093 w1[2] = append0[3] >> 24 | append1[0] << 8;
6094 w1[3] = append1[0] >> 24 | append1[1] << 8;
6095 w2[0] = append1[1] >> 24 | append1[2] << 8;
6096 w2[1] = append1[2] >> 24 | append1[3] << 8;
6097 w2[2] = append1[3] >> 24;
6098 break;
6099
6100 case 10:
6101 w0[2] = w0[2] | append0[0] << 16;
6102 w0[3] = append0[0] >> 16 | append0[1] << 16;
6103 w1[0] = append0[1] >> 16 | append0[2] << 16;
6104 w1[1] = append0[2] >> 16 | append0[3] << 16;
6105 w1[2] = append0[3] >> 16 | append1[0] << 16;
6106 w1[3] = append1[0] >> 16 | append1[1] << 16;
6107 w2[0] = append1[1] >> 16 | append1[2] << 16;
6108 w2[1] = append1[2] >> 16 | append1[3] << 16;
6109 w2[2] = append1[3] >> 16;
6110 break;
6111
6112 case 11:
6113 w0[2] = w0[2] | append0[0] << 24;
6114 w0[3] = append0[0] >> 8 | append0[1] << 24;
6115 w1[0] = append0[1] >> 8 | append0[2] << 24;
6116 w1[1] = append0[2] >> 8 | append0[3] << 24;
6117 w1[2] = append0[3] >> 8 | append1[0] << 24;
6118 w1[3] = append1[0] >> 8 | append1[1] << 24;
6119 w2[0] = append1[1] >> 8 | append1[2] << 24;
6120 w2[1] = append1[2] >> 8 | append1[3] << 24;
6121 w2[2] = append1[3] >> 8;
6122 break;
6123
6124 case 12:
6125 w0[3] = append0[0];
6126 w1[0] = append0[1];
6127 w1[1] = append0[2];
6128 w1[2] = append0[3];
6129 w1[3] = append1[0];
6130 w2[0] = append1[1];
6131 w2[1] = append1[2];
6132 w2[2] = append1[3];
6133 break;
6134
6135 case 13:
6136 w0[3] = w0[3] | append0[0] << 8;
6137 w1[0] = append0[0] >> 24 | append0[1] << 8;
6138 w1[1] = append0[1] >> 24 | append0[2] << 8;
6139 w1[2] = append0[2] >> 24 | append0[3] << 8;
6140 w1[3] = append0[3] >> 24 | append1[0] << 8;
6141 w2[0] = append1[0] >> 24 | append1[1] << 8;
6142 w2[1] = append1[1] >> 24 | append1[2] << 8;
6143 w2[2] = append1[2] >> 24 | append1[3] << 8;
6144 w2[3] = append1[3] >> 24;
6145 break;
6146
6147 case 14:
6148 w0[3] = w0[3] | append0[0] << 16;
6149 w1[0] = append0[0] >> 16 | append0[1] << 16;
6150 w1[1] = append0[1] >> 16 | append0[2] << 16;
6151 w1[2] = append0[2] >> 16 | append0[3] << 16;
6152 w1[3] = append0[3] >> 16 | append1[0] << 16;
6153 w2[0] = append1[0] >> 16 | append1[1] << 16;
6154 w2[1] = append1[1] >> 16 | append1[2] << 16;
6155 w2[2] = append1[2] >> 16 | append1[3] << 16;
6156 w2[3] = append1[3] >> 16;
6157 break;
6158
6159 case 15:
6160 w0[3] = w0[3] | append0[0] << 24;
6161 w1[0] = append0[0] >> 8 | append0[1] << 24;
6162 w1[1] = append0[1] >> 8 | append0[2] << 24;
6163 w1[2] = append0[2] >> 8 | append0[3] << 24;
6164 w1[3] = append0[3] >> 8 | append1[0] << 24;
6165 w2[0] = append1[0] >> 8 | append1[1] << 24;
6166 w2[1] = append1[1] >> 8 | append1[2] << 24;
6167 w2[2] = append1[2] >> 8 | append1[3] << 24;
6168 w2[3] = append1[3] >> 8;
6169 break;
6170
6171 case 16:
6172 w1[0] = append0[0];
6173 w1[1] = append0[1];
6174 w1[2] = append0[2];
6175 w1[3] = append0[3];
6176 w2[0] = append1[0];
6177 w2[1] = append1[1];
6178 w2[2] = append1[2];
6179 w2[3] = append1[3];
6180 break;
6181
6182 case 17:
6183 w1[0] = w1[0] | append0[0] << 8;
6184 w1[1] = append0[0] >> 24 | append0[1] << 8;
6185 w1[2] = append0[1] >> 24 | append0[2] << 8;
6186 w1[3] = append0[2] >> 24 | append0[3] << 8;
6187 w2[0] = append0[3] >> 24 | append1[0] << 8;
6188 w2[1] = append1[0] >> 24 | append1[1] << 8;
6189 w2[2] = append1[1] >> 24 | append1[2] << 8;
6190 w2[3] = append1[2] >> 24 | append1[3] << 8;
6191 w3[0] = append1[3] >> 24;
6192 break;
6193
6194 case 18:
6195 w1[0] = w1[0] | append0[0] << 16;
6196 w1[1] = append0[0] >> 16 | append0[1] << 16;
6197 w1[2] = append0[1] >> 16 | append0[2] << 16;
6198 w1[3] = append0[2] >> 16 | append0[3] << 16;
6199 w2[0] = append0[3] >> 16 | append1[0] << 16;
6200 w2[1] = append1[0] >> 16 | append1[1] << 16;
6201 w2[2] = append1[1] >> 16 | append1[2] << 16;
6202 w2[3] = append1[2] >> 16 | append1[3] << 16;
6203 w3[0] = append1[3] >> 16;
6204 break;
6205
6206 case 19:
6207 w1[0] = w1[0] | append0[0] << 24;
6208 w1[1] = append0[0] >> 8 | append0[1] << 24;
6209 w1[2] = append0[1] >> 8 | append0[2] << 24;
6210 w1[3] = append0[2] >> 8 | append0[3] << 24;
6211 w2[0] = append0[3] >> 8 | append1[0] << 24;
6212 w2[1] = append1[0] >> 8 | append1[1] << 24;
6213 w2[2] = append1[1] >> 8 | append1[2] << 24;
6214 w2[3] = append1[2] >> 8 | append1[3] << 24;
6215 w3[0] = append1[3] >> 8;
6216 break;
6217
6218 case 20:
6219 w1[1] = append0[0];
6220 w1[2] = append0[1];
6221 w1[3] = append0[2];
6222 w2[0] = append0[3];
6223 w2[1] = append1[0];
6224 w2[2] = append1[1];
6225 w2[3] = append1[2];
6226 w3[0] = append1[3];
6227 break;
6228
6229 case 21:
6230 w1[1] = w1[1] | append0[0] << 8;
6231 w1[2] = append0[0] >> 24 | append0[1] << 8;
6232 w1[3] = append0[1] >> 24 | append0[2] << 8;
6233 w2[0] = append0[2] >> 24 | append0[3] << 8;
6234 w2[1] = append0[3] >> 24 | append1[0] << 8;
6235 w2[2] = append1[0] >> 24 | append1[1] << 8;
6236 w2[3] = append1[1] >> 24 | append1[2] << 8;
6237 w3[0] = append1[2] >> 24 | append1[3] << 8;
6238 w3[1] = append1[3] >> 24;
6239 break;
6240
6241 case 22:
6242 w1[1] = w1[1] | append0[0] << 16;
6243 w1[2] = append0[0] >> 16 | append0[1] << 16;
6244 w1[3] = append0[1] >> 16 | append0[2] << 16;
6245 w2[0] = append0[2] >> 16 | append0[3] << 16;
6246 w2[1] = append0[3] >> 16 | append1[0] << 16;
6247 w2[2] = append1[0] >> 16 | append1[1] << 16;
6248 w2[3] = append1[1] >> 16 | append1[2] << 16;
6249 w3[0] = append1[2] >> 16 | append1[3] << 16;
6250 w3[1] = append1[3] >> 16;
6251 break;
6252
6253 case 23:
6254 w1[1] = w1[1] | append0[0] << 24;
6255 w1[2] = append0[0] >> 8 | append0[1] << 24;
6256 w1[3] = append0[1] >> 8 | append0[2] << 24;
6257 w2[0] = append0[2] >> 8 | append0[3] << 24;
6258 w2[1] = append0[3] >> 8 | append1[0] << 24;
6259 w2[2] = append1[0] >> 8 | append1[1] << 24;
6260 w2[3] = append1[1] >> 8 | append1[2] << 24;
6261 w3[0] = append1[2] >> 8 | append1[3] << 24;
6262 w3[1] = append1[3] >> 8;
6263 break;
6264
6265 case 24:
6266 w1[2] = append0[0];
6267 w1[3] = append0[1];
6268 w2[0] = append0[2];
6269 w2[1] = append0[3];
6270 w2[2] = append1[0];
6271 w2[3] = append1[1];
6272 w3[0] = append1[2];
6273 w3[1] = append1[3];
6274 break;
6275
6276 case 25:
6277 w1[2] = w1[2] | append0[0] << 8;
6278 w1[3] = append0[0] >> 24 | append0[1] << 8;
6279 w2[0] = append0[1] >> 24 | append0[2] << 8;
6280 w2[1] = append0[2] >> 24 | append0[3] << 8;
6281 w2[2] = append0[3] >> 24 | append1[0] << 8;
6282 w2[3] = append1[0] >> 24 | append1[1] << 8;
6283 w3[0] = append1[1] >> 24 | append1[2] << 8;
6284 w3[1] = append1[2] >> 24 | append1[3] << 8;
6285 break;
6286
6287 case 26:
6288 w1[2] = w1[2] | append0[0] << 16;
6289 w1[3] = append0[0] >> 16 | append0[1] << 16;
6290 w2[0] = append0[1] >> 16 | append0[2] << 16;
6291 w2[1] = append0[2] >> 16 | append0[3] << 16;
6292 w2[2] = append0[3] >> 16 | append1[0] << 16;
6293 w2[3] = append1[0] >> 16 | append1[1] << 16;
6294 w3[0] = append1[1] >> 16 | append1[2] << 16;
6295 w3[1] = append1[2] >> 16 | append1[3] << 16;
6296 break;
6297
6298 case 27:
6299 w1[2] = w1[2] | append0[0] << 24;
6300 w1[3] = append0[0] >> 8 | append0[1] << 24;
6301 w2[0] = append0[1] >> 8 | append0[2] << 24;
6302 w2[1] = append0[2] >> 8 | append0[3] << 24;
6303 w2[2] = append0[3] >> 8 | append1[0] << 24;
6304 w2[3] = append1[0] >> 8 | append1[1] << 24;
6305 w3[0] = append1[1] >> 8 | append1[2] << 24;
6306 w3[1] = append1[2] >> 8 | append1[3] << 24;
6307 break;
6308
6309 case 28:
6310 w1[3] = append0[0];
6311 w2[0] = append0[1];
6312 w2[1] = append0[2];
6313 w2[2] = append0[3];
6314 w2[3] = append1[0];
6315 w3[0] = append1[1];
6316 w3[1] = append1[2];
6317 break;
6318
6319 case 29:
6320 w1[3] = w1[3] | append0[0] << 8;
6321 w2[0] = append0[0] >> 24 | append0[1] << 8;
6322 w2[1] = append0[1] >> 24 | append0[2] << 8;
6323 w2[2] = append0[2] >> 24 | append0[3] << 8;
6324 w2[3] = append0[3] >> 24 | append1[0] << 8;
6325 w3[0] = append1[0] >> 24 | append1[1] << 8;
6326 w3[1] = append1[1] >> 24 | append1[2] << 8;
6327 break;
6328
6329 case 30:
6330 w1[3] = w1[3] | append0[0] << 16;
6331 w2[0] = append0[0] >> 16 | append0[1] << 16;
6332 w2[1] = append0[1] >> 16 | append0[2] << 16;
6333 w2[2] = append0[2] >> 16 | append0[3] << 16;
6334 w2[3] = append0[3] >> 16 | append1[0] << 16;
6335 w3[0] = append1[0] >> 16 | append1[1] << 16;
6336 w3[1] = append1[1] >> 16 | append1[2] << 16;
6337 break;
6338
6339 case 31:
6340 w1[3] = w1[3] | append0[0] << 24;
6341 w2[0] = append0[0] >> 8 | append0[1] << 24;
6342 w2[1] = append0[1] >> 8 | append0[2] << 24;
6343 w2[2] = append0[2] >> 8 | append0[3] << 24;
6344 w2[3] = append0[3] >> 8 | append1[0] << 24;
6345 w3[0] = append1[0] >> 8 | append1[1] << 24;
6346 w3[1] = append1[1] >> 8 | append1[2] << 24;
6347 break;
6348
6349 case 32:
6350 w2[0] = append0[0];
6351 w2[1] = append0[1];
6352 w2[2] = append0[2];
6353 w2[3] = append0[3];
6354 w3[0] = append1[0];
6355 w3[1] = append1[1];
6356 break;
6357 }
6358 }
6359
6360 // before: memcat32_9
6361 static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
6362 {
6363 switch (offset)
6364 {
6365 case 0:
6366 w0[0] = append0[0];
6367 w0[1] = append0[1];
6368 w0[2] = append0[2];
6369 w0[3] = append0[3];
6370 w1[0] = append1[0];
6371 w1[1] = append1[1];
6372 w1[2] = append1[2];
6373 w1[3] = append1[3];
6374 w2[0] = append2[0];
6375 break;
6376
6377 case 1:
6378 w0[0] = w0[0] | append0[0] << 8;
6379 w0[1] = append0[0] >> 24 | append0[1] << 8;
6380 w0[2] = append0[1] >> 24 | append0[2] << 8;
6381 w0[3] = append0[2] >> 24 | append0[3] << 8;
6382 w1[0] = append0[3] >> 24 | append1[0] << 8;
6383 w1[1] = append1[0] >> 24 | append1[1] << 8;
6384 w1[2] = append1[1] >> 24 | append1[2] << 8;
6385 w1[3] = append1[2] >> 24 | append1[3] << 8;
6386 w2[0] = append1[3] >> 24 | append2[0] << 8;
6387 w2[1] = append2[0] >> 24;
6388 break;
6389
6390 case 2:
6391 w0[0] = w0[0] | append0[0] << 16;
6392 w0[1] = append0[0] >> 16 | append0[1] << 16;
6393 w0[2] = append0[1] >> 16 | append0[2] << 16;
6394 w0[3] = append0[2] >> 16 | append0[3] << 16;
6395 w1[0] = append0[3] >> 16 | append1[0] << 16;
6396 w1[1] = append1[0] >> 16 | append1[1] << 16;
6397 w1[2] = append1[1] >> 16 | append1[2] << 16;
6398 w1[3] = append1[2] >> 16 | append1[3] << 16;
6399 w2[0] = append1[3] >> 16 | append2[0] << 16;
6400 w2[1] = append2[0] >> 16;
6401 break;
6402
6403 case 3:
6404 w0[0] = w0[0] | append0[0] << 24;
6405 w0[1] = append0[0] >> 8 | append0[1] << 24;
6406 w0[2] = append0[1] >> 8 | append0[2] << 24;
6407 w0[3] = append0[2] >> 8 | append0[3] << 24;
6408 w1[0] = append0[3] >> 8 | append1[0] << 24;
6409 w1[1] = append1[0] >> 8 | append1[1] << 24;
6410 w1[2] = append1[1] >> 8 | append1[2] << 24;
6411 w1[3] = append1[2] >> 8 | append1[3] << 24;
6412 w2[0] = append1[3] >> 8 | append2[0] << 24;
6413 w2[1] = append2[0] >> 8;
6414 break;
6415
6416 case 4:
6417 w0[1] = append0[0];
6418 w0[2] = append0[1];
6419 w0[3] = append0[2];
6420 w1[0] = append0[3];
6421 w1[1] = append1[0];
6422 w1[2] = append1[1];
6423 w1[3] = append1[2];
6424 w2[0] = append1[3];
6425 w2[1] = append2[0];
6426 break;
6427
6428 case 5:
6429 w0[1] = w0[1] | append0[0] << 8;
6430 w0[2] = append0[0] >> 24 | append0[1] << 8;
6431 w0[3] = append0[1] >> 24 | append0[2] << 8;
6432 w1[0] = append0[2] >> 24 | append0[3] << 8;
6433 w1[1] = append0[3] >> 24 | append1[0] << 8;
6434 w1[2] = append1[0] >> 24 | append1[1] << 8;
6435 w1[3] = append1[1] >> 24 | append1[2] << 8;
6436 w2[0] = append1[2] >> 24 | append1[3] << 8;
6437 w2[1] = append1[3] >> 24 | append2[0] << 8;
6438 w2[2] = append2[0] >> 24;
6439 break;
6440
6441 case 6:
6442 w0[1] = w0[1] | append0[0] << 16;
6443 w0[2] = append0[0] >> 16 | append0[1] << 16;
6444 w0[3] = append0[1] >> 16 | append0[2] << 16;
6445 w1[0] = append0[2] >> 16 | append0[3] << 16;
6446 w1[1] = append0[3] >> 16 | append1[0] << 16;
6447 w1[2] = append1[0] >> 16 | append1[1] << 16;
6448 w1[3] = append1[1] >> 16 | append1[2] << 16;
6449 w2[0] = append1[2] >> 16 | append1[3] << 16;
6450 w2[1] = append1[3] >> 16 | append2[0] << 16;
6451 w2[2] = append2[0] >> 16;
6452 break;
6453
6454 case 7:
6455 w0[1] = w0[1] | append0[0] << 24;
6456 w0[2] = append0[0] >> 8 | append0[1] << 24;
6457 w0[3] = append0[1] >> 8 | append0[2] << 24;
6458 w1[0] = append0[2] >> 8 | append0[3] << 24;
6459 w1[1] = append0[3] >> 8 | append1[0] << 24;
6460 w1[2] = append1[0] >> 8 | append1[1] << 24;
6461 w1[3] = append1[1] >> 8 | append1[2] << 24;
6462 w2[0] = append1[2] >> 8 | append1[3] << 24;
6463 w2[1] = append1[3] >> 8 | append2[0] << 24;
6464 w2[2] = append2[0] >> 8;
6465 break;
6466
6467 case 8:
6468 w0[2] = append0[0];
6469 w0[3] = append0[1];
6470 w1[0] = append0[2];
6471 w1[1] = append0[3];
6472 w1[2] = append1[0];
6473 w1[3] = append1[1];
6474 w2[0] = append1[2];
6475 w2[1] = append1[3];
6476 w2[2] = append2[0];
6477 break;
6478
6479 case 9:
6480 w0[2] = w0[2] | append0[0] << 8;
6481 w0[3] = append0[0] >> 24 | append0[1] << 8;
6482 w1[0] = append0[1] >> 24 | append0[2] << 8;
6483 w1[1] = append0[2] >> 24 | append0[3] << 8;
6484 w1[2] = append0[3] >> 24 | append1[0] << 8;
6485 w1[3] = append1[0] >> 24 | append1[1] << 8;
6486 w2[0] = append1[1] >> 24 | append1[2] << 8;
6487 w2[1] = append1[2] >> 24 | append1[3] << 8;
6488 w2[2] = append1[3] >> 24 | append2[0] << 8;
6489 w2[3] = append2[0] >> 24;
6490 break;
6491
6492 case 10:
6493 w0[2] = w0[2] | append0[0] << 16;
6494 w0[3] = append0[0] >> 16 | append0[1] << 16;
6495 w1[0] = append0[1] >> 16 | append0[2] << 16;
6496 w1[1] = append0[2] >> 16 | append0[3] << 16;
6497 w1[2] = append0[3] >> 16 | append1[0] << 16;
6498 w1[3] = append1[0] >> 16 | append1[1] << 16;
6499 w2[0] = append1[1] >> 16 | append1[2] << 16;
6500 w2[1] = append1[2] >> 16 | append1[3] << 16;
6501 w2[2] = append1[3] >> 16 | append2[0] << 16;
6502 w2[3] = append2[0] >> 16;
6503 break;
6504
6505 case 11:
6506 w0[2] = w0[2] | append0[0] << 24;
6507 w0[3] = append0[0] >> 8 | append0[1] << 24;
6508 w1[0] = append0[1] >> 8 | append0[2] << 24;
6509 w1[1] = append0[2] >> 8 | append0[3] << 24;
6510 w1[2] = append0[3] >> 8 | append1[0] << 24;
6511 w1[3] = append1[0] >> 8 | append1[1] << 24;
6512 w2[0] = append1[1] >> 8 | append1[2] << 24;
6513 w2[1] = append1[2] >> 8 | append1[3] << 24;
6514 w2[2] = append1[3] >> 8 | append2[0] << 24;
6515 w2[3] = append2[0] >> 8;
6516 break;
6517
6518 case 12:
6519 w0[3] = append0[0];
6520 w1[0] = append0[1];
6521 w1[1] = append0[2];
6522 w1[2] = append0[3];
6523 w1[3] = append1[0];
6524 w2[0] = append1[1];
6525 w2[1] = append1[2];
6526 w2[2] = append1[3];
6527 w2[3] = append2[0];
6528 break;
6529
6530 case 13:
6531 w0[3] = w0[3] | append0[0] << 8;
6532 w1[0] = append0[0] >> 24 | append0[1] << 8;
6533 w1[1] = append0[1] >> 24 | append0[2] << 8;
6534 w1[2] = append0[2] >> 24 | append0[3] << 8;
6535 w1[3] = append0[3] >> 24 | append1[0] << 8;
6536 w2[0] = append1[0] >> 24 | append1[1] << 8;
6537 w2[1] = append1[1] >> 24 | append1[2] << 8;
6538 w2[2] = append1[2] >> 24 | append1[3] << 8;
6539 w2[3] = append1[3] >> 24 | append2[0] << 8;
6540 w3[0] = append2[0] >> 24;
6541 break;
6542
6543 case 14:
6544 w0[3] = w0[3] | append0[0] << 16;
6545 w1[0] = append0[0] >> 16 | append0[1] << 16;
6546 w1[1] = append0[1] >> 16 | append0[2] << 16;
6547 w1[2] = append0[2] >> 16 | append0[3] << 16;
6548 w1[3] = append0[3] >> 16 | append1[0] << 16;
6549 w2[0] = append1[0] >> 16 | append1[1] << 16;
6550 w2[1] = append1[1] >> 16 | append1[2] << 16;
6551 w2[2] = append1[2] >> 16 | append1[3] << 16;
6552 w2[3] = append1[3] >> 16 | append2[0] << 16;
6553 w3[0] = append2[0] >> 16;
6554 break;
6555
6556 case 15:
6557 w0[3] = w0[3] | append0[0] << 24;
6558 w1[0] = append0[0] >> 8 | append0[1] << 24;
6559 w1[1] = append0[1] >> 8 | append0[2] << 24;
6560 w1[2] = append0[2] >> 8 | append0[3] << 24;
6561 w1[3] = append0[3] >> 8 | append1[0] << 24;
6562 w2[0] = append1[0] >> 8 | append1[1] << 24;
6563 w2[1] = append1[1] >> 8 | append1[2] << 24;
6564 w2[2] = append1[2] >> 8 | append1[3] << 24;
6565 w2[3] = append1[3] >> 8 | append2[0] << 24;
6566 w3[0] = append2[0] >> 8;
6567 break;
6568
6569 case 16:
6570 w1[0] = append0[0];
6571 w1[1] = append0[1];
6572 w1[2] = append0[2];
6573 w1[3] = append0[3];
6574 w2[0] = append1[0];
6575 w2[1] = append1[1];
6576 w2[2] = append1[2];
6577 w2[3] = append1[3];
6578 w3[0] = append2[0];
6579 break;
6580
6581 case 17:
6582 w1[0] = w1[0] | append0[0] << 8;
6583 w1[1] = append0[0] >> 24 | append0[1] << 8;
6584 w1[2] = append0[1] >> 24 | append0[2] << 8;
6585 w1[3] = append0[2] >> 24 | append0[3] << 8;
6586 w2[0] = append0[3] >> 24 | append1[0] << 8;
6587 w2[1] = append1[0] >> 24 | append1[1] << 8;
6588 w2[2] = append1[1] >> 24 | append1[2] << 8;
6589 w2[3] = append1[2] >> 24 | append1[3] << 8;
6590 w3[0] = append1[3] >> 24 | append2[0] << 8;
6591 w3[1] = append2[0] >> 24;
6592 break;
6593
6594 case 18:
6595 w1[0] = w1[0] | append0[0] << 16;
6596 w1[1] = append0[0] >> 16 | append0[1] << 16;
6597 w1[2] = append0[1] >> 16 | append0[2] << 16;
6598 w1[3] = append0[2] >> 16 | append0[3] << 16;
6599 w2[0] = append0[3] >> 16 | append1[0] << 16;
6600 w2[1] = append1[0] >> 16 | append1[1] << 16;
6601 w2[2] = append1[1] >> 16 | append1[2] << 16;
6602 w2[3] = append1[2] >> 16 | append1[3] << 16;
6603 w3[0] = append1[3] >> 16 | append2[0] << 16;
6604 w3[1] = append2[0] >> 16;
6605 break;
6606
6607 case 19:
6608 w1[0] = w1[0] | append0[0] << 24;
6609 w1[1] = append0[0] >> 8 | append0[1] << 24;
6610 w1[2] = append0[1] >> 8 | append0[2] << 24;
6611 w1[3] = append0[2] >> 8 | append0[3] << 24;
6612 w2[0] = append0[3] >> 8 | append1[0] << 24;
6613 w2[1] = append1[0] >> 8 | append1[1] << 24;
6614 w2[2] = append1[1] >> 8 | append1[2] << 24;
6615 w2[3] = append1[2] >> 8 | append1[3] << 24;
6616 w3[0] = append1[3] >> 8 | append2[0] << 24;
6617 w3[1] = append2[0] >> 8;
6618 break;
6619
6620 case 20:
6621 w1[1] = append0[0];
6622 w1[2] = append0[1];
6623 w1[3] = append0[2];
6624 w2[0] = append0[3];
6625 w2[1] = append1[0];
6626 w2[2] = append1[1];
6627 w2[3] = append1[2];
6628 w3[0] = append1[3];
6629 w3[1] = append2[0];
6630 break;
6631
6632 case 21:
6633 w1[1] = w1[1] | append0[0] << 8;
6634 w1[2] = append0[0] >> 24 | append0[1] << 8;
6635 w1[3] = append0[1] >> 24 | append0[2] << 8;
6636 w2[0] = append0[2] >> 24 | append0[3] << 8;
6637 w2[1] = append0[3] >> 24 | append1[0] << 8;
6638 w2[2] = append1[0] >> 24 | append1[1] << 8;
6639 w2[3] = append1[1] >> 24 | append1[2] << 8;
6640 w3[0] = append1[2] >> 24 | append1[3] << 8;
6641 w3[1] = append1[3] >> 24 | append2[0] << 8;
6642 break;
6643
6644 case 22:
6645 w1[1] = w1[1] | append0[0] << 16;
6646 w1[2] = append0[0] >> 16 | append0[1] << 16;
6647 w1[3] = append0[1] >> 16 | append0[2] << 16;
6648 w2[0] = append0[2] >> 16 | append0[3] << 16;
6649 w2[1] = append0[3] >> 16 | append1[0] << 16;
6650 w2[2] = append1[0] >> 16 | append1[1] << 16;
6651 w2[3] = append1[1] >> 16 | append1[2] << 16;
6652 w3[0] = append1[2] >> 16 | append1[3] << 16;
6653 w3[1] = append1[3] >> 16 | append2[0] << 16;
6654 break;
6655
6656 case 23:
6657 w1[1] = w1[1] | append0[0] << 24;
6658 w1[2] = append0[0] >> 8 | append0[1] << 24;
6659 w1[3] = append0[1] >> 8 | append0[2] << 24;
6660 w2[0] = append0[2] >> 8 | append0[3] << 24;
6661 w2[1] = append0[3] >> 8 | append1[0] << 24;
6662 w2[2] = append1[0] >> 8 | append1[1] << 24;
6663 w2[3] = append1[1] >> 8 | append1[2] << 24;
6664 w3[0] = append1[2] >> 8 | append1[3] << 24;
6665 w3[1] = append1[3] >> 8 | append2[0] << 24;
6666 break;
6667
6668 case 24:
6669 w1[2] = append0[0];
6670 w1[3] = append0[1];
6671 w2[0] = append0[2];
6672 w2[1] = append0[3];
6673 w2[2] = append1[0];
6674 w2[3] = append1[1];
6675 w3[0] = append1[2];
6676 w3[1] = append1[3];
6677 break;
6678
6679 case 25:
6680 w1[2] = w1[2] | append0[0] << 8;
6681 w1[3] = append0[0] >> 24 | append0[1] << 8;
6682 w2[0] = append0[1] >> 24 | append0[2] << 8;
6683 w2[1] = append0[2] >> 24 | append0[3] << 8;
6684 w2[2] = append0[3] >> 24 | append1[0] << 8;
6685 w2[3] = append1[0] >> 24 | append1[1] << 8;
6686 w3[0] = append1[1] >> 24 | append1[2] << 8;
6687 w3[1] = append1[2] >> 24 | append1[3] << 8;
6688 break;
6689
6690 case 26:
6691 w1[2] = w1[2] | append0[0] << 16;
6692 w1[3] = append0[0] >> 16 | append0[1] << 16;
6693 w2[0] = append0[1] >> 16 | append0[2] << 16;
6694 w2[1] = append0[2] >> 16 | append0[3] << 16;
6695 w2[2] = append0[3] >> 16 | append1[0] << 16;
6696 w2[3] = append1[0] >> 16 | append1[1] << 16;
6697 w3[0] = append1[1] >> 16 | append1[2] << 16;
6698 w3[1] = append1[2] >> 16 | append1[3] << 16;
6699 break;
6700
6701 case 27:
6702 w1[2] = w1[2] | append0[0] << 24;
6703 w1[3] = append0[0] >> 8 | append0[1] << 24;
6704 w2[0] = append0[1] >> 8 | append0[2] << 24;
6705 w2[1] = append0[2] >> 8 | append0[3] << 24;
6706 w2[2] = append0[3] >> 8 | append1[0] << 24;
6707 w2[3] = append1[0] >> 8 | append1[1] << 24;
6708 w3[0] = append1[1] >> 8 | append1[2] << 24;
6709 w3[1] = append1[2] >> 8 | append1[3] << 24;
6710 break;
6711
6712 case 28:
6713 w1[3] = append0[0];
6714 w2[0] = append0[1];
6715 w2[1] = append0[2];
6716 w2[2] = append0[3];
6717 w2[3] = append1[0];
6718 w3[0] = append1[1];
6719 w3[1] = append1[2];
6720 break;
6721
6722 case 29:
6723 w1[3] = w1[3] | append0[0] << 8;
6724 w2[0] = append0[0] >> 24 | append0[1] << 8;
6725 w2[1] = append0[1] >> 24 | append0[2] << 8;
6726 w2[2] = append0[2] >> 24 | append0[3] << 8;
6727 w2[3] = append0[3] >> 24 | append1[0] << 8;
6728 w3[0] = append1[0] >> 24 | append1[1] << 8;
6729 w3[1] = append1[1] >> 24 | append1[2] << 8;
6730 break;
6731
6732 case 30:
6733 w1[3] = w1[3] | append0[0] << 16;
6734 w2[0] = append0[0] >> 16 | append0[1] << 16;
6735 w2[1] = append0[1] >> 16 | append0[2] << 16;
6736 w2[2] = append0[2] >> 16 | append0[3] << 16;
6737 w2[3] = append0[3] >> 16 | append1[0] << 16;
6738 w3[0] = append1[0] >> 16 | append1[1] << 16;
6739 w3[1] = append1[1] >> 16 | append1[2] << 16;
6740 break;
6741
6742 case 31:
6743 w1[3] = w1[3] | append0[0] << 24;
6744 w2[0] = append0[0] >> 8 | append0[1] << 24;
6745 w2[1] = append0[1] >> 8 | append0[2] << 24;
6746 w2[2] = append0[2] >> 8 | append0[3] << 24;
6747 w2[3] = append0[3] >> 8 | append1[0] << 24;
6748 w3[0] = append1[0] >> 8 | append1[1] << 24;
6749 w3[1] = append1[1] >> 8 | append1[2] << 24;
6750 break;
6751
6752 case 32:
6753 w2[0] = append0[0];
6754 w2[1] = append0[1];
6755 w2[2] = append0[2];
6756 w2[3] = append0[3];
6757 w3[0] = append1[0];
6758 w3[1] = append1[1];
6759 break;
6760 }
6761 }
6762
6763 static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
6764 {
6765 #ifdef IS_AMD
6766 const int offset_mod_4 = offset & 3;
6767
6768 const int offset_minus_4 = 4 - offset;
6769
6770 switch (offset / 4)
6771 {
6772 case 0:
6773 w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
6774 w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
6775 w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
6776 w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
6777 w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
6778 w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
6779 w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6780 w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6781 w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6782 w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6783 w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6784 w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6785 w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6786 w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6787 w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
6788
6789 if (offset_mod_4 == 0)
6790 {
6791 w0[0] = w0[1];
6792 w0[1] = w0[2];
6793 w0[2] = w0[3];
6794 w0[3] = w1[0];
6795 w1[0] = w1[1];
6796 w1[1] = w1[2];
6797 w1[2] = w1[3];
6798 w1[3] = w2[0];
6799 w2[0] = w2[1];
6800 w2[1] = w2[2];
6801 w2[2] = w2[3];
6802 w2[3] = w3[0];
6803 w3[0] = w3[1];
6804 w3[1] = w3[2];
6805 w3[2] = 0;
6806 }
6807
6808 break;
6809
6810 case 1:
6811 w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
6812 w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
6813 w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
6814 w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
6815 w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
6816 w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6817 w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6818 w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6819 w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6820 w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6821 w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6822 w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6823 w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6824 w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
6825 w0[0] = 0;
6826
6827 if (offset_mod_4 == 0)
6828 {
6829 w0[1] = w0[2];
6830 w0[2] = w0[3];
6831 w0[3] = w1[0];
6832 w1[0] = w1[1];
6833 w1[1] = w1[2];
6834 w1[2] = w1[3];
6835 w1[3] = w2[0];
6836 w2[0] = w2[1];
6837 w2[1] = w2[2];
6838 w2[2] = w2[3];
6839 w2[3] = w3[0];
6840 w3[0] = w3[1];
6841 w3[1] = w3[2];
6842 w3[2] = 0;
6843 }
6844
6845 break;
6846
6847 case 2:
6848 w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
6849 w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
6850 w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
6851 w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
6852 w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6853 w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6854 w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6855 w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6856 w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6857 w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6858 w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6859 w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6860 w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
6861 w0[1] = 0;
6862 w0[0] = 0;
6863
6864 if (offset_mod_4 == 0)
6865 {
6866 w0[2] = w0[3];
6867 w0[3] = w1[0];
6868 w1[0] = w1[1];
6869 w1[1] = w1[2];
6870 w1[2] = w1[3];
6871 w1[3] = w2[0];
6872 w2[0] = w2[1];
6873 w2[1] = w2[2];
6874 w2[2] = w2[3];
6875 w2[3] = w3[0];
6876 w3[0] = w3[1];
6877 w3[1] = w3[2];
6878 w3[2] = 0;
6879 }
6880
6881 break;
6882
6883 case 3:
6884 w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
6885 w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
6886 w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
6887 w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6888 w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6889 w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6890 w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6891 w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6892 w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6893 w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6894 w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6895 w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
6896 w0[2] = 0;
6897 w0[1] = 0;
6898 w0[0] = 0;
6899
6900 if (offset_mod_4 == 0)
6901 {
6902 w0[3] = w1[0];
6903 w1[0] = w1[1];
6904 w1[1] = w1[2];
6905 w1[2] = w1[3];
6906 w1[3] = w2[0];
6907 w2[0] = w2[1];
6908 w2[1] = w2[2];
6909 w2[2] = w2[3];
6910 w2[3] = w3[0];
6911 w3[0] = w3[1];
6912 w3[1] = w3[2];
6913 w3[2] = 0;
6914 }
6915
6916 break;
6917
6918 case 4:
6919 w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
6920 w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
6921 w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6922 w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6923 w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6924 w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6925 w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6926 w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6927 w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6928 w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6929 w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
6930 w0[3] = 0;
6931 w0[2] = 0;
6932 w0[1] = 0;
6933 w0[0] = 0;
6934
6935 if (offset_mod_4 == 0)
6936 {
6937 w1[0] = w1[1];
6938 w1[1] = w1[2];
6939 w1[2] = w1[3];
6940 w1[3] = w2[0];
6941 w2[0] = w2[1];
6942 w2[1] = w2[2];
6943 w2[2] = w2[3];
6944 w2[3] = w3[0];
6945 w3[0] = w3[1];
6946 w3[1] = w3[2];
6947 w3[2] = 0;
6948 }
6949
6950 break;
6951
6952 case 5:
6953 w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
6954 w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6955 w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6956 w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6957 w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6958 w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6959 w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6960 w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6961 w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6962 w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
6963 w1[0] = 0;
6964 w0[3] = 0;
6965 w0[2] = 0;
6966 w0[1] = 0;
6967 w0[0] = 0;
6968
6969 if (offset_mod_4 == 0)
6970 {
6971 w1[1] = w1[2];
6972 w1[2] = w1[3];
6973 w1[3] = w2[0];
6974 w2[0] = w2[1];
6975 w2[1] = w2[2];
6976 w2[2] = w2[3];
6977 w2[3] = w3[0];
6978 w3[0] = w3[1];
6979 w3[1] = w3[2];
6980 w3[2] = 0;
6981 }
6982
6983 break;
6984
6985 case 6:
6986 w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
6987 w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6988 w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6989 w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6990 w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6991 w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6992 w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6993 w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6994 w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
6995 w1[1] = 0;
6996 w1[0] = 0;
6997 w0[3] = 0;
6998 w0[2] = 0;
6999 w0[1] = 0;
7000 w0[0] = 0;
7001
7002 if (offset_mod_4 == 0)
7003 {
7004 w1[2] = w1[3];
7005 w1[3] = w2[0];
7006 w2[0] = w2[1];
7007 w2[1] = w2[2];
7008 w2[2] = w2[3];
7009 w2[3] = w3[0];
7010 w3[0] = w3[1];
7011 w3[1] = w3[2];
7012 w3[2] = 0;
7013 }
7014
7015 break;
7016
7017 case 7:
7018 w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
7019 w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
7020 w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
7021 w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
7022 w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
7023 w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
7024 w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
7025 w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
7026 w1[2] = 0;
7027 w1[1] = 0;
7028 w1[0] = 0;
7029 w0[3] = 0;
7030 w0[2] = 0;
7031 w0[1] = 0;
7032 w0[0] = 0;
7033
7034 if (offset_mod_4 == 0)
7035 {
7036 w1[3] = w2[0];
7037 w2[0] = w2[1];
7038 w2[1] = w2[2];
7039 w2[2] = w2[3];
7040 w2[3] = w3[0];
7041 w3[0] = w3[1];
7042 w3[1] = w3[2];
7043 w3[2] = 0;
7044 }
7045
7046 break;
7047
7048 case 8:
7049 w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
7050 w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
7051 w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
7052 w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
7053 w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
7054 w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
7055 w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
7056 w1[3] = 0;
7057 w1[2] = 0;
7058 w1[1] = 0;
7059 w1[0] = 0;
7060 w0[3] = 0;
7061 w0[2] = 0;
7062 w0[1] = 0;
7063 w0[0] = 0;
7064
7065 if (offset_mod_4 == 0)
7066 {
7067 w2[0] = w2[1];
7068 w2[1] = w2[2];
7069 w2[2] = w2[3];
7070 w2[3] = w3[0];
7071 w3[0] = w3[1];
7072 w3[1] = w3[2];
7073 w3[2] = 0;
7074 }
7075
7076 break;
7077
7078 case 9:
7079 w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
7080 w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
7081 w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
7082 w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
7083 w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
7084 w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
7085 w2[0] = 0;
7086 w1[3] = 0;
7087 w1[2] = 0;
7088 w1[1] = 0;
7089 w1[0] = 0;
7090 w0[3] = 0;
7091 w0[2] = 0;
7092 w0[1] = 0;
7093 w0[0] = 0;
7094
7095 if (offset_mod_4 == 0)
7096 {
7097 w2[1] = w2[2];
7098 w2[2] = w2[3];
7099 w2[3] = w3[0];
7100 w3[0] = w3[1];
7101 w3[1] = w3[2];
7102 w3[2] = 0;
7103 }
7104
7105 break;
7106
7107 case 10:
7108 w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
7109 w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
7110 w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
7111 w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
7112 w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
7113 w2[1] = 0;
7114 w2[0] = 0;
7115 w1[3] = 0;
7116 w1[2] = 0;
7117 w1[1] = 0;
7118 w1[0] = 0;
7119 w0[3] = 0;
7120 w0[2] = 0;
7121 w0[1] = 0;
7122 w0[0] = 0;
7123
7124 if (offset_mod_4 == 0)
7125 {
7126 w2[2] = w2[3];
7127 w2[3] = w3[0];
7128 w3[0] = w3[1];
7129 w3[1] = w3[2];
7130 w3[2] = 0;
7131 }
7132
7133 break;
7134
7135 case 11:
7136 w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
7137 w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
7138 w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
7139 w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
7140 w2[2] = 0;
7141 w2[1] = 0;
7142 w2[0] = 0;
7143 w1[3] = 0;
7144 w1[2] = 0;
7145 w1[1] = 0;
7146 w1[0] = 0;
7147 w0[3] = 0;
7148 w0[2] = 0;
7149 w0[1] = 0;
7150 w0[0] = 0;
7151
7152 if (offset_mod_4 == 0)
7153 {
7154 w2[3] = w3[0];
7155 w3[0] = w3[1];
7156 w3[1] = w3[2];
7157 w3[2] = 0;
7158 }
7159
7160 break;
7161
7162 case 12:
7163 w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
7164 w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
7165 w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
7166 w2[3] = 0;
7167 w2[2] = 0;
7168 w2[1] = 0;
7169 w2[0] = 0;
7170 w1[3] = 0;
7171 w1[2] = 0;
7172 w1[1] = 0;
7173 w1[0] = 0;
7174 w0[3] = 0;
7175 w0[2] = 0;
7176 w0[1] = 0;
7177 w0[0] = 0;
7178
7179 if (offset_mod_4 == 0)
7180 {
7181 w3[0] = w3[1];
7182 w3[1] = w3[2];
7183 w3[2] = 0;
7184 }
7185
7186 break;
7187
7188 case 13:
7189 w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
7190 w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
7191 w3[0] = 0;
7192 w2[3] = 0;
7193 w2[2] = 0;
7194 w2[1] = 0;
7195 w2[0] = 0;
7196 w1[3] = 0;
7197 w1[2] = 0;
7198 w1[1] = 0;
7199 w1[0] = 0;
7200 w0[3] = 0;
7201 w0[2] = 0;
7202 w0[1] = 0;
7203 w0[0] = 0;
7204
7205 if (offset_mod_4 == 0)
7206 {
7207 w3[1] = w3[2];
7208 w3[2] = 0;
7209 }
7210
7211 break;
7212 }
7213 #endif
7214
7215 #ifdef IS_NV
7216 const int offset_minus_4 = 4 - (offset % 4);
7217
7218 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
7219
7220 switch (offset / 4)
7221 {
7222 case 0:
7223 w3[1] = __byte_perm (w3[0], w3[1], selector);
7224 w3[0] = __byte_perm (w2[3], w3[0], selector);
7225 w2[3] = __byte_perm (w2[2], w2[3], selector);
7226 w2[2] = __byte_perm (w2[1], w2[2], selector);
7227 w2[1] = __byte_perm (w2[0], w2[1], selector);
7228 w2[0] = __byte_perm (w1[3], w2[0], selector);
7229 w1[3] = __byte_perm (w1[2], w1[3], selector);
7230 w1[2] = __byte_perm (w1[1], w1[2], selector);
7231 w1[1] = __byte_perm (w1[0], w1[1], selector);
7232 w1[0] = __byte_perm (w0[3], w1[0], selector);
7233 w0[3] = __byte_perm (w0[2], w0[3], selector);
7234 w0[2] = __byte_perm (w0[1], w0[2], selector);
7235 w0[1] = __byte_perm (w0[0], w0[1], selector);
7236 w0[0] = __byte_perm ( 0, w0[0], selector);
7237
7238 break;
7239
7240 case 1:
7241 w3[1] = __byte_perm (w2[3], w3[0], selector);
7242 w3[0] = __byte_perm (w2[2], w2[3], selector);
7243 w2[3] = __byte_perm (w2[1], w2[2], selector);
7244 w2[2] = __byte_perm (w2[0], w2[1], selector);
7245 w2[1] = __byte_perm (w1[3], w2[0], selector);
7246 w2[0] = __byte_perm (w1[2], w1[3], selector);
7247 w1[3] = __byte_perm (w1[1], w1[2], selector);
7248 w1[2] = __byte_perm (w1[0], w1[1], selector);
7249 w1[1] = __byte_perm (w0[3], w1[0], selector);
7250 w1[0] = __byte_perm (w0[2], w0[3], selector);
7251 w0[3] = __byte_perm (w0[1], w0[2], selector);
7252 w0[2] = __byte_perm (w0[0], w0[1], selector);
7253 w0[1] = __byte_perm ( 0, w0[0], selector);
7254 w0[0] = 0;
7255
7256 break;
7257
7258 case 2:
7259 w3[1] = __byte_perm (w2[2], w2[3], selector);
7260 w3[0] = __byte_perm (w2[1], w2[2], selector);
7261 w2[3] = __byte_perm (w2[0], w2[1], selector);
7262 w2[2] = __byte_perm (w1[3], w2[0], selector);
7263 w2[1] = __byte_perm (w1[2], w1[3], selector);
7264 w2[0] = __byte_perm (w1[1], w1[2], selector);
7265 w1[3] = __byte_perm (w1[0], w1[1], selector);
7266 w1[2] = __byte_perm (w0[3], w1[0], selector);
7267 w1[1] = __byte_perm (w0[2], w0[3], selector);
7268 w1[0] = __byte_perm (w0[1], w0[2], selector);
7269 w0[3] = __byte_perm (w0[0], w0[1], selector);
7270 w0[2] = __byte_perm ( 0, w0[0], selector);
7271 w0[1] = 0;
7272 w0[0] = 0;
7273
7274 break;
7275
7276 case 3:
7277 w3[1] = __byte_perm (w2[1], w2[2], selector);
7278 w3[0] = __byte_perm (w2[0], w2[1], selector);
7279 w2[3] = __byte_perm (w1[3], w2[0], selector);
7280 w2[2] = __byte_perm (w1[2], w1[3], selector);
7281 w2[1] = __byte_perm (w1[1], w1[2], selector);
7282 w2[0] = __byte_perm (w1[0], w1[1], selector);
7283 w1[3] = __byte_perm (w0[3], w1[0], selector);
7284 w1[2] = __byte_perm (w0[2], w0[3], selector);
7285 w1[1] = __byte_perm (w0[1], w0[2], selector);
7286 w1[0] = __byte_perm (w0[0], w0[1], selector);
7287 w0[3] = __byte_perm ( 0, w0[0], selector);
7288 w0[2] = 0;
7289 w0[1] = 0;
7290 w0[0] = 0;
7291
7292 break;
7293
7294 case 4:
7295 w3[1] = __byte_perm (w2[0], w2[1], selector);
7296 w3[0] = __byte_perm (w1[3], w2[0], selector);
7297 w2[3] = __byte_perm (w1[2], w1[3], selector);
7298 w2[2] = __byte_perm (w1[1], w1[2], selector);
7299 w2[1] = __byte_perm (w1[0], w1[1], selector);
7300 w2[0] = __byte_perm (w0[3], w1[0], selector);
7301 w1[3] = __byte_perm (w0[2], w0[3], selector);
7302 w1[2] = __byte_perm (w0[1], w0[2], selector);
7303 w1[1] = __byte_perm (w0[0], w0[1], selector);
7304 w1[0] = __byte_perm ( 0, w0[0], selector);
7305 w0[3] = 0;
7306 w0[2] = 0;
7307 w0[1] = 0;
7308 w0[0] = 0;
7309
7310 break;
7311
7312 case 5:
7313 w3[1] = __byte_perm (w1[3], w2[0], selector);
7314 w3[0] = __byte_perm (w1[2], w1[3], selector);
7315 w2[3] = __byte_perm (w1[1], w1[2], selector);
7316 w2[2] = __byte_perm (w1[0], w1[1], selector);
7317 w2[1] = __byte_perm (w0[3], w1[0], selector);
7318 w2[0] = __byte_perm (w0[2], w0[3], selector);
7319 w1[3] = __byte_perm (w0[1], w0[2], selector);
7320 w1[2] = __byte_perm (w0[0], w0[1], selector);
7321 w1[1] = __byte_perm ( 0, w0[0], selector);
7322 w1[0] = 0;
7323 w0[3] = 0;
7324 w0[2] = 0;
7325 w0[1] = 0;
7326 w0[0] = 0;
7327
7328 break;
7329
7330 case 6:
7331 w3[1] = __byte_perm (w1[2], w1[3], selector);
7332 w3[0] = __byte_perm (w1[1], w1[2], selector);
7333 w2[3] = __byte_perm (w1[0], w1[1], selector);
7334 w2[2] = __byte_perm (w0[3], w1[0], selector);
7335 w2[1] = __byte_perm (w0[2], w0[3], selector);
7336 w2[0] = __byte_perm (w0[1], w0[2], selector);
7337 w1[3] = __byte_perm (w0[0], w0[1], selector);
7338 w1[2] = __byte_perm ( 0, w0[0], selector);
7339 w1[1] = 0;
7340 w1[0] = 0;
7341 w0[3] = 0;
7342 w0[2] = 0;
7343 w0[1] = 0;
7344 w0[0] = 0;
7345
7346 break;
7347
7348 case 7:
7349 w3[1] = __byte_perm (w1[1], w1[2], selector);
7350 w3[0] = __byte_perm (w1[0], w1[1], selector);
7351 w2[3] = __byte_perm (w0[3], w1[0], selector);
7352 w2[2] = __byte_perm (w0[2], w0[3], selector);
7353 w2[1] = __byte_perm (w0[1], w0[2], selector);
7354 w2[0] = __byte_perm (w0[0], w0[1], selector);
7355 w1[3] = __byte_perm ( 0, w0[0], selector);
7356 w1[2] = 0;
7357 w1[1] = 0;
7358 w1[0] = 0;
7359 w0[3] = 0;
7360 w0[2] = 0;
7361 w0[1] = 0;
7362 w0[0] = 0;
7363
7364 break;
7365
7366 case 8:
7367 w3[1] = __byte_perm (w1[0], w1[1], selector);
7368 w3[0] = __byte_perm (w0[3], w1[0], selector);
7369 w2[3] = __byte_perm (w0[2], w0[3], selector);
7370 w2[2] = __byte_perm (w0[1], w0[2], selector);
7371 w2[1] = __byte_perm (w0[0], w0[1], selector);
7372 w2[0] = __byte_perm ( 0, w0[0], selector);
7373 w1[3] = 0;
7374 w1[2] = 0;
7375 w1[1] = 0;
7376 w1[0] = 0;
7377 w0[3] = 0;
7378 w0[2] = 0;
7379 w0[1] = 0;
7380 w0[0] = 0;
7381
7382 break;
7383
7384 case 9:
7385 w3[1] = __byte_perm (w0[3], w1[0], selector);
7386 w3[0] = __byte_perm (w0[2], w0[3], selector);
7387 w2[3] = __byte_perm (w0[1], w0[2], selector);
7388 w2[2] = __byte_perm (w0[0], w0[1], selector);
7389 w2[1] = __byte_perm ( 0, w0[0], selector);
7390 w2[0] = 0;
7391 w1[3] = 0;
7392 w1[2] = 0;
7393 w1[1] = 0;
7394 w1[0] = 0;
7395 w0[3] = 0;
7396 w0[2] = 0;
7397 w0[1] = 0;
7398 w0[0] = 0;
7399
7400 break;
7401
7402 case 10:
7403 w3[1] = __byte_perm (w0[2], w0[3], selector);
7404 w3[0] = __byte_perm (w0[1], w0[2], selector);
7405 w2[3] = __byte_perm (w0[0], w0[1], selector);
7406 w2[2] = __byte_perm ( 0, w0[0], selector);
7407 w2[1] = 0;
7408 w2[0] = 0;
7409 w1[3] = 0;
7410 w1[2] = 0;
7411 w1[1] = 0;
7412 w1[0] = 0;
7413 w0[3] = 0;
7414 w0[2] = 0;
7415 w0[1] = 0;
7416 w0[0] = 0;
7417
7418 break;
7419
7420 case 11:
7421 w3[1] = __byte_perm (w0[1], w0[2], selector);
7422 w3[0] = __byte_perm (w0[0], w0[1], selector);
7423 w2[3] = __byte_perm ( 0, w0[0], selector);
7424 w2[2] = 0;
7425 w2[1] = 0;
7426 w2[0] = 0;
7427 w1[3] = 0;
7428 w1[2] = 0;
7429 w1[1] = 0;
7430 w1[0] = 0;
7431 w0[3] = 0;
7432 w0[2] = 0;
7433 w0[1] = 0;
7434 w0[0] = 0;
7435
7436 break;
7437
7438 case 12:
7439 w3[1] = __byte_perm (w0[0], w0[1], selector);
7440 w3[0] = __byte_perm ( 0, w0[0], selector);
7441 w2[3] = 0;
7442 w2[2] = 0;
7443 w2[1] = 0;
7444 w2[0] = 0;
7445 w1[3] = 0;
7446 w1[2] = 0;
7447 w1[1] = 0;
7448 w1[0] = 0;
7449 w0[3] = 0;
7450 w0[2] = 0;
7451 w0[1] = 0;
7452 w0[0] = 0;
7453
7454 break;
7455
7456 case 13:
7457 w3[1] = __byte_perm ( 0, w0[0], selector);
7458 w3[0] = 0;
7459 w2[3] = 0;
7460 w2[2] = 0;
7461 w2[1] = 0;
7462 w2[0] = 0;
7463 w1[3] = 0;
7464 w1[2] = 0;
7465 w1[1] = 0;
7466 w1[0] = 0;
7467 w0[3] = 0;
7468 w0[2] = 0;
7469 w0[1] = 0;
7470 w0[0] = 0;
7471
7472 break;
7473 }
7474 #endif
7475 }
7476
7477 static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
7478 {
7479 #ifdef IS_AMD
7480 switch (offset / 4)
7481 {
7482 case 0:
7483 w3[2] = amd_bytealign (w3[1], 0, offset);
7484 w3[1] = amd_bytealign (w3[0], w3[1], offset);
7485 w3[0] = amd_bytealign (w2[3], w3[0], offset);
7486 w2[3] = amd_bytealign (w2[2], w2[3], offset);
7487 w2[2] = amd_bytealign (w2[1], w2[2], offset);
7488 w2[1] = amd_bytealign (w2[0], w2[1], offset);
7489 w2[0] = amd_bytealign (w1[3], w2[0], offset);
7490 w1[3] = amd_bytealign (w1[2], w1[3], offset);
7491 w1[2] = amd_bytealign (w1[1], w1[2], offset);
7492 w1[1] = amd_bytealign (w1[0], w1[1], offset);
7493 w1[0] = amd_bytealign (w0[3], w1[0], offset);
7494 w0[3] = amd_bytealign (w0[2], w0[3], offset);
7495 w0[2] = amd_bytealign (w0[1], w0[2], offset);
7496 w0[1] = amd_bytealign (w0[0], w0[1], offset);
7497 w0[0] = amd_bytealign ( 0, w0[0], offset);
7498 break;
7499
7500 case 1:
7501 w3[2] = amd_bytealign (w3[0], 0, offset);
7502 w3[1] = amd_bytealign (w2[3], w3[0], offset);
7503 w3[0] = amd_bytealign (w2[2], w2[3], offset);
7504 w2[3] = amd_bytealign (w2[1], w2[2], offset);
7505 w2[2] = amd_bytealign (w2[0], w2[1], offset);
7506 w2[1] = amd_bytealign (w1[3], w2[0], offset);
7507 w2[0] = amd_bytealign (w1[2], w1[3], offset);
7508 w1[3] = amd_bytealign (w1[1], w1[2], offset);
7509 w1[2] = amd_bytealign (w1[0], w1[1], offset);
7510 w1[1] = amd_bytealign (w0[3], w1[0], offset);
7511 w1[0] = amd_bytealign (w0[2], w0[3], offset);
7512 w0[3] = amd_bytealign (w0[1], w0[2], offset);
7513 w0[2] = amd_bytealign (w0[0], w0[1], offset);
7514 w0[1] = amd_bytealign ( 0, w0[0], offset);
7515 w0[0] = 0;
7516 break;
7517
7518 case 2:
7519 w3[2] = amd_bytealign (w2[3], 0, offset);
7520 w3[1] = amd_bytealign (w2[2], w2[3], offset);
7521 w3[0] = amd_bytealign (w2[1], w2[2], offset);
7522 w2[3] = amd_bytealign (w2[0], w2[1], offset);
7523 w2[2] = amd_bytealign (w1[3], w2[0], offset);
7524 w2[1] = amd_bytealign (w1[2], w1[3], offset);
7525 w2[0] = amd_bytealign (w1[1], w1[2], offset);
7526 w1[3] = amd_bytealign (w1[0], w1[1], offset);
7527 w1[2] = amd_bytealign (w0[3], w1[0], offset);
7528 w1[1] = amd_bytealign (w0[2], w0[3], offset);
7529 w1[0] = amd_bytealign (w0[1], w0[2], offset);
7530 w0[3] = amd_bytealign (w0[0], w0[1], offset);
7531 w0[2] = amd_bytealign ( 0, w0[0], offset);
7532 w0[1] = 0;
7533 w0[0] = 0;
7534 break;
7535
7536 case 3:
7537 w3[2] = amd_bytealign (w2[2], 0, offset);
7538 w3[1] = amd_bytealign (w2[1], w2[2], offset);
7539 w3[0] = amd_bytealign (w2[0], w2[1], offset);
7540 w2[3] = amd_bytealign (w1[3], w2[0], offset);
7541 w2[2] = amd_bytealign (w1[2], w1[3], offset);
7542 w2[1] = amd_bytealign (w1[1], w1[2], offset);
7543 w2[0] = amd_bytealign (w1[0], w1[1], offset);
7544 w1[3] = amd_bytealign (w0[3], w1[0], offset);
7545 w1[2] = amd_bytealign (w0[2], w0[3], offset);
7546 w1[1] = amd_bytealign (w0[1], w0[2], offset);
7547 w1[0] = amd_bytealign (w0[0], w0[1], offset);
7548 w0[3] = amd_bytealign ( 0, w0[0], offset);
7549 w0[2] = 0;
7550 w0[1] = 0;
7551 w0[0] = 0;
7552 break;
7553
7554 case 4:
7555 w3[2] = amd_bytealign (w2[1], 0, offset);
7556 w3[1] = amd_bytealign (w2[0], w2[1], offset);
7557 w3[0] = amd_bytealign (w1[3], w2[0], offset);
7558 w2[3] = amd_bytealign (w1[2], w1[3], offset);
7559 w2[2] = amd_bytealign (w1[1], w1[2], offset);
7560 w2[1] = amd_bytealign (w1[0], w1[1], offset);
7561 w2[0] = amd_bytealign (w0[3], w1[0], offset);
7562 w1[3] = amd_bytealign (w0[2], w0[3], offset);
7563 w1[2] = amd_bytealign (w0[1], w0[2], offset);
7564 w1[1] = amd_bytealign (w0[0], w0[1], offset);
7565 w1[0] = amd_bytealign ( 0, w0[0], offset);
7566 w0[3] = 0;
7567 w0[2] = 0;
7568 w0[1] = 0;
7569 w0[0] = 0;
7570 break;
7571
7572 case 5:
7573 w3[2] = amd_bytealign (w2[0], 0, offset);
7574 w3[1] = amd_bytealign (w1[3], w2[0], offset);
7575 w3[0] = amd_bytealign (w1[2], w1[3], offset);
7576 w2[3] = amd_bytealign (w1[1], w1[2], offset);
7577 w2[2] = amd_bytealign (w1[0], w1[1], offset);
7578 w2[1] = amd_bytealign (w0[3], w1[0], offset);
7579 w2[0] = amd_bytealign (w0[2], w0[3], offset);
7580 w1[3] = amd_bytealign (w0[1], w0[2], offset);
7581 w1[2] = amd_bytealign (w0[0], w0[1], offset);
7582 w1[1] = amd_bytealign ( 0, w0[0], offset);
7583 w1[0] = 0;
7584 w0[3] = 0;
7585 w0[2] = 0;
7586 w0[1] = 0;
7587 w0[0] = 0;
7588 break;
7589
7590 case 6:
7591 w3[2] = amd_bytealign (w1[3], 0, offset);
7592 w3[1] = amd_bytealign (w1[2], w1[3], offset);
7593 w3[0] = amd_bytealign (w1[1], w1[2], offset);
7594 w2[3] = amd_bytealign (w1[0], w1[1], offset);
7595 w2[2] = amd_bytealign (w0[3], w1[0], offset);
7596 w2[1] = amd_bytealign (w0[2], w0[3], offset);
7597 w2[0] = amd_bytealign (w0[1], w0[2], offset);
7598 w1[3] = amd_bytealign (w0[0], w0[1], offset);
7599 w1[2] = amd_bytealign ( 0, w0[0], offset);
7600 w1[1] = 0;
7601 w1[0] = 0;
7602 w0[3] = 0;
7603 w0[2] = 0;
7604 w0[1] = 0;
7605 w0[0] = 0;
7606 break;
7607
7608 case 7:
7609 w3[2] = amd_bytealign (w1[2], 0, offset);
7610 w3[1] = amd_bytealign (w1[1], w1[2], offset);
7611 w3[0] = amd_bytealign (w1[0], w1[1], offset);
7612 w2[3] = amd_bytealign (w0[3], w1[0], offset);
7613 w2[2] = amd_bytealign (w0[2], w0[3], offset);
7614 w2[1] = amd_bytealign (w0[1], w0[2], offset);
7615 w2[0] = amd_bytealign (w0[0], w0[1], offset);
7616 w1[3] = amd_bytealign ( 0, w0[0], offset);
7617 w1[2] = 0;
7618 w1[1] = 0;
7619 w1[0] = 0;
7620 w0[3] = 0;
7621 w0[2] = 0;
7622 w0[1] = 0;
7623 w0[0] = 0;
7624 break;
7625
7626 case 8:
7627 w3[2] = amd_bytealign (w1[1], 0, offset);
7628 w3[1] = amd_bytealign (w1[0], w1[1], offset);
7629 w3[0] = amd_bytealign (w0[3], w1[0], offset);
7630 w2[3] = amd_bytealign (w0[2], w0[3], offset);
7631 w2[2] = amd_bytealign (w0[1], w0[2], offset);
7632 w2[1] = amd_bytealign (w0[0], w0[1], offset);
7633 w2[0] = amd_bytealign ( 0, w0[0], offset);
7634 w1[3] = 0;
7635 w1[2] = 0;
7636 w1[1] = 0;
7637 w1[0] = 0;
7638 w0[3] = 0;
7639 w0[2] = 0;
7640 w0[1] = 0;
7641 w0[0] = 0;
7642 break;
7643
7644 case 9:
7645 w3[2] = amd_bytealign (w1[0], 0, offset);
7646 w3[1] = amd_bytealign (w0[3], w1[0], offset);
7647 w3[0] = amd_bytealign (w0[2], w0[3], offset);
7648 w2[3] = amd_bytealign (w0[1], w0[2], offset);
7649 w2[2] = amd_bytealign (w0[0], w0[1], offset);
7650 w2[1] = amd_bytealign ( 0, w0[0], offset);
7651 w2[0] = 0;
7652 w1[3] = 0;
7653 w1[2] = 0;
7654 w1[1] = 0;
7655 w1[0] = 0;
7656 w0[3] = 0;
7657 w0[2] = 0;
7658 w0[1] = 0;
7659 w0[0] = 0;
7660 break;
7661
7662 case 10:
7663 w3[2] = amd_bytealign (w0[3], 0, offset);
7664 w3[1] = amd_bytealign (w0[2], w0[3], offset);
7665 w3[0] = amd_bytealign (w0[1], w0[2], offset);
7666 w2[3] = amd_bytealign (w0[0], w0[1], offset);
7667 w2[2] = amd_bytealign ( 0, w0[0], offset);
7668 w2[1] = 0;
7669 w2[0] = 0;
7670 w1[3] = 0;
7671 w1[2] = 0;
7672 w1[1] = 0;
7673 w1[0] = 0;
7674 w0[3] = 0;
7675 w0[2] = 0;
7676 w0[1] = 0;
7677 w0[0] = 0;
7678 break;
7679
7680 case 11:
7681 w3[2] = amd_bytealign (w0[2], 0, offset);
7682 w3[1] = amd_bytealign (w0[1], w0[2], offset);
7683 w3[0] = amd_bytealign (w0[0], w0[1], offset);
7684 w2[3] = amd_bytealign ( 0, w0[0], offset);
7685 w2[2] = 0;
7686 w2[1] = 0;
7687 w2[0] = 0;
7688 w1[3] = 0;
7689 w1[2] = 0;
7690 w1[1] = 0;
7691 w1[0] = 0;
7692 w0[3] = 0;
7693 w0[2] = 0;
7694 w0[1] = 0;
7695 w0[0] = 0;
7696 break;
7697
7698 case 12:
7699 w3[2] = amd_bytealign (w0[1], 0, offset);
7700 w3[1] = amd_bytealign (w0[0], w0[1], offset);
7701 w3[0] = amd_bytealign ( 0, w0[0], offset);
7702 w2[3] = 0;
7703 w2[2] = 0;
7704 w2[1] = 0;
7705 w2[0] = 0;
7706 w1[3] = 0;
7707 w1[2] = 0;
7708 w1[1] = 0;
7709 w1[0] = 0;
7710 w0[3] = 0;
7711 w0[2] = 0;
7712 w0[1] = 0;
7713 w0[0] = 0;
7714 break;
7715
7716 case 13:
7717 w3[2] = amd_bytealign (w0[0], 0, offset);
7718 w3[1] = amd_bytealign ( 0, w0[0], offset);
7719 w3[0] = 0;
7720 w2[3] = 0;
7721 w2[2] = 0;
7722 w2[1] = 0;
7723 w2[0] = 0;
7724 w1[3] = 0;
7725 w1[2] = 0;
7726 w1[1] = 0;
7727 w1[0] = 0;
7728 w0[3] = 0;
7729 w0[2] = 0;
7730 w0[1] = 0;
7731 w0[0] = 0;
7732 break;
7733 }
7734 #endif
7735
7736 #ifdef IS_NV
7737 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
7738
7739 switch (offset / 4)
7740 {
7741 case 0:
7742 w3[1] = __byte_perm (w3[1], w3[0], selector);
7743 w3[0] = __byte_perm (w3[0], w2[3], selector);
7744 w2[3] = __byte_perm (w2[3], w2[2], selector);
7745 w2[2] = __byte_perm (w2[2], w2[1], selector);
7746 w2[1] = __byte_perm (w2[1], w2[0], selector);
7747 w2[0] = __byte_perm (w2[0], w1[3], selector);
7748 w1[3] = __byte_perm (w1[3], w1[2], selector);
7749 w1[2] = __byte_perm (w1[2], w1[1], selector);
7750 w1[1] = __byte_perm (w1[1], w1[0], selector);
7751 w1[0] = __byte_perm (w1[0], w0[3], selector);
7752 w0[3] = __byte_perm (w0[3], w0[2], selector);
7753 w0[2] = __byte_perm (w0[2], w0[1], selector);
7754 w0[1] = __byte_perm (w0[1], w0[0], selector);
7755 w0[0] = __byte_perm (w0[0], 0, selector);
7756 break;
7757
7758 case 1:
7759 w3[1] = __byte_perm (w3[0], w2[3], selector);
7760 w3[0] = __byte_perm (w2[3], w2[2], selector);
7761 w2[3] = __byte_perm (w2[2], w2[1], selector);
7762 w2[2] = __byte_perm (w2[1], w2[0], selector);
7763 w2[1] = __byte_perm (w2[0], w1[3], selector);
7764 w2[0] = __byte_perm (w1[3], w1[2], selector);
7765 w1[3] = __byte_perm (w1[2], w1[1], selector);
7766 w1[2] = __byte_perm (w1[1], w1[0], selector);
7767 w1[1] = __byte_perm (w1[0], w0[3], selector);
7768 w1[0] = __byte_perm (w0[3], w0[2], selector);
7769 w0[3] = __byte_perm (w0[2], w0[1], selector);
7770 w0[2] = __byte_perm (w0[1], w0[0], selector);
7771 w0[1] = __byte_perm (w0[0], 0, selector);
7772 w0[0] = 0;
7773 break;
7774
7775 case 2:
7776 w3[1] = __byte_perm (w2[3], w2[2], selector);
7777 w3[0] = __byte_perm (w2[2], w2[1], selector);
7778 w2[3] = __byte_perm (w2[1], w2[0], selector);
7779 w2[2] = __byte_perm (w2[0], w1[3], selector);
7780 w2[1] = __byte_perm (w1[3], w1[2], selector);
7781 w2[0] = __byte_perm (w1[2], w1[1], selector);
7782 w1[3] = __byte_perm (w1[1], w1[0], selector);
7783 w1[2] = __byte_perm (w1[0], w0[3], selector);
7784 w1[1] = __byte_perm (w0[3], w0[2], selector);
7785 w1[0] = __byte_perm (w0[2], w0[1], selector);
7786 w0[3] = __byte_perm (w0[1], w0[0], selector);
7787 w0[2] = __byte_perm (w0[0], 0, selector);
7788 w0[1] = 0;
7789 w0[0] = 0;
7790 break;
7791
7792 case 3:
7793 w3[1] = __byte_perm (w2[2], w2[1], selector);
7794 w3[0] = __byte_perm (w2[1], w2[0], selector);
7795 w2[3] = __byte_perm (w2[0], w1[3], selector);
7796 w2[2] = __byte_perm (w1[3], w1[2], selector);
7797 w2[1] = __byte_perm (w1[2], w1[1], selector);
7798 w2[0] = __byte_perm (w1[1], w1[0], selector);
7799 w1[3] = __byte_perm (w1[0], w0[3], selector);
7800 w1[2] = __byte_perm (w0[3], w0[2], selector);
7801 w1[1] = __byte_perm (w0[2], w0[1], selector);
7802 w1[0] = __byte_perm (w0[1], w0[0], selector);
7803 w0[3] = __byte_perm (w0[0], 0, selector);
7804 w0[2] = 0;
7805 w0[1] = 0;
7806 w0[0] = 0;
7807 break;
7808
7809 case 4:
7810 w3[1] = __byte_perm (w2[1], w2[0], selector);
7811 w3[0] = __byte_perm (w2[0], w1[3], selector);
7812 w2[3] = __byte_perm (w1[3], w1[2], selector);
7813 w2[2] = __byte_perm (w1[2], w1[1], selector);
7814 w2[1] = __byte_perm (w1[1], w1[0], selector);
7815 w2[0] = __byte_perm (w1[0], w0[3], selector);
7816 w1[3] = __byte_perm (w0[3], w0[2], selector);
7817 w1[2] = __byte_perm (w0[2], w0[1], selector);
7818 w1[1] = __byte_perm (w0[1], w0[0], selector);
7819 w1[0] = __byte_perm (w0[0], 0, selector);
7820 w0[3] = 0;
7821 w0[2] = 0;
7822 w0[1] = 0;
7823 w0[0] = 0;
7824 break;
7825
7826 case 5:
7827 w3[1] = __byte_perm (w2[0], w1[3], selector);
7828 w3[0] = __byte_perm (w1[3], w1[2], selector);
7829 w2[3] = __byte_perm (w1[2], w1[1], selector);
7830 w2[2] = __byte_perm (w1[1], w1[0], selector);
7831 w2[1] = __byte_perm (w1[0], w0[3], selector);
7832 w2[0] = __byte_perm (w0[3], w0[2], selector);
7833 w1[3] = __byte_perm (w0[2], w0[1], selector);
7834 w1[2] = __byte_perm (w0[1], w0[0], selector);
7835 w1[1] = __byte_perm (w0[0], 0, selector);
7836 w1[0] = 0;
7837 w0[3] = 0;
7838 w0[2] = 0;
7839 w0[1] = 0;
7840 w0[0] = 0;
7841 break;
7842
7843 case 6:
7844 w3[1] = __byte_perm (w1[3], w1[2], selector);
7845 w3[0] = __byte_perm (w1[2], w1[1], selector);
7846 w2[3] = __byte_perm (w1[1], w1[0], selector);
7847 w2[2] = __byte_perm (w1[0], w0[3], selector);
7848 w2[1] = __byte_perm (w0[3], w0[2], selector);
7849 w2[0] = __byte_perm (w0[2], w0[1], selector);
7850 w1[3] = __byte_perm (w0[1], w0[0], selector);
7851 w1[2] = __byte_perm (w0[0], 0, selector);
7852 w1[1] = 0;
7853 w1[0] = 0;
7854 w0[3] = 0;
7855 w0[2] = 0;
7856 w0[1] = 0;
7857 w0[0] = 0;
7858 break;
7859
7860 case 7:
7861 w3[1] = __byte_perm (w1[2], w1[1], selector);
7862 w3[0] = __byte_perm (w1[1], w1[0], selector);
7863 w2[3] = __byte_perm (w1[0], w0[3], selector);
7864 w2[2] = __byte_perm (w0[3], w0[2], selector);
7865 w2[1] = __byte_perm (w0[2], w0[1], selector);
7866 w2[0] = __byte_perm (w0[1], w0[0], selector);
7867 w1[3] = __byte_perm (w0[0], 0, selector);
7868 w1[2] = 0;
7869 w1[1] = 0;
7870 w1[0] = 0;
7871 w0[3] = 0;
7872 w0[2] = 0;
7873 w0[1] = 0;
7874 w0[0] = 0;
7875 break;
7876
7877 case 8:
7878 w3[1] = __byte_perm (w1[1], w1[0], selector);
7879 w3[0] = __byte_perm (w1[0], w0[3], selector);
7880 w2[3] = __byte_perm (w0[3], w0[2], selector);
7881 w2[2] = __byte_perm (w0[2], w0[1], selector);
7882 w2[1] = __byte_perm (w0[1], w0[0], selector);
7883 w2[0] = __byte_perm (w0[0], 0, selector);
7884 w1[3] = 0;
7885 w1[2] = 0;
7886 w1[1] = 0;
7887 w1[0] = 0;
7888 w0[3] = 0;
7889 w0[2] = 0;
7890 w0[1] = 0;
7891 w0[0] = 0;
7892 break;
7893
7894 case 9:
7895 w3[1] = __byte_perm (w1[0], w0[3], selector);
7896 w3[0] = __byte_perm (w0[3], w0[2], selector);
7897 w2[3] = __byte_perm (w0[2], w0[1], selector);
7898 w2[2] = __byte_perm (w0[1], w0[0], selector);
7899 w2[1] = __byte_perm (w0[0], 0, selector);
7900 w2[0] = 0;
7901 w1[3] = 0;
7902 w1[2] = 0;
7903 w1[1] = 0;
7904 w1[0] = 0;
7905 w0[3] = 0;
7906 w0[2] = 0;
7907 w0[1] = 0;
7908 w0[0] = 0;
7909 break;
7910
7911 case 10:
7912 w3[1] = __byte_perm (w0[3], w0[2], selector);
7913 w3[0] = __byte_perm (w0[2], w0[1], selector);
7914 w2[3] = __byte_perm (w0[1], w0[0], selector);
7915 w2[2] = __byte_perm (w0[0], 0, selector);
7916 w2[1] = 0;
7917 w2[0] = 0;
7918 w1[3] = 0;
7919 w1[2] = 0;
7920 w1[1] = 0;
7921 w1[0] = 0;
7922 w0[3] = 0;
7923 w0[2] = 0;
7924 w0[1] = 0;
7925 w0[0] = 0;
7926 break;
7927
7928 case 11:
7929 w3[1] = __byte_perm (w0[2], w0[1], selector);
7930 w3[0] = __byte_perm (w0[1], w0[0], selector);
7931 w2[3] = __byte_perm (w0[0], 0, selector);
7932 w2[2] = 0;
7933 w2[1] = 0;
7934 w2[0] = 0;
7935 w1[3] = 0;
7936 w1[2] = 0;
7937 w1[1] = 0;
7938 w1[0] = 0;
7939 w0[3] = 0;
7940 w0[2] = 0;
7941 w0[1] = 0;
7942 w0[0] = 0;
7943 break;
7944
7945 case 12:
7946 w3[1] = __byte_perm (w0[1], w0[0], selector);
7947 w3[0] = __byte_perm (w0[0], 0, selector);
7948 w2[3] = 0;
7949 w2[2] = 0;
7950 w2[1] = 0;
7951 w2[0] = 0;
7952 w1[3] = 0;
7953 w1[2] = 0;
7954 w1[1] = 0;
7955 w1[0] = 0;
7956 w0[3] = 0;
7957 w0[2] = 0;
7958 w0[1] = 0;
7959 w0[0] = 0;
7960 break;
7961
7962 case 13:
7963 w3[1] = __byte_perm (w0[0], 0, selector);
7964 w3[0] = 0;
7965 w2[3] = 0;
7966 w2[2] = 0;
7967 w2[1] = 0;
7968 w2[0] = 0;
7969 w1[3] = 0;
7970 w1[2] = 0;
7971 w1[1] = 0;
7972 w1[0] = 0;
7973 w0[3] = 0;
7974 w0[2] = 0;
7975 w0[1] = 0;
7976 w0[0] = 0;
7977 break;
7978 }
7979 #endif
7980 }