Fix append_* function calls
[hashcat.git] / OpenCL / common.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 static int hash_comp (const u32 d1[4], __global u32 *d2)
7 {
8 if (d1[3] > d2[DGST_R3]) return ( 1);
9 if (d1[3] < d2[DGST_R3]) return (-1);
10 if (d1[2] > d2[DGST_R2]) return ( 1);
11 if (d1[2] < d2[DGST_R2]) return (-1);
12 if (d1[1] > d2[DGST_R1]) return ( 1);
13 if (d1[1] < d2[DGST_R1]) return (-1);
14 if (d1[0] > d2[DGST_R0]) return ( 1);
15 if (d1[0] < d2[DGST_R0]) return (-1);
16
17 return (0);
18 }
19
20 static int find_hash (const u32 digest[4], const u32 digests_cnt, __global digest_t *digests_buf)
21 {
22 for (u32 l = 0, r = digests_cnt; r; r >>= 1)
23 {
24 const u32 m = r >> 1;
25
26 const u32 c = l + m;
27
28 const int cmp = hash_comp (digest, digests_buf[c].digest_buf);
29
30 if (cmp > 0)
31 {
32 l += m + 1;
33
34 r--;
35 }
36
37 if (cmp == 0) return (c);
38 }
39
40 return (-1);
41 }
42
/**
 * Test one bit of a bitmap for a digest word.
 *
 * The word index is (digest >> bitmap_shift) & bitmap_mask and the bit inside
 * that word is selected by the low five bits of digest.
 *
 * Returns the selected bit in place (non-zero when set, 0 when clear), not a
 * normalized 0/1 value.
 */
static u32 check_bitmap (__global u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
{
  // The bit index can be 31, so the shifted operand must be unsigned:
  // shifting a signed 1 into the sign bit is not a portable operation.
  const u32 bit = 1u << (digest & 0x1f);

  return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & bit);
}
47
48 static u32 check (const u32 digest[2], __global u32 *bitmap_s1_a, __global u32 *bitmap_s1_b, __global u32 *bitmap_s1_c, __global u32 *bitmap_s1_d, __global u32 *bitmap_s2_a, __global u32 *bitmap_s2_b, __global u32 *bitmap_s2_c, __global u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
49 {
50 if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0);
51 if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0);
52 if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0);
53 if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0);
54
55 if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0);
56 if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0);
57 if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0);
58 if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0);
59
60 return (1);
61 }
62
63 static void mark_hash (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos)
64 {
65 hashes_shown[hash_pos] = 1;
66
67 plains_buf[hash_pos].gidvid = (gid * 1) + 0;
68 plains_buf[hash_pos].il_pos = il_pos;
69 }
70
/**
 * Keep the first len bytes of the 16-byte block w and clear the rest.
 *
 * Bytes are counted from the least significant byte of w[0] upwards. Words
 * below the cut are left untouched, the word containing the cut keeps only
 * its low (len % 4) bytes, and every word above it becomes 0.
 *
 * A len of 16 or more leaves the block unchanged.
 */
static void truncate_block (u32 w[4], const u32 len)
{
  if (len >= 16) return;

  // mask selecting the low (len % 4) bytes; it is 0 when len is word-aligned
  const u32 keep = ~(0xffffffff << ((len & 3) * 8));

  // the word indices stay literal so the array is never indexed dynamically
  switch (len >> 2)
  {
    case 0: w[0] &= keep; w[1] = 0; w[2] = 0; w[3] = 0; break;
    case 1:               w[1] &= keep; w[2] = 0; w[3] = 0; break;
    case 2:                             w[2] &= keep; w[3] = 0; break;
    case 3:                                           w[3] &= keep; break;
  }
}
133
/**
 * Widen the 16 bytes of `in` to 16 bits each.
 *
 * Every input byte becomes one 16-bit unit whose low byte is the input byte
 * and whose high byte is zero. Input words in[0] and in[1] fill out1[0..3];
 * in[2] and in[3] fill out2[0..3]. Within a word the lower-addressed byte
 * lands in the lower half.
 *
 * The body is selected by IS_NV / IS_AMD; when neither is defined nothing is
 * written. The IS_NV variant relies on __byte_perm (), defined elsewhere
 * (NOTE(review): assumed to follow byte-permute semantics, under which both
 * variants compute the same values - confirm).
 *
 * Stores run from out2[3] down to out1[0]. With that order every input word
 * has been read before a store could overwrite it when `in` and `out1` are
 * the same array, so the order must be kept.
 */
static void make_unicode (const u32 in[4], u32 out1[4], u32 out2[4])
{
  #ifdef IS_NV
  // selector 0x7372 spreads bytes 2 and 3, selector 0x7170 spreads bytes 0 and 1
  out2[3] = __byte_perm (in[3], 0, 0x7372);
  out2[2] = __byte_perm (in[3], 0, 0x7170);
  out2[1] = __byte_perm (in[2], 0, 0x7372);
  out2[0] = __byte_perm (in[2], 0, 0x7170);
  out1[3] = __byte_perm (in[1], 0, 0x7372);
  out1[2] = __byte_perm (in[1], 0, 0x7170);
  out1[1] = __byte_perm (in[0], 0, 0x7372);
  out1[0] = __byte_perm (in[0], 0, 0x7170);
  #endif

  #ifdef IS_AMD
  // odd outputs take the upper two bytes of a word, even outputs the lower two
  out2[3] = ((in[3] & 0xFF000000) >> 8) | ((in[3] & 0x00FF0000) >> 16);
  out2[2] = ((in[3] & 0x0000FF00) << 8) | ((in[3] & 0x000000FF) >>  0);
  out2[1] = ((in[2] & 0xFF000000) >> 8) | ((in[2] & 0x00FF0000) >> 16);
  out2[0] = ((in[2] & 0x0000FF00) << 8) | ((in[2] & 0x000000FF) >>  0);
  out1[3] = ((in[1] & 0xFF000000) >> 8) | ((in[1] & 0x00FF0000) >> 16);
  out1[2] = ((in[1] & 0x0000FF00) << 8) | ((in[1] & 0x000000FF) >>  0);
  out1[1] = ((in[0] & 0xFF000000) >> 8) | ((in[0] & 0x00FF0000) >> 16);
  out1[0] = ((in[0] & 0x0000FF00) << 8) | ((in[0] & 0x000000FF) >>  0);
  #endif
}
158
/**
 * Pack eight words of 16-bit units back into four words of bytes.
 *
 * From every input word only byte 0 and byte 2 are used (the low byte of each
 * 16-bit half); bytes 1 and 3 are discarded. Two consecutive input words
 * produce one output word: in1[0..3] yield out[0] and out[1], in2[0..3]
 * yield out[2] and out[3].
 *
 * The body is selected by IS_NV / IS_AMD; when neither is defined nothing is
 * written. The IS_NV variant relies on __byte_perm (), defined elsewhere
 * (NOTE(review): assumed to follow byte-permute semantics, under which both
 * variants compute the same values - confirm).
 *
 * Stores run from out[0] up to out[3]; the order is kept as in the original.
 */
static void undo_unicode (const u32 in1[4], const u32 in2[4], u32 out[4])
{
  #ifdef IS_NV
  // selector 0x6420 gathers bytes 0 and 2 of both operands
  out[0] = __byte_perm (in1[0], in1[1], 0x6420);
  out[1] = __byte_perm (in1[2], in1[3], 0x6420);
  out[2] = __byte_perm (in2[0], in2[1], 0x6420);
  out[3] = __byte_perm (in2[2], in2[3], 0x6420);
  #endif

  #ifdef IS_AMD
  out[0] = ((in1[0] >>  0) & 0x000000ff) | ((in1[0] >> 8) & 0x0000ff00)
         | ((in1[1] << 16) & 0x00ff0000) | ((in1[1] << 8) & 0xff000000);
  out[1] = ((in1[2] >>  0) & 0x000000ff) | ((in1[2] >> 8) & 0x0000ff00)
         | ((in1[3] << 16) & 0x00ff0000) | ((in1[3] << 8) & 0xff000000);
  out[2] = ((in2[0] >>  0) & 0x000000ff) | ((in2[0] >> 8) & 0x0000ff00)
         | ((in2[1] << 16) & 0x00ff0000) | ((in2[1] << 8) & 0xff000000);
  out[3] = ((in2[2] >>  0) & 0x000000ff) | ((in2[2] >> 8) & 0x0000ff00)
         | ((in2[3] << 16) & 0x00ff0000) | ((in2[3] << 8) & 0xff000000);
  #endif
}
179
/**
 * Put the marker byte 0x01 at byte position `offset` of the 16-byte buffer w0.
 * Formerly named append_0x01_1.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x01
 * only if it was zero before.
 *
 * Offsets of 16 and above leave the buffer untouched.
 */
static void append_0x01_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x01 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the array is never indexed dynamically
  switch (offset >> 2)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;
  }
}
250
/**
 * Put the marker byte 0x01 at byte position `offset` of the 32-byte buffer
 * formed by w0 followed by w1. Formerly named append_0x01_2.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x01
 * only if it was zero before.
 *
 * Offsets of 32 and above leave both arrays untouched.
 */
static void append_0x01_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x01 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case 4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case 5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case 6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case 7: w1[3] = lane ? (w1[3] | mark) : mark; break;
  }
}
385
/**
 * Put the marker byte 0x01 at byte position `offset` of the 48-byte buffer
 * formed by w0, w1 and w2 in that order. Formerly named append_0x01_3.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x01
 * only if it was zero before.
 *
 * Offsets of 48 and above leave all arrays untouched.
 */
static void append_0x01_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x01 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
  }
}
584
/**
 * Put the marker byte 0x01 at byte position `offset` of the 64-byte buffer
 * formed by w0, w1, w2 and w3 in that order. Formerly named append_0x01_4.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x01
 * only if it was zero before.
 *
 * Offsets of 64 and above leave all arrays untouched.
 */
static void append_0x01_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x01 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
  }
}
847
/**
 * Put the marker byte 0x01 at byte position `offset` of the 128-byte buffer
 * formed by w0 through w7 in that order. Formerly named append_0x01_8.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x01
 * only if it was zero before.
 *
 * Offsets of 128 and above leave all arrays untouched.
 */
static void append_0x01_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x01 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;

    case 16: w4[0] = lane ? (w4[0] | mark) : mark; break;
    case 17: w4[1] = lane ? (w4[1] | mark) : mark; break;
    case 18: w4[2] = lane ? (w4[2] | mark) : mark; break;
    case 19: w4[3] = lane ? (w4[3] | mark) : mark; break;

    case 20: w5[0] = lane ? (w5[0] | mark) : mark; break;
    case 21: w5[1] = lane ? (w5[1] | mark) : mark; break;
    case 22: w5[2] = lane ? (w5[2] | mark) : mark; break;
    case 23: w5[3] = lane ? (w5[3] | mark) : mark; break;

    case 24: w6[0] = lane ? (w6[0] | mark) : mark; break;
    case 25: w6[1] = lane ? (w6[1] | mark) : mark; break;
    case 26: w6[2] = lane ? (w6[2] | mark) : mark; break;
    case 27: w6[3] = lane ? (w6[3] | mark) : mark; break;

    case 28: w7[0] = lane ? (w7[0] | mark) : mark; break;
    case 29: w7[1] = lane ? (w7[1] | mark) : mark; break;
    case 30: w7[2] = lane ? (w7[2] | mark) : mark; break;
    case 31: w7[3] = lane ? (w7[3] | mark) : mark; break;
  }
}
1366
/**
 * Put the marker byte 0x02 at byte position `offset` of the 16-byte buffer w0.
 * Formerly named append_0x02_1.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x02
 * only if it was zero before.
 *
 * Offsets of 16 and above leave the buffer untouched.
 */
static void append_0x02_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x02 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the array is never indexed dynamically
  switch (offset >> 2)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;
  }
}
1437
/**
 * Put the marker byte 0x02 at byte position `offset` of the 32-byte buffer
 * formed by w0 followed by w1. Formerly named append_0x02_2.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x02
 * only if it was zero before.
 *
 * Offsets of 32 and above leave both arrays untouched.
 */
static void append_0x02_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x02 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case 4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case 5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case 6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case 7: w1[3] = lane ? (w1[3] | mark) : mark; break;
  }
}
1572
/**
 * Put the marker byte 0x02 at byte position `offset` of the 48-byte buffer
 * formed by w0, w1 and w2 in that order. Formerly named append_0x02_3.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x02
 * only if it was zero before.
 *
 * Offsets of 48 and above leave all arrays untouched.
 */
static void append_0x02_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x02 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
  }
}
1771
/**
 * Put the marker byte 0x02 at byte position `offset` of the 64-byte buffer
 * formed by w0, w1, w2 and w3 in that order. Formerly named append_0x02_4.
 *
 * Bytes are counted from the least significant byte of w0[0] upwards.
 *
 * When offset is a multiple of 4 the whole target word is replaced by the
 * marker (its previous contents are discarded). For every other offset the
 * marker is OR-ed into the word, so the target byte ends up as exactly 0x02
 * only if it was zero before.
 *
 * Offsets of 64 and above leave all arrays untouched.
 */
static void append_0x02_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;            // byte position inside the word
  const u32 mark = 0x02 << (lane * 8);    // marker moved to that position

  // word indices stay literal so the arrays are never indexed dynamically
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
  }
}
2034
2035 // before: append_0x02_8
2036 static void append_0x02_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
2037 {
2038 switch (offset)
2039 {
2040 case 0:
2041 w0[0] = 0x02;
2042 break;
2043
2044 case 1:
2045 w0[0] = w0[0] | 0x0200;
2046 break;
2047
2048 case 2:
2049 w0[0] = w0[0] | 0x020000;
2050 break;
2051
2052 case 3:
2053 w0[0] = w0[0] | 0x02000000;
2054 break;
2055
2056 case 4:
2057 w0[1] = 0x02;
2058 break;
2059
2060 case 5:
2061 w0[1] = w0[1] | 0x0200;
2062 break;
2063
2064 case 6:
2065 w0[1] = w0[1] | 0x020000;
2066 break;
2067
2068 case 7:
2069 w0[1] = w0[1] | 0x02000000;
2070 break;
2071
2072 case 8:
2073 w0[2] = 0x02;
2074 break;
2075
2076 case 9:
2077 w0[2] = w0[2] | 0x0200;
2078 break;
2079
2080 case 10:
2081 w0[2] = w0[2] | 0x020000;
2082 break;
2083
2084 case 11:
2085 w0[2] = w0[2] | 0x02000000;
2086 break;
2087
2088 case 12:
2089 w0[3] = 0x02;
2090 break;
2091
2092 case 13:
2093 w0[3] = w0[3] | 0x0200;
2094 break;
2095
2096 case 14:
2097 w0[3] = w0[3] | 0x020000;
2098 break;
2099
2100 case 15:
2101 w0[3] = w0[3] | 0x02000000;
2102 break;
2103
2104 case 16:
2105 w1[0] = 0x02;
2106 break;
2107
2108 case 17:
2109 w1[0] = w1[0] | 0x0200;
2110 break;
2111
2112 case 18:
2113 w1[0] = w1[0] | 0x020000;
2114 break;
2115
2116 case 19:
2117 w1[0] = w1[0] | 0x02000000;
2118 break;
2119
2120 case 20:
2121 w1[1] = 0x02;
2122 break;
2123
2124 case 21:
2125 w1[1] = w1[1] | 0x0200;
2126 break;
2127
2128 case 22:
2129 w1[1] = w1[1] | 0x020000;
2130 break;
2131
2132 case 23:
2133 w1[1] = w1[1] | 0x02000000;
2134 break;
2135
2136 case 24:
2137 w1[2] = 0x02;
2138 break;
2139
2140 case 25:
2141 w1[2] = w1[2] | 0x0200;
2142 break;
2143
2144 case 26:
2145 w1[2] = w1[2] | 0x020000;
2146 break;
2147
2148 case 27:
2149 w1[2] = w1[2] | 0x02000000;
2150 break;
2151
2152 case 28:
2153 w1[3] = 0x02;
2154 break;
2155
2156 case 29:
2157 w1[3] = w1[3] | 0x0200;
2158 break;
2159
2160 case 30:
2161 w1[3] = w1[3] | 0x020000;
2162 break;
2163
2164 case 31:
2165 w1[3] = w1[3] | 0x02000000;
2166 break;
2167
2168 case 32:
2169 w2[0] = 0x02;
2170 break;
2171
2172 case 33:
2173 w2[0] = w2[0] | 0x0200;
2174 break;
2175
2176 case 34:
2177 w2[0] = w2[0] | 0x020000;
2178 break;
2179
2180 case 35:
2181 w2[0] = w2[0] | 0x02000000;
2182 break;
2183
2184 case 36:
2185 w2[1] = 0x02;
2186 break;
2187
2188 case 37:
2189 w2[1] = w2[1] | 0x0200;
2190 break;
2191
2192 case 38:
2193 w2[1] = w2[1] | 0x020000;
2194 break;
2195
2196 case 39:
2197 w2[1] = w2[1] | 0x02000000;
2198 break;
2199
2200 case 40:
2201 w2[2] = 0x02;
2202 break;
2203
2204 case 41:
2205 w2[2] = w2[2] | 0x0200;
2206 break;
2207
2208 case 42:
2209 w2[2] = w2[2] | 0x020000;
2210 break;
2211
2212 case 43:
2213 w2[2] = w2[2] | 0x02000000;
2214 break;
2215
2216 case 44:
2217 w2[3] = 0x02;
2218 break;
2219
2220 case 45:
2221 w2[3] = w2[3] | 0x0200;
2222 break;
2223
2224 case 46:
2225 w2[3] = w2[3] | 0x020000;
2226 break;
2227
2228 case 47:
2229 w2[3] = w2[3] | 0x02000000;
2230 break;
2231
2232 case 48:
2233 w3[0] = 0x02;
2234 break;
2235
2236 case 49:
2237 w3[0] = w3[0] | 0x0200;
2238 break;
2239
2240 case 50:
2241 w3[0] = w3[0] | 0x020000;
2242 break;
2243
2244 case 51:
2245 w3[0] = w3[0] | 0x02000000;
2246 break;
2247
2248 case 52:
2249 w3[1] = 0x02;
2250 break;
2251
2252 case 53:
2253 w3[1] = w3[1] | 0x0200;
2254 break;
2255
2256 case 54:
2257 w3[1] = w3[1] | 0x020000;
2258 break;
2259
2260 case 55:
2261 w3[1] = w3[1] | 0x02000000;
2262 break;
2263
2264 case 56:
2265 w3[2] = 0x02;
2266 break;
2267
2268 case 57:
2269 w3[2] = w3[2] | 0x0200;
2270 break;
2271
2272 case 58:
2273 w3[2] = w3[2] | 0x020000;
2274 break;
2275
2276 case 59:
2277 w3[2] = w3[2] | 0x02000000;
2278 break;
2279
2280 case 60:
2281 w3[3] = 0x02;
2282 break;
2283
2284 case 61:
2285 w3[3] = w3[3] | 0x0200;
2286 break;
2287
2288 case 62:
2289 w3[3] = w3[3] | 0x020000;
2290 break;
2291
2292 case 63:
2293 w3[3] = w3[3] | 0x02000000;
2294 break;
2295
2296 case 64:
2297 w4[0] = 0x02;
2298 break;
2299
2300 case 65:
2301 w4[0] = w4[0] | 0x0200;
2302 break;
2303
2304 case 66:
2305 w4[0] = w4[0] | 0x020000;
2306 break;
2307
2308 case 67:
2309 w4[0] = w4[0] | 0x02000000;
2310 break;
2311
2312 case 68:
2313 w4[1] = 0x02;
2314 break;
2315
2316 case 69:
2317 w4[1] = w4[1] | 0x0200;
2318 break;
2319
2320 case 70:
2321 w4[1] = w4[1] | 0x020000;
2322 break;
2323
2324 case 71:
2325 w4[1] = w4[1] | 0x02000000;
2326 break;
2327
2328 case 72:
2329 w4[2] = 0x02;
2330 break;
2331
2332 case 73:
2333 w4[2] = w4[2] | 0x0200;
2334 break;
2335
2336 case 74:
2337 w4[2] = w4[2] | 0x020000;
2338 break;
2339
2340 case 75:
2341 w4[2] = w4[2] | 0x02000000;
2342 break;
2343
2344 case 76:
2345 w4[3] = 0x02;
2346 break;
2347
2348 case 77:
2349 w4[3] = w4[3] | 0x0200;
2350 break;
2351
2352 case 78:
2353 w4[3] = w4[3] | 0x020000;
2354 break;
2355
2356 case 79:
2357 w4[3] = w4[3] | 0x02000000;
2358 break;
2359
2360 case 80:
2361 w5[0] = 0x02;
2362 break;
2363
2364 case 81:
2365 w5[0] = w5[0] | 0x0200;
2366 break;
2367
2368 case 82:
2369 w5[0] = w5[0] | 0x020000;
2370 break;
2371
2372 case 83:
2373 w5[0] = w5[0] | 0x02000000;
2374 break;
2375
2376 case 84:
2377 w5[1] = 0x02;
2378 break;
2379
2380 case 85:
2381 w5[1] = w5[1] | 0x0200;
2382 break;
2383
2384 case 86:
2385 w5[1] = w5[1] | 0x020000;
2386 break;
2387
2388 case 87:
2389 w5[1] = w5[1] | 0x02000000;
2390 break;
2391
2392 case 88:
2393 w5[2] = 0x02;
2394 break;
2395
2396 case 89:
2397 w5[2] = w5[2] | 0x0200;
2398 break;
2399
2400 case 90:
2401 w5[2] = w5[2] | 0x020000;
2402 break;
2403
2404 case 91:
2405 w5[2] = w5[2] | 0x02000000;
2406 break;
2407
2408 case 92:
2409 w5[3] = 0x02;
2410 break;
2411
2412 case 93:
2413 w5[3] = w5[3] | 0x0200;
2414 break;
2415
2416 case 94:
2417 w5[3] = w5[3] | 0x020000;
2418 break;
2419
2420 case 95:
2421 w5[3] = w5[3] | 0x02000000;
2422 break;
2423
2424 case 96:
2425 w6[0] = 0x02;
2426 break;
2427
2428 case 97:
2429 w6[0] = w6[0] | 0x0200;
2430 break;
2431
2432 case 98:
2433 w6[0] = w6[0] | 0x020000;
2434 break;
2435
2436 case 99:
2437 w6[0] = w6[0] | 0x02000000;
2438 break;
2439
2440 case 100:
2441 w6[1] = 0x02;
2442 break;
2443
2444 case 101:
2445 w6[1] = w6[1] | 0x0200;
2446 break;
2447
2448 case 102:
2449 w6[1] = w6[1] | 0x020000;
2450 break;
2451
2452 case 103:
2453 w6[1] = w6[1] | 0x02000000;
2454 break;
2455
2456 case 104:
2457 w6[2] = 0x02;
2458 break;
2459
2460 case 105:
2461 w6[2] = w6[2] | 0x0200;
2462 break;
2463
2464 case 106:
2465 w6[2] = w6[2] | 0x020000;
2466 break;
2467
2468 case 107:
2469 w6[2] = w6[2] | 0x02000000;
2470 break;
2471
2472 case 108:
2473 w6[3] = 0x02;
2474 break;
2475
2476 case 109:
2477 w6[3] = w6[3] | 0x0200;
2478 break;
2479
2480 case 110:
2481 w6[3] = w6[3] | 0x020000;
2482 break;
2483
2484 case 111:
2485 w6[3] = w6[3] | 0x02000000;
2486 break;
2487
2488 case 112:
2489 w7[0] = 0x02;
2490 break;
2491
2492 case 113:
2493 w7[0] = w7[0] | 0x0200;
2494 break;
2495
2496 case 114:
2497 w7[0] = w7[0] | 0x020000;
2498 break;
2499
2500 case 115:
2501 w7[0] = w7[0] | 0x02000000;
2502 break;
2503
2504 case 116:
2505 w7[1] = 0x02;
2506 break;
2507
2508 case 117:
2509 w7[1] = w7[1] | 0x0200;
2510 break;
2511
2512 case 118:
2513 w7[1] = w7[1] | 0x020000;
2514 break;
2515
2516 case 119:
2517 w7[1] = w7[1] | 0x02000000;
2518 break;
2519
2520 case 120:
2521 w7[2] = 0x02;
2522 break;
2523
2524 case 121:
2525 w7[2] = w7[2] | 0x0200;
2526 break;
2527
2528 case 122:
2529 w7[2] = w7[2] | 0x020000;
2530 break;
2531
2532 case 123:
2533 w7[2] = w7[2] | 0x02000000;
2534 break;
2535
2536 case 124:
2537 w7[3] = 0x02;
2538 break;
2539
2540 case 125:
2541 w7[3] = w7[3] | 0x0200;
2542 break;
2543
2544 case 126:
2545 w7[3] = w7[3] | 0x020000;
2546 break;
2547
2548 case 127:
2549 w7[3] = w7[3] | 0x02000000;
2550 break;
2551 }
2552 }
2553
2554 // before: append_0x80_1
2555 static void append_0x80_1x4 (u32 w0[4], const u32 offset)
2556 {
2557 switch (offset)
2558 {
2559 case 0:
2560 w0[0] = 0x80;
2561 break;
2562
2563 case 1:
2564 w0[0] = w0[0] | 0x8000;
2565 break;
2566
2567 case 2:
2568 w0[0] = w0[0] | 0x800000;
2569 break;
2570
2571 case 3:
2572 w0[0] = w0[0] | 0x80000000;
2573 break;
2574
2575 case 4:
2576 w0[1] = 0x80;
2577 break;
2578
2579 case 5:
2580 w0[1] = w0[1] | 0x8000;
2581 break;
2582
2583 case 6:
2584 w0[1] = w0[1] | 0x800000;
2585 break;
2586
2587 case 7:
2588 w0[1] = w0[1] | 0x80000000;
2589 break;
2590
2591 case 8:
2592 w0[2] = 0x80;
2593 break;
2594
2595 case 9:
2596 w0[2] = w0[2] | 0x8000;
2597 break;
2598
2599 case 10:
2600 w0[2] = w0[2] | 0x800000;
2601 break;
2602
2603 case 11:
2604 w0[2] = w0[2] | 0x80000000;
2605 break;
2606
2607 case 12:
2608 w0[3] = 0x80;
2609 break;
2610
2611 case 13:
2612 w0[3] = w0[3] | 0x8000;
2613 break;
2614
2615 case 14:
2616 w0[3] = w0[3] | 0x800000;
2617 break;
2618
2619 case 15:
2620 w0[3] = w0[3] | 0x80000000;
2621 break;
2622 }
2623 }
2624
2625 // before: append_0x80_2
2626 static void append_0x80_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
2627 {
2628 switch (offset)
2629 {
2630 case 0:
2631 w0[0] = 0x80;
2632 break;
2633
2634 case 1:
2635 w0[0] = w0[0] | 0x8000;
2636 break;
2637
2638 case 2:
2639 w0[0] = w0[0] | 0x800000;
2640 break;
2641
2642 case 3:
2643 w0[0] = w0[0] | 0x80000000;
2644 break;
2645
2646 case 4:
2647 w0[1] = 0x80;
2648 break;
2649
2650 case 5:
2651 w0[1] = w0[1] | 0x8000;
2652 break;
2653
2654 case 6:
2655 w0[1] = w0[1] | 0x800000;
2656 break;
2657
2658 case 7:
2659 w0[1] = w0[1] | 0x80000000;
2660 break;
2661
2662 case 8:
2663 w0[2] = 0x80;
2664 break;
2665
2666 case 9:
2667 w0[2] = w0[2] | 0x8000;
2668 break;
2669
2670 case 10:
2671 w0[2] = w0[2] | 0x800000;
2672 break;
2673
2674 case 11:
2675 w0[2] = w0[2] | 0x80000000;
2676 break;
2677
2678 case 12:
2679 w0[3] = 0x80;
2680 break;
2681
2682 case 13:
2683 w0[3] = w0[3] | 0x8000;
2684 break;
2685
2686 case 14:
2687 w0[3] = w0[3] | 0x800000;
2688 break;
2689
2690 case 15:
2691 w0[3] = w0[3] | 0x80000000;
2692 break;
2693
2694 case 16:
2695 w1[0] = 0x80;
2696 break;
2697
2698 case 17:
2699 w1[0] = w1[0] | 0x8000;
2700 break;
2701
2702 case 18:
2703 w1[0] = w1[0] | 0x800000;
2704 break;
2705
2706 case 19:
2707 w1[0] = w1[0] | 0x80000000;
2708 break;
2709
2710 case 20:
2711 w1[1] = 0x80;
2712 break;
2713
2714 case 21:
2715 w1[1] = w1[1] | 0x8000;
2716 break;
2717
2718 case 22:
2719 w1[1] = w1[1] | 0x800000;
2720 break;
2721
2722 case 23:
2723 w1[1] = w1[1] | 0x80000000;
2724 break;
2725
2726 case 24:
2727 w1[2] = 0x80;
2728 break;
2729
2730 case 25:
2731 w1[2] = w1[2] | 0x8000;
2732 break;
2733
2734 case 26:
2735 w1[2] = w1[2] | 0x800000;
2736 break;
2737
2738 case 27:
2739 w1[2] = w1[2] | 0x80000000;
2740 break;
2741
2742 case 28:
2743 w1[3] = 0x80;
2744 break;
2745
2746 case 29:
2747 w1[3] = w1[3] | 0x8000;
2748 break;
2749
2750 case 30:
2751 w1[3] = w1[3] | 0x800000;
2752 break;
2753
2754 case 31:
2755 w1[3] = w1[3] | 0x80000000;
2756 break;
2757 }
2758 }
2759
2760 // before: append_0x80_3
2761 static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
2762 {
2763 switch (offset)
2764 {
2765 case 0:
2766 w0[0] = 0x80;
2767 break;
2768
2769 case 1:
2770 w0[0] = w0[0] | 0x8000;
2771 break;
2772
2773 case 2:
2774 w0[0] = w0[0] | 0x800000;
2775 break;
2776
2777 case 3:
2778 w0[0] = w0[0] | 0x80000000;
2779 break;
2780
2781 case 4:
2782 w0[1] = 0x80;
2783 break;
2784
2785 case 5:
2786 w0[1] = w0[1] | 0x8000;
2787 break;
2788
2789 case 6:
2790 w0[1] = w0[1] | 0x800000;
2791 break;
2792
2793 case 7:
2794 w0[1] = w0[1] | 0x80000000;
2795 break;
2796
2797 case 8:
2798 w0[2] = 0x80;
2799 break;
2800
2801 case 9:
2802 w0[2] = w0[2] | 0x8000;
2803 break;
2804
2805 case 10:
2806 w0[2] = w0[2] | 0x800000;
2807 break;
2808
2809 case 11:
2810 w0[2] = w0[2] | 0x80000000;
2811 break;
2812
2813 case 12:
2814 w0[3] = 0x80;
2815 break;
2816
2817 case 13:
2818 w0[3] = w0[3] | 0x8000;
2819 break;
2820
2821 case 14:
2822 w0[3] = w0[3] | 0x800000;
2823 break;
2824
2825 case 15:
2826 w0[3] = w0[3] | 0x80000000;
2827 break;
2828
2829 case 16:
2830 w1[0] = 0x80;
2831 break;
2832
2833 case 17:
2834 w1[0] = w1[0] | 0x8000;
2835 break;
2836
2837 case 18:
2838 w1[0] = w1[0] | 0x800000;
2839 break;
2840
2841 case 19:
2842 w1[0] = w1[0] | 0x80000000;
2843 break;
2844
2845 case 20:
2846 w1[1] = 0x80;
2847 break;
2848
2849 case 21:
2850 w1[1] = w1[1] | 0x8000;
2851 break;
2852
2853 case 22:
2854 w1[1] = w1[1] | 0x800000;
2855 break;
2856
2857 case 23:
2858 w1[1] = w1[1] | 0x80000000;
2859 break;
2860
2861 case 24:
2862 w1[2] = 0x80;
2863 break;
2864
2865 case 25:
2866 w1[2] = w1[2] | 0x8000;
2867 break;
2868
2869 case 26:
2870 w1[2] = w1[2] | 0x800000;
2871 break;
2872
2873 case 27:
2874 w1[2] = w1[2] | 0x80000000;
2875 break;
2876
2877 case 28:
2878 w1[3] = 0x80;
2879 break;
2880
2881 case 29:
2882 w1[3] = w1[3] | 0x8000;
2883 break;
2884
2885 case 30:
2886 w1[3] = w1[3] | 0x800000;
2887 break;
2888
2889 case 31:
2890 w1[3] = w1[3] | 0x80000000;
2891 break;
2892
2893 case 32:
2894 w2[0] = 0x80;
2895 break;
2896
2897 case 33:
2898 w2[0] = w2[0] | 0x8000;
2899 break;
2900
2901 case 34:
2902 w2[0] = w2[0] | 0x800000;
2903 break;
2904
2905 case 35:
2906 w2[0] = w2[0] | 0x80000000;
2907 break;
2908
2909 case 36:
2910 w2[1] = 0x80;
2911 break;
2912
2913 case 37:
2914 w2[1] = w2[1] | 0x8000;
2915 break;
2916
2917 case 38:
2918 w2[1] = w2[1] | 0x800000;
2919 break;
2920
2921 case 39:
2922 w2[1] = w2[1] | 0x80000000;
2923 break;
2924
2925 case 40:
2926 w2[2] = 0x80;
2927 break;
2928
2929 case 41:
2930 w2[2] = w2[2] | 0x8000;
2931 break;
2932
2933 case 42:
2934 w2[2] = w2[2] | 0x800000;
2935 break;
2936
2937 case 43:
2938 w2[2] = w2[2] | 0x80000000;
2939 break;
2940
2941 case 44:
2942 w2[3] = 0x80;
2943 break;
2944
2945 case 45:
2946 w2[3] = w2[3] | 0x8000;
2947 break;
2948
2949 case 46:
2950 w2[3] = w2[3] | 0x800000;
2951 break;
2952
2953 case 47:
2954 w2[3] = w2[3] | 0x80000000;
2955 break;
2956 }
2957 }
2958
2959 // before: append_0x80_4
2960 static void append_0x80_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
2961 {
2962 switch (offset)
2963 {
2964 case 0:
2965 w0[0] = 0x80;
2966 break;
2967
2968 case 1:
2969 w0[0] = w0[0] | 0x8000;
2970 break;
2971
2972 case 2:
2973 w0[0] = w0[0] | 0x800000;
2974 break;
2975
2976 case 3:
2977 w0[0] = w0[0] | 0x80000000;
2978 break;
2979
2980 case 4:
2981 w0[1] = 0x80;
2982 break;
2983
2984 case 5:
2985 w0[1] = w0[1] | 0x8000;
2986 break;
2987
2988 case 6:
2989 w0[1] = w0[1] | 0x800000;
2990 break;
2991
2992 case 7:
2993 w0[1] = w0[1] | 0x80000000;
2994 break;
2995
2996 case 8:
2997 w0[2] = 0x80;
2998 break;
2999
3000 case 9:
3001 w0[2] = w0[2] | 0x8000;
3002 break;
3003
3004 case 10:
3005 w0[2] = w0[2] | 0x800000;
3006 break;
3007
3008 case 11:
3009 w0[2] = w0[2] | 0x80000000;
3010 break;
3011
3012 case 12:
3013 w0[3] = 0x80;
3014 break;
3015
3016 case 13:
3017 w0[3] = w0[3] | 0x8000;
3018 break;
3019
3020 case 14:
3021 w0[3] = w0[3] | 0x800000;
3022 break;
3023
3024 case 15:
3025 w0[3] = w0[3] | 0x80000000;
3026 break;
3027
3028 case 16:
3029 w1[0] = 0x80;
3030 break;
3031
3032 case 17:
3033 w1[0] = w1[0] | 0x8000;
3034 break;
3035
3036 case 18:
3037 w1[0] = w1[0] | 0x800000;
3038 break;
3039
3040 case 19:
3041 w1[0] = w1[0] | 0x80000000;
3042 break;
3043
3044 case 20:
3045 w1[1] = 0x80;
3046 break;
3047
3048 case 21:
3049 w1[1] = w1[1] | 0x8000;
3050 break;
3051
3052 case 22:
3053 w1[1] = w1[1] | 0x800000;
3054 break;
3055
3056 case 23:
3057 w1[1] = w1[1] | 0x80000000;
3058 break;
3059
3060 case 24:
3061 w1[2] = 0x80;
3062 break;
3063
3064 case 25:
3065 w1[2] = w1[2] | 0x8000;
3066 break;
3067
3068 case 26:
3069 w1[2] = w1[2] | 0x800000;
3070 break;
3071
3072 case 27:
3073 w1[2] = w1[2] | 0x80000000;
3074 break;
3075
3076 case 28:
3077 w1[3] = 0x80;
3078 break;
3079
3080 case 29:
3081 w1[3] = w1[3] | 0x8000;
3082 break;
3083
3084 case 30:
3085 w1[3] = w1[3] | 0x800000;
3086 break;
3087
3088 case 31:
3089 w1[3] = w1[3] | 0x80000000;
3090 break;
3091
3092 case 32:
3093 w2[0] = 0x80;
3094 break;
3095
3096 case 33:
3097 w2[0] = w2[0] | 0x8000;
3098 break;
3099
3100 case 34:
3101 w2[0] = w2[0] | 0x800000;
3102 break;
3103
3104 case 35:
3105 w2[0] = w2[0] | 0x80000000;
3106 break;
3107
3108 case 36:
3109 w2[1] = 0x80;
3110 break;
3111
3112 case 37:
3113 w2[1] = w2[1] | 0x8000;
3114 break;
3115
3116 case 38:
3117 w2[1] = w2[1] | 0x800000;
3118 break;
3119
3120 case 39:
3121 w2[1] = w2[1] | 0x80000000;
3122 break;
3123
3124 case 40:
3125 w2[2] = 0x80;
3126 break;
3127
3128 case 41:
3129 w2[2] = w2[2] | 0x8000;
3130 break;
3131
3132 case 42:
3133 w2[2] = w2[2] | 0x800000;
3134 break;
3135
3136 case 43:
3137 w2[2] = w2[2] | 0x80000000;
3138 break;
3139
3140 case 44:
3141 w2[3] = 0x80;
3142 break;
3143
3144 case 45:
3145 w2[3] = w2[3] | 0x8000;
3146 break;
3147
3148 case 46:
3149 w2[3] = w2[3] | 0x800000;
3150 break;
3151
3152 case 47:
3153 w2[3] = w2[3] | 0x80000000;
3154 break;
3155
3156 case 48:
3157 w3[0] = 0x80;
3158 break;
3159
3160 case 49:
3161 w3[0] = w3[0] | 0x8000;
3162 break;
3163
3164 case 50:
3165 w3[0] = w3[0] | 0x800000;
3166 break;
3167
3168 case 51:
3169 w3[0] = w3[0] | 0x80000000;
3170 break;
3171
3172 case 52:
3173 w3[1] = 0x80;
3174 break;
3175
3176 case 53:
3177 w3[1] = w3[1] | 0x8000;
3178 break;
3179
3180 case 54:
3181 w3[1] = w3[1] | 0x800000;
3182 break;
3183
3184 case 55:
3185 w3[1] = w3[1] | 0x80000000;
3186 break;
3187
3188 case 56:
3189 w3[2] = 0x80;
3190 break;
3191
3192 case 57:
3193 w3[2] = w3[2] | 0x8000;
3194 break;
3195
3196 case 58:
3197 w3[2] = w3[2] | 0x800000;
3198 break;
3199
3200 case 59:
3201 w3[2] = w3[2] | 0x80000000;
3202 break;
3203
3204 case 60:
3205 w3[3] = 0x80;
3206 break;
3207
3208 case 61:
3209 w3[3] = w3[3] | 0x8000;
3210 break;
3211
3212 case 62:
3213 w3[3] = w3[3] | 0x800000;
3214 break;
3215
3216 case 63:
3217 w3[3] = w3[3] | 0x80000000;
3218 break;
3219 }
3220 }
3221
3222 // before: append_0x80_8
3223 static void append_0x80_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
3224 {
3225 switch (offset)
3226 {
3227 case 0:
3228 w0[0] = 0x80;
3229 break;
3230
3231 case 1:
3232 w0[0] = w0[0] | 0x8000;
3233 break;
3234
3235 case 2:
3236 w0[0] = w0[0] | 0x800000;
3237 break;
3238
3239 case 3:
3240 w0[0] = w0[0] | 0x80000000;
3241 break;
3242
3243 case 4:
3244 w0[1] = 0x80;
3245 break;
3246
3247 case 5:
3248 w0[1] = w0[1] | 0x8000;
3249 break;
3250
3251 case 6:
3252 w0[1] = w0[1] | 0x800000;
3253 break;
3254
3255 case 7:
3256 w0[1] = w0[1] | 0x80000000;
3257 break;
3258
3259 case 8:
3260 w0[2] = 0x80;
3261 break;
3262
3263 case 9:
3264 w0[2] = w0[2] | 0x8000;
3265 break;
3266
3267 case 10:
3268 w0[2] = w0[2] | 0x800000;
3269 break;
3270
3271 case 11:
3272 w0[2] = w0[2] | 0x80000000;
3273 break;
3274
3275 case 12:
3276 w0[3] = 0x80;
3277 break;
3278
3279 case 13:
3280 w0[3] = w0[3] | 0x8000;
3281 break;
3282
3283 case 14:
3284 w0[3] = w0[3] | 0x800000;
3285 break;
3286
3287 case 15:
3288 w0[3] = w0[3] | 0x80000000;
3289 break;
3290
3291 case 16:
3292 w1[0] = 0x80;
3293 break;
3294
3295 case 17:
3296 w1[0] = w1[0] | 0x8000;
3297 break;
3298
3299 case 18:
3300 w1[0] = w1[0] | 0x800000;
3301 break;
3302
3303 case 19:
3304 w1[0] = w1[0] | 0x80000000;
3305 break;
3306
3307 case 20:
3308 w1[1] = 0x80;
3309 break;
3310
3311 case 21:
3312 w1[1] = w1[1] | 0x8000;
3313 break;
3314
3315 case 22:
3316 w1[1] = w1[1] | 0x800000;
3317 break;
3318
3319 case 23:
3320 w1[1] = w1[1] | 0x80000000;
3321 break;
3322
3323 case 24:
3324 w1[2] = 0x80;
3325 break;
3326
3327 case 25:
3328 w1[2] = w1[2] | 0x8000;
3329 break;
3330
3331 case 26:
3332 w1[2] = w1[2] | 0x800000;
3333 break;
3334
3335 case 27:
3336 w1[2] = w1[2] | 0x80000000;
3337 break;
3338
3339 case 28:
3340 w1[3] = 0x80;
3341 break;
3342
3343 case 29:
3344 w1[3] = w1[3] | 0x8000;
3345 break;
3346
3347 case 30:
3348 w1[3] = w1[3] | 0x800000;
3349 break;
3350
3351 case 31:
3352 w1[3] = w1[3] | 0x80000000;
3353 break;
3354
3355 case 32:
3356 w2[0] = 0x80;
3357 break;
3358
3359 case 33:
3360 w2[0] = w2[0] | 0x8000;
3361 break;
3362
3363 case 34:
3364 w2[0] = w2[0] | 0x800000;
3365 break;
3366
3367 case 35:
3368 w2[0] = w2[0] | 0x80000000;
3369 break;
3370
3371 case 36:
3372 w2[1] = 0x80;
3373 break;
3374
3375 case 37:
3376 w2[1] = w2[1] | 0x8000;
3377 break;
3378
3379 case 38:
3380 w2[1] = w2[1] | 0x800000;
3381 break;
3382
3383 case 39:
3384 w2[1] = w2[1] | 0x80000000;
3385 break;
3386
3387 case 40:
3388 w2[2] = 0x80;
3389 break;
3390
3391 case 41:
3392 w2[2] = w2[2] | 0x8000;
3393 break;
3394
3395 case 42:
3396 w2[2] = w2[2] | 0x800000;
3397 break;
3398
3399 case 43:
3400 w2[2] = w2[2] | 0x80000000;
3401 break;
3402
3403 case 44:
3404 w2[3] = 0x80;
3405 break;
3406
3407 case 45:
3408 w2[3] = w2[3] | 0x8000;
3409 break;
3410
3411 case 46:
3412 w2[3] = w2[3] | 0x800000;
3413 break;
3414
3415 case 47:
3416 w2[3] = w2[3] | 0x80000000;
3417 break;
3418
3419 case 48:
3420 w3[0] = 0x80;
3421 break;
3422
3423 case 49:
3424 w3[0] = w3[0] | 0x8000;
3425 break;
3426
3427 case 50:
3428 w3[0] = w3[0] | 0x800000;
3429 break;
3430
3431 case 51:
3432 w3[0] = w3[0] | 0x80000000;
3433 break;
3434
3435 case 52:
3436 w3[1] = 0x80;
3437 break;
3438
3439 case 53:
3440 w3[1] = w3[1] | 0x8000;
3441 break;
3442
3443 case 54:
3444 w3[1] = w3[1] | 0x800000;
3445 break;
3446
3447 case 55:
3448 w3[1] = w3[1] | 0x80000000;
3449 break;
3450
3451 case 56:
3452 w3[2] = 0x80;
3453 break;
3454
3455 case 57:
3456 w3[2] = w3[2] | 0x8000;
3457 break;
3458
3459 case 58:
3460 w3[2] = w3[2] | 0x800000;
3461 break;
3462
3463 case 59:
3464 w3[2] = w3[2] | 0x80000000;
3465 break;
3466
3467 case 60:
3468 w3[3] = 0x80;
3469 break;
3470
3471 case 61:
3472 w3[3] = w3[3] | 0x8000;
3473 break;
3474
3475 case 62:
3476 w3[3] = w3[3] | 0x800000;
3477 break;
3478
3479 case 63:
3480 w3[3] = w3[3] | 0x80000000;
3481 break;
3482
3483 case 64:
3484 w4[0] = 0x80;
3485 break;
3486
3487 case 65:
3488 w4[0] = w4[0] | 0x8000;
3489 break;
3490
3491 case 66:
3492 w4[0] = w4[0] | 0x800000;
3493 break;
3494
3495 case 67:
3496 w4[0] = w4[0] | 0x80000000;
3497 break;
3498
3499 case 68:
3500 w4[1] = 0x80;
3501 break;
3502
3503 case 69:
3504 w4[1] = w4[1] | 0x8000;
3505 break;
3506
3507 case 70:
3508 w4[1] = w4[1] | 0x800000;
3509 break;
3510
3511 case 71:
3512 w4[1] = w4[1] | 0x80000000;
3513 break;
3514
3515 case 72:
3516 w4[2] = 0x80;
3517 break;
3518
3519 case 73:
3520 w4[2] = w4[2] | 0x8000;
3521 break;
3522
3523 case 74:
3524 w4[2] = w4[2] | 0x800000;
3525 break;
3526
3527 case 75:
3528 w4[2] = w4[2] | 0x80000000;
3529 break;
3530
3531 case 76:
3532 w4[3] = 0x80;
3533 break;
3534
3535 case 77:
3536 w4[3] = w4[3] | 0x8000;
3537 break;
3538
3539 case 78:
3540 w4[3] = w4[3] | 0x800000;
3541 break;
3542
3543 case 79:
3544 w4[3] = w4[3] | 0x80000000;
3545 break;
3546
3547 case 80:
3548 w5[0] = 0x80;
3549 break;
3550
3551 case 81:
3552 w5[0] = w5[0] | 0x8000;
3553 break;
3554
3555 case 82:
3556 w5[0] = w5[0] | 0x800000;
3557 break;
3558
3559 case 83:
3560 w5[0] = w5[0] | 0x80000000;
3561 break;
3562
3563 case 84:
3564 w5[1] = 0x80;
3565 break;
3566
3567 case 85:
3568 w5[1] = w5[1] | 0x8000;
3569 break;
3570
3571 case 86:
3572 w5[1] = w5[1] | 0x800000;
3573 break;
3574
3575 case 87:
3576 w5[1] = w5[1] | 0x80000000;
3577 break;
3578
3579 case 88:
3580 w5[2] = 0x80;
3581 break;
3582
3583 case 89:
3584 w5[2] = w5[2] | 0x8000;
3585 break;
3586
3587 case 90:
3588 w5[2] = w5[2] | 0x800000;
3589 break;
3590
3591 case 91:
3592 w5[2] = w5[2] | 0x80000000;
3593 break;
3594
3595 case 92:
3596 w5[3] = 0x80;
3597 break;
3598
3599 case 93:
3600 w5[3] = w5[3] | 0x8000;
3601 break;
3602
3603 case 94:
3604 w5[3] = w5[3] | 0x800000;
3605 break;
3606
3607 case 95:
3608 w5[3] = w5[3] | 0x80000000;
3609 break;
3610
3611 case 96:
3612 w6[0] = 0x80;
3613 break;
3614
3615 case 97:
3616 w6[0] = w6[0] | 0x8000;
3617 break;
3618
3619 case 98:
3620 w6[0] = w6[0] | 0x800000;
3621 break;
3622
3623 case 99:
3624 w6[0] = w6[0] | 0x80000000;
3625 break;
3626
3627 case 100:
3628 w6[1] = 0x80;
3629 break;
3630
3631 case 101:
3632 w6[1] = w6[1] | 0x8000;
3633 break;
3634
3635 case 102:
3636 w6[1] = w6[1] | 0x800000;
3637 break;
3638
3639 case 103:
3640 w6[1] = w6[1] | 0x80000000;
3641 break;
3642
3643 case 104:
3644 w6[2] = 0x80;
3645 break;
3646
3647 case 105:
3648 w6[2] = w6[2] | 0x8000;
3649 break;
3650
3651 case 106:
3652 w6[2] = w6[2] | 0x800000;
3653 break;
3654
3655 case 107:
3656 w6[2] = w6[2] | 0x80000000;
3657 break;
3658
3659 case 108:
3660 w6[3] = 0x80;
3661 break;
3662
3663 case 109:
3664 w6[3] = w6[3] | 0x8000;
3665 break;
3666
3667 case 110:
3668 w6[3] = w6[3] | 0x800000;
3669 break;
3670
3671 case 111:
3672 w6[3] = w6[3] | 0x80000000;
3673 break;
3674
3675 case 112:
3676 w7[0] = 0x80;
3677 break;
3678
3679 case 113:
3680 w7[0] = w7[0] | 0x8000;
3681 break;
3682
3683 case 114:
3684 w7[0] = w7[0] | 0x800000;
3685 break;
3686
3687 case 115:
3688 w7[0] = w7[0] | 0x80000000;
3689 break;
3690
3691 case 116:
3692 w7[1] = 0x80;
3693 break;
3694
3695 case 117:
3696 w7[1] = w7[1] | 0x8000;
3697 break;
3698
3699 case 118:
3700 w7[1] = w7[1] | 0x800000;
3701 break;
3702
3703 case 119:
3704 w7[1] = w7[1] | 0x80000000;
3705 break;
3706
3707 case 120:
3708 w7[2] = 0x80;
3709 break;
3710
3711 case 121:
3712 w7[2] = w7[2] | 0x8000;
3713 break;
3714
3715 case 122:
3716 w7[2] = w7[2] | 0x800000;
3717 break;
3718
3719 case 123:
3720 w7[2] = w7[2] | 0x80000000;
3721 break;
3722
3723 case 124:
3724 w7[3] = 0x80;
3725 break;
3726
3727 case 125:
3728 w7[3] = w7[3] | 0x8000;
3729 break;
3730
3731 case 126:
3732 w7[3] = w7[3] | 0x800000;
3733 break;
3734
3735 case 127:
3736 w7[3] = w7[3] | 0x80000000;
3737 break;
3738 }
3739 }
3740
3741 // before: device_memcat2L
3742 static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
3743 {
3744 switch (offset)
3745 {
3746 case 1:
3747 dst0[0] = src_l0[0] | src_r0[0] << 8;
3748 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
3749 break;
3750
3751 case 2:
3752 dst0[0] = src_l0[0] | src_r0[0] << 16;
3753 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
3754 break;
3755
3756 case 3:
3757 dst0[0] = src_l0[0] | src_r0[0] << 24;
3758 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
3759 break;
3760
3761 case 4:
3762 dst0[1] = src_r0[0];
3763 break;
3764
3765 case 5:
3766 dst0[1] = src_l0[1] | src_r0[0] << 8;
3767 break;
3768
3769 case 6:
3770 dst0[1] = src_l0[1] | src_r0[0] << 16;
3771 break;
3772
3773 case 7:
3774 dst0[1] = src_l0[1] | src_r0[0] << 24;
3775 break;
3776 }
3777 }
3778
3779 // before: device_memcat4L
3780 static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
3781 {
3782 switch (offset)
3783 {
3784 case 1:
3785 dst0[0] = src_l0[0] | src_r0[0] << 8;
3786 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
3787 dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
3788 dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
3789 break;
3790
3791 case 2:
3792 dst0[0] = src_l0[0] | src_r0[0] << 16;
3793 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
3794 dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
3795 dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
3796 break;
3797
3798 case 3:
3799 dst0[0] = src_l0[0] | src_r0[0] << 24;
3800 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
3801 dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
3802 dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
3803 break;
3804
3805 case 4:
3806 dst0[1] = src_r0[0];
3807 dst0[2] = src_r0[1];
3808 dst0[3] = src_r0[2];
3809 break;
3810
3811 case 5:
3812 dst0[1] = src_l0[1] | src_r0[0] << 8;
3813 dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
3814 dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
3815 break;
3816
3817 case 6:
3818 dst0[1] = src_l0[1] | src_r0[0] << 16;
3819 dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
3820 dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
3821 break;
3822
3823 case 7:
3824 dst0[1] = src_l0[1] | src_r0[0] << 24;
3825 dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
3826 dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
3827 break;
3828
3829 case 8:
3830 dst0[2] = src_r0[0];
3831 dst0[3] = src_r0[1];
3832 break;
3833
3834 case 9:
3835 dst0[2] = src_l0[2] | src_r0[0] << 8;
3836 dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
3837 break;
3838
3839 case 10:
3840 dst0[2] = src_l0[2] | src_r0[0] << 16;
3841 dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
3842 break;
3843
3844 case 11:
3845 dst0[2] = src_l0[2] | src_r0[0] << 24;
3846 dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
3847 break;
3848
3849 case 12:
3850 dst0[3] = src_r0[0];
3851 break;
3852
3853 case 13:
3854 dst0[3] = src_l0[3] | src_r0[0] << 8;
3855 break;
3856
3857 case 14:
3858 dst0[3] = src_l0[3] | src_r0[0] << 16;
3859 break;
3860
3861 case 15:
3862 dst0[3] = src_l0[3] | src_r0[0] << 24;
3863 break;
3864 }
3865 }
3866
3867 // before: device_memcat8L
3868 static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
3869 {
3870 switch (offset)
3871 {
3872 case 1:
3873 dst0[0] = src_l0[0] | src_r0[0] << 8;
3874 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
3875 dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
3876 dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
3877 dst1[0] = src_r0[3] >> 24;
3878 break;
3879
3880 case 2:
3881 dst0[0] = src_l0[0] | src_r0[0] << 16;
3882 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
3883 dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
3884 dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
3885 dst1[0] = src_r0[3] >> 16;
3886 break;
3887
3888 case 3:
3889 dst0[0] = src_l0[0] | src_r0[0] << 24;
3890 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
3891 dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
3892 dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
3893 dst1[0] = src_r0[3] >> 8;
3894 break;
3895
3896 case 4:
3897 dst0[1] = src_r0[0];
3898 dst0[2] = src_r0[1];
3899 dst0[3] = src_r0[2];
3900 dst1[0] = src_r0[3];
3901 break;
3902
3903 case 5:
3904 dst0[1] = src_l0[1] | src_r0[0] << 8;
3905 dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
3906 dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
3907 dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
3908 dst1[1] = src_r0[3] >> 24;
3909 break;
3910
3911 case 6:
3912 dst0[1] = src_l0[1] | src_r0[0] << 16;
3913 dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
3914 dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
3915 dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
3916 dst1[1] = src_r0[3] >> 16;
3917 break;
3918
3919 case 7:
3920 dst0[1] = src_l0[1] | src_r0[0] << 24;
3921 dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
3922 dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
3923 dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
3924 dst1[1] = src_r0[3] >> 8;
3925 break;
3926
3927 case 8:
3928 dst0[2] = src_r0[0];
3929 dst0[3] = src_r0[1];
3930 dst1[0] = src_r0[2];
3931 dst1[1] = src_r0[3];
3932 break;
3933
3934 case 9:
3935 dst0[2] = src_l0[2] | src_r0[0] << 8;
3936 dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
3937 dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
3938 dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
3939 dst1[2] = src_r0[3] >> 24;
3940 break;
3941
3942 case 10:
3943 dst0[2] = src_l0[2] | src_r0[0] << 16;
3944 dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
3945 dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
3946 dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
3947 dst1[2] = src_r0[3] >> 16;
3948 break;
3949
3950 case 11:
3951 dst0[2] = src_l0[2] | src_r0[0] << 24;
3952 dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
3953 dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
3954 dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
3955 dst1[2] = src_r0[3] >> 8;
3956 break;
3957
3958 case 12:
3959 dst0[3] = src_r0[0];
3960 dst1[0] = src_r0[1];
3961 dst1[1] = src_r0[2];
3962 dst1[2] = src_r0[3];
3963 break;
3964
3965 case 13:
3966 dst0[3] = src_l0[3] | src_r0[0] << 8;
3967 dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
3968 dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
3969 dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
3970 dst1[3] = src_r0[3] >> 24;
3971 break;
3972
3973 case 14:
3974 dst0[3] = src_l0[3] | src_r0[0] << 16;
3975 dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
3976 dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
3977 dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
3978 dst1[3] = src_r0[3] >> 16;
3979 break;
3980
3981 case 15:
3982 dst0[3] = src_l0[3] | src_r0[0] << 24;
3983 dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
3984 dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
3985 dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
3986 dst1[3] = src_r0[3] >> 8;
3987 break;
3988
3989 case 16:
3990 dst1[0] = src_r0[0];
3991 dst1[1] = src_r0[1];
3992 dst1[2] = src_r0[2];
3993 dst1[3] = src_r0[3];
3994 break;
3995
3996 case 17:
3997 dst1[0] = src_l1[0] | src_r0[0] << 8;
3998 dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
3999 dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
4000 dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
4001 break;
4002
4003 case 18:
4004 dst1[0] = src_l1[0] | src_r0[0] << 16;
4005 dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
4006 dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
4007 dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
4008 break;
4009
4010 case 19:
4011 dst1[0] = src_l1[0] | src_r0[0] << 24;
4012 dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
4013 dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
4014 dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
4015 break;
4016
4017 case 20:
4018 dst1[1] = src_r0[0];
4019 dst1[2] = src_r0[1];
4020 dst1[3] = src_r0[2];
4021 break;
4022
4023 case 21:
4024 dst1[1] = src_l1[1] | src_r0[0] << 8;
4025 dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
4026 dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
4027 break;
4028
4029 case 22:
4030 dst1[1] = src_l1[1] | src_r0[0] << 16;
4031 dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
4032 dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
4033 break;
4034
4035 case 23:
4036 dst1[1] = src_l1[1] | src_r0[0] << 24;
4037 dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
4038 dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
4039 break;
4040
4041 case 24:
4042 dst1[2] = src_r0[0];
4043 dst1[3] = src_r0[1];
4044 break;
4045
4046 case 25:
4047 dst1[2] = src_l1[2] | src_r0[0] << 8;
4048 dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
4049 break;
4050
4051 case 26:
4052 dst1[2] = src_l1[2] | src_r0[0] << 16;
4053 dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
4054 break;
4055
4056 case 27:
4057 dst1[2] = src_l1[2] | src_r0[0] << 24;
4058 dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
4059 break;
4060
4061 case 28:
4062 dst1[3] = src_r0[0];
4063 break;
4064
4065 case 29:
4066 dst1[3] = src_l1[3] | src_r0[0] << 8;
4067 break;
4068
4069 case 30:
4070 dst1[3] = src_l1[3] | src_r0[0] << 16;
4071 break;
4072
4073 case 31:
4074 dst1[3] = src_l1[3] | src_r0[0] << 24;
4075 break;
4076 }
4077 }
4078
/**
 * Write the 16 bytes held in src_r0 into the 48-byte destination formed by
 * dst0, dst1, dst2 (12 words), starting at byte position `offset`.
 *
 * Byte k of a word occupies bits 8k..8k+7, as implied by the shift amounts.
 *
 * offset  : byte position (0..47) at which src_r0 is inserted; any other
 *           value leaves the destination untouched
 * dst0..2 : destination words; words below the insertion point are never
 *           written, words fully covered by src_r0 are overwritten
 * src_l0..2 : left-hand data; only the single word containing the insertion
 *           point is read, and only when offset is not a multiple of 4. It is
 *           OR-ed with the shifted first right-hand word.
 *           NOTE(review): this presumably expects the bytes of that word at
 *           and above the insertion point to be zero - confirm with callers.
 * src_r0  : right-hand data (4 words)
 *
 * Right-hand bytes that would land beyond byte 47 are dropped.
 *
 * offset 0 used to match no case and therefore wrote nothing; it now copies
 * src_r0 into dst0, in line with memcat_c47_d3x4_sl3x4_sr2x4.
 */
// before: device_memcat12L
static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
{
  const u32 word = offset >> 2;       // first destination word touched (0..11)
  const u32 lsh  = (offset & 3) << 3; // bit position of the insertion point inside that word

  if (lsh == 0)
  {
    // word-aligned insertion: right-hand words are copied verbatim

    switch (word)
    {
      case 0: // offset 0
        dst0[0] = src_r0[0];
        dst0[1] = src_r0[1];
        dst0[2] = src_r0[2];
        dst0[3] = src_r0[3];
        break;

      case 1: // offset 4
        dst0[1] = src_r0[0];
        dst0[2] = src_r0[1];
        dst0[3] = src_r0[2];
        dst1[0] = src_r0[3];
        break;

      case 2: // offset 8
        dst0[2] = src_r0[0];
        dst0[3] = src_r0[1];
        dst1[0] = src_r0[2];
        dst1[1] = src_r0[3];
        break;

      case 3: // offset 12
        dst0[3] = src_r0[0];
        dst1[0] = src_r0[1];
        dst1[1] = src_r0[2];
        dst1[2] = src_r0[3];
        break;

      case 4: // offset 16
        dst1[0] = src_r0[0];
        dst1[1] = src_r0[1];
        dst1[2] = src_r0[2];
        dst1[3] = src_r0[3];
        break;

      case 5: // offset 20
        dst1[1] = src_r0[0];
        dst1[2] = src_r0[1];
        dst1[3] = src_r0[2];
        dst2[0] = src_r0[3];
        break;

      case 6: // offset 24
        dst1[2] = src_r0[0];
        dst1[3] = src_r0[1];
        dst2[0] = src_r0[2];
        dst2[1] = src_r0[3];
        break;

      case 7: // offset 28
        dst1[3] = src_r0[0];
        dst2[0] = src_r0[1];
        dst2[1] = src_r0[2];
        dst2[2] = src_r0[3];
        break;

      case 8: // offset 32
        dst2[0] = src_r0[0];
        dst2[1] = src_r0[1];
        dst2[2] = src_r0[2];
        dst2[3] = src_r0[3];
        break;

      case 9: // offset 36, last right-hand word does not fit
        dst2[1] = src_r0[0];
        dst2[2] = src_r0[1];
        dst2[3] = src_r0[2];
        break;

      case 10: // offset 40
        dst2[2] = src_r0[0];
        dst2[3] = src_r0[1];
        break;

      case 11: // offset 44
        dst2[3] = src_r0[0];
        break;
    }
  }
  else
  {
    // unaligned insertion: every destination word is stitched together from
    // the high part of one right-hand word and the low part of the next

    const u32 rsh = 32 - lsh; // 8, 16 or 24 - never 0 or 32 in this branch

    switch (word)
    {
      case 0: // offsets 1..3
        dst0[0] = src_l0[0] | (src_r0[0] << lsh);
        dst0[1] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst0[2] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst0[3] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[0] = (src_r0[3] >> rsh);
        break;

      case 1: // offsets 5..7
        dst0[1] = src_l0[1] | (src_r0[0] << lsh);
        dst0[2] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst0[3] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[0] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[1] = (src_r0[3] >> rsh);
        break;

      case 2: // offsets 9..11
        dst0[2] = src_l0[2] | (src_r0[0] << lsh);
        dst0[3] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[0] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[1] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[2] = (src_r0[3] >> rsh);
        break;

      case 3: // offsets 13..15
        dst0[3] = src_l0[3] | (src_r0[0] << lsh);
        dst1[0] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[1] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[2] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[3] = (src_r0[3] >> rsh);
        break;

      case 4: // offsets 17..19
        dst1[0] = src_l1[0] | (src_r0[0] << lsh);
        dst1[1] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[2] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[3] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[0] = (src_r0[3] >> rsh);
        break;

      case 5: // offsets 21..23
        dst1[1] = src_l1[1] | (src_r0[0] << lsh);
        dst1[2] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[3] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[0] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[1] = (src_r0[3] >> rsh);
        break;

      case 6: // offsets 25..27
        dst1[2] = src_l1[2] | (src_r0[0] << lsh);
        dst1[3] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[0] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[1] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[2] = (src_r0[3] >> rsh);
        break;

      case 7: // offsets 29..31
        dst1[3] = src_l1[3] | (src_r0[0] << lsh);
        dst2[0] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[1] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[2] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[3] = (src_r0[3] >> rsh);
        break;

      case 8: // offsets 33..35, trailing carry word would be out of range
        dst2[0] = src_l2[0] | (src_r0[0] << lsh);
        dst2[1] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[2] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[3] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        break;

      case 9: // offsets 37..39
        dst2[1] = src_l2[1] | (src_r0[0] << lsh);
        dst2[2] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[3] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        break;

      case 10: // offsets 41..43
        dst2[2] = src_l2[2] | (src_r0[0] << lsh);
        dst2[3] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        break;

      case 11: // offsets 45..47
        dst2[3] = src_l2[3] | (src_r0[0] << lsh);
        break;
    }
  }
}
4414
/**
 * Write the 32 bytes held in src_r0 followed by src_r1 into the 48-byte
 * destination formed by dst0, dst1, dst2 (12 words), starting at byte
 * position `offset`.
 *
 * Byte k of a word occupies bits 8k..8k+7, as implied by the shift amounts.
 *
 * offset  : byte position (0..47) at which the right-hand data is inserted;
 *           any other value leaves the destination untouched
 * dst0..2 : destination words; words below the insertion point are never
 *           written, words fully covered by right-hand data are overwritten
 * src_l0..2 : left-hand data; only the single word containing the insertion
 *           point is read, and only when offset is not a multiple of 4. It is
 *           OR-ed with the shifted first right-hand word.
 *           NOTE(review): this presumably expects the bytes of that word at
 *           and above the insertion point to be zero - confirm with callers.
 * src_r0, src_r1 : right-hand data (8 words, src_r0 first)
 *
 * Right-hand bytes that would land beyond byte 47 are dropped.
 *
 * Fix: for offsets 20, 24 and 28 the first written word is now src_r0[0].
 * It used to be src_r1[0], which lost the first four right-hand bytes and
 * duplicated bytes 16..19 in their place.
 */
// before: device_memcat12L
static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
{
  const u32 word = offset >> 2;       // first destination word touched (0..11)
  const u32 lsh  = (offset & 3) << 3; // bit position of the insertion point inside that word

  if (lsh == 0)
  {
    // word-aligned insertion: right-hand words are copied verbatim

    switch (word)
    {
      case 0: // offset 0
        dst0[0] = src_r0[0];
        dst0[1] = src_r0[1];
        dst0[2] = src_r0[2];
        dst0[3] = src_r0[3];
        dst1[0] = src_r1[0];
        dst1[1] = src_r1[1];
        dst1[2] = src_r1[2];
        dst1[3] = src_r1[3];
        break;

      case 1: // offset 4
        dst0[1] = src_r0[0];
        dst0[2] = src_r0[1];
        dst0[3] = src_r0[2];
        dst1[0] = src_r0[3];
        dst1[1] = src_r1[0];
        dst1[2] = src_r1[1];
        dst1[3] = src_r1[2];
        dst2[0] = src_r1[3];
        break;

      case 2: // offset 8
        dst0[2] = src_r0[0];
        dst0[3] = src_r0[1];
        dst1[0] = src_r0[2];
        dst1[1] = src_r0[3];
        dst1[2] = src_r1[0];
        dst1[3] = src_r1[1];
        dst2[0] = src_r1[2];
        dst2[1] = src_r1[3];
        break;

      case 3: // offset 12
        dst0[3] = src_r0[0];
        dst1[0] = src_r0[1];
        dst1[1] = src_r0[2];
        dst1[2] = src_r0[3];
        dst1[3] = src_r1[0];
        dst2[0] = src_r1[1];
        dst2[1] = src_r1[2];
        dst2[2] = src_r1[3];
        break;

      case 4: // offset 16
        dst1[0] = src_r0[0];
        dst1[1] = src_r0[1];
        dst1[2] = src_r0[2];
        dst1[3] = src_r0[3];
        dst2[0] = src_r1[0];
        dst2[1] = src_r1[1];
        dst2[2] = src_r1[2];
        dst2[3] = src_r1[3];
        break;

      case 5: // offset 20, from here on the tail of src_r1 does not fit
        dst1[1] = src_r0[0]; // was src_r1[0]
        dst1[2] = src_r0[1];
        dst1[3] = src_r0[2];
        dst2[0] = src_r0[3];
        dst2[1] = src_r1[0];
        dst2[2] = src_r1[1];
        dst2[3] = src_r1[2];
        break;

      case 6: // offset 24
        dst1[2] = src_r0[0]; // was src_r1[0]
        dst1[3] = src_r0[1];
        dst2[0] = src_r0[2];
        dst2[1] = src_r0[3];
        dst2[2] = src_r1[0];
        dst2[3] = src_r1[1];
        break;

      case 7: // offset 28
        dst1[3] = src_r0[0]; // was src_r1[0]
        dst2[0] = src_r0[1];
        dst2[1] = src_r0[2];
        dst2[2] = src_r0[3];
        dst2[3] = src_r1[0];
        break;

      case 8: // offset 32, only src_r0 fits
        dst2[0] = src_r0[0];
        dst2[1] = src_r0[1];
        dst2[2] = src_r0[2];
        dst2[3] = src_r0[3];
        break;

      case 9: // offset 36
        dst2[1] = src_r0[0];
        dst2[2] = src_r0[1];
        dst2[3] = src_r0[2];
        break;

      case 10: // offset 40
        dst2[2] = src_r0[0];
        dst2[3] = src_r0[1];
        break;

      case 11: // offset 44
        dst2[3] = src_r0[0];
        break;
    }
  }
  else
  {
    // unaligned insertion: every destination word is stitched together from
    // the high part of one right-hand word and the low part of the next

    const u32 rsh = 32 - lsh; // 8, 16 or 24 - never 0 or 32 in this branch

    switch (word)
    {
      case 0: // offsets 1..3
        dst0[0] = src_l0[0] | (src_r0[0] << lsh);
        dst0[1] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst0[2] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst0[3] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[0] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst1[1] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        dst1[2] = (src_r1[1] >> rsh) | (src_r1[2] << lsh);
        dst1[3] = (src_r1[2] >> rsh) | (src_r1[3] << lsh);
        dst2[0] = (src_r1[3] >> rsh);
        break;

      case 1: // offsets 5..7
        dst0[1] = src_l0[1] | (src_r0[0] << lsh);
        dst0[2] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst0[3] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[0] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[1] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst1[2] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        dst1[3] = (src_r1[1] >> rsh) | (src_r1[2] << lsh);
        dst2[0] = (src_r1[2] >> rsh) | (src_r1[3] << lsh);
        dst2[1] = (src_r1[3] >> rsh);
        break;

      case 2: // offsets 9..11
        dst0[2] = src_l0[2] | (src_r0[0] << lsh);
        dst0[3] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[0] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[1] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[2] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst1[3] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        dst2[0] = (src_r1[1] >> rsh) | (src_r1[2] << lsh);
        dst2[1] = (src_r1[2] >> rsh) | (src_r1[3] << lsh);
        dst2[2] = (src_r1[3] >> rsh);
        break;

      case 3: // offsets 13..15
        dst0[3] = src_l0[3] | (src_r0[0] << lsh);
        dst1[0] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[1] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[2] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst1[3] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst2[0] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        dst2[1] = (src_r1[1] >> rsh) | (src_r1[2] << lsh);
        dst2[2] = (src_r1[2] >> rsh) | (src_r1[3] << lsh);
        dst2[3] = (src_r1[3] >> rsh);
        break;

      case 4: // offsets 17..19, trailing carry word would be out of range
        dst1[0] = src_l1[0] | (src_r0[0] << lsh);
        dst1[1] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[2] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst1[3] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[0] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst2[1] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        dst2[2] = (src_r1[1] >> rsh) | (src_r1[2] << lsh);
        dst2[3] = (src_r1[2] >> rsh) | (src_r1[3] << lsh);
        break;

      case 5: // offsets 21..23
        dst1[1] = src_l1[1] | (src_r0[0] << lsh);
        dst1[2] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst1[3] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[0] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[1] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst2[2] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        dst2[3] = (src_r1[1] >> rsh) | (src_r1[2] << lsh);
        break;

      case 6: // offsets 25..27
        dst1[2] = src_l1[2] | (src_r0[0] << lsh);
        dst1[3] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[0] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[1] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[2] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        dst2[3] = (src_r1[0] >> rsh) | (src_r1[1] << lsh);
        break;

      case 7: // offsets 29..31
        dst1[3] = src_l1[3] | (src_r0[0] << lsh);
        dst2[0] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[1] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[2] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        dst2[3] = (src_r0[3] >> rsh) | (src_r1[0] << lsh);
        break;

      case 8: // offsets 33..35, only src_r0 fits
        dst2[0] = src_l2[0] | (src_r0[0] << lsh);
        dst2[1] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[2] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        dst2[3] = (src_r0[2] >> rsh) | (src_r0[3] << lsh);
        break;

      case 9: // offsets 37..39
        dst2[1] = src_l2[1] | (src_r0[0] << lsh);
        dst2[2] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        dst2[3] = (src_r0[1] >> rsh) | (src_r0[2] << lsh);
        break;

      case 10: // offsets 41..43
        dst2[2] = src_l2[2] | (src_r0[0] << lsh);
        dst2[3] = (src_r0[0] >> rsh) | (src_r0[1] << lsh);
        break;

      case 11: // offsets 45..47
        dst2[3] = src_l2[3] | (src_r0[0] << lsh);
        break;
    }
  }
}
4849
/**
 * Append 36 bytes - append0[0..3], append1[0..3] and append2[0] - to the
 * 64-byte buffer formed by w0, w1, w2, w3 (16 words), starting at byte
 * position `offset`.
 *
 * Byte k of a word occupies bits 8k..8k+7, as implied by the shift amounts.
 *
 * w0..w3  : buffer, modified in place. Words below the insertion point are
 *           not written. When offset is not a multiple of 4, the word that
 *           contains the insertion point is OR-ed with the shifted first
 *           appended word; otherwise it is overwritten.
 *           NOTE(review): the OR presumably expects the bytes of that word at
 *           and above the insertion point to be zero - confirm with callers.
 * append0..2 : data to append; of append2 only element 0 is read
 * offset  : byte position (0..15); any other value leaves the buffer
 *           untouched
 */
// before: memcat16_9
static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
{
  const u32 word = offset >> 2;       // first buffer word touched (0..3)
  const u32 lsh  = (offset & 3) << 3; // bit position of the insertion point inside that word

  if (lsh == 0)
  {
    // word-aligned: the nine appended words are copied verbatim

    switch (word)
    {
      case 0: // offset 0
        w0[0] = append0[0];
        w0[1] = append0[1];
        w0[2] = append0[2];
        w0[3] = append0[3];
        w1[0] = append1[0];
        w1[1] = append1[1];
        w1[2] = append1[2];
        w1[3] = append1[3];
        w2[0] = append2[0];
        break;

      case 1: // offset 4
        w0[1] = append0[0];
        w0[2] = append0[1];
        w0[3] = append0[2];
        w1[0] = append0[3];
        w1[1] = append1[0];
        w1[2] = append1[1];
        w1[3] = append1[2];
        w2[0] = append1[3];
        w2[1] = append2[0];
        break;

      case 2: // offset 8
        w0[2] = append0[0];
        w0[3] = append0[1];
        w1[0] = append0[2];
        w1[1] = append0[3];
        w1[2] = append1[0];
        w1[3] = append1[1];
        w2[0] = append1[2];
        w2[1] = append1[3];
        w2[2] = append2[0];
        break;

      case 3: // offset 12
        w0[3] = append0[0];
        w1[0] = append0[1];
        w1[1] = append0[2];
        w1[2] = append0[3];
        w1[3] = append1[0];
        w2[0] = append1[1];
        w2[1] = append1[2];
        w2[2] = append1[3];
        w2[3] = append2[0];
        break;
    }
  }
  else
  {
    // unaligned: each buffer word combines the high part of one appended
    // word with the low part of the next; a tenth word receives the carry

    const u32 rsh = 32 - lsh; // 8, 16 or 24 - never 0 or 32 in this branch

    switch (word)
    {
      case 0: // offsets 1..3
        w0[0] |= (append0[0] << lsh);
        w0[1] = (append0[0] >> rsh) | (append0[1] << lsh);
        w0[2] = (append0[1] >> rsh) | (append0[2] << lsh);
        w0[3] = (append0[2] >> rsh) | (append0[3] << lsh);
        w1[0] = (append0[3] >> rsh) | (append1[0] << lsh);
        w1[1] = (append1[0] >> rsh) | (append1[1] << lsh);
        w1[2] = (append1[1] >> rsh) | (append1[2] << lsh);
        w1[3] = (append1[2] >> rsh) | (append1[3] << lsh);
        w2[0] = (append1[3] >> rsh) | (append2[0] << lsh);
        w2[1] = (append2[0] >> rsh);
        break;

      case 1: // offsets 5..7
        w0[1] |= (append0[0] << lsh);
        w0[2] = (append0[0] >> rsh) | (append0[1] << lsh);
        w0[3] = (append0[1] >> rsh) | (append0[2] << lsh);
        w1[0] = (append0[2] >> rsh) | (append0[3] << lsh);
        w1[1] = (append0[3] >> rsh) | (append1[0] << lsh);
        w1[2] = (append1[0] >> rsh) | (append1[1] << lsh);
        w1[3] = (append1[1] >> rsh) | (append1[2] << lsh);
        w2[0] = (append1[2] >> rsh) | (append1[3] << lsh);
        w2[1] = (append1[3] >> rsh) | (append2[0] << lsh);
        w2[2] = (append2[0] >> rsh);
        break;

      case 2: // offsets 9..11
        w0[2] |= (append0[0] << lsh);
        w0[3] = (append0[0] >> rsh) | (append0[1] << lsh);
        w1[0] = (append0[1] >> rsh) | (append0[2] << lsh);
        w1[1] = (append0[2] >> rsh) | (append0[3] << lsh);
        w1[2] = (append0[3] >> rsh) | (append1[0] << lsh);
        w1[3] = (append1[0] >> rsh) | (append1[1] << lsh);
        w2[0] = (append1[1] >> rsh) | (append1[2] << lsh);
        w2[1] = (append1[2] >> rsh) | (append1[3] << lsh);
        w2[2] = (append1[3] >> rsh) | (append2[0] << lsh);
        w2[3] = (append2[0] >> rsh);
        break;

      case 3: // offsets 13..15, the carry spills into w3
        w0[3] |= (append0[0] << lsh);
        w1[0] = (append0[0] >> rsh) | (append0[1] << lsh);
        w1[1] = (append0[1] >> rsh) | (append0[2] << lsh);
        w1[2] = (append0[2] >> rsh) | (append0[3] << lsh);
        w1[3] = (append0[3] >> rsh) | (append1[0] << lsh);
        w2[0] = (append1[0] >> rsh) | (append1[1] << lsh);
        w2[1] = (append1[1] >> rsh) | (append1[2] << lsh);
        w2[2] = (append1[2] >> rsh) | (append1[3] << lsh);
        w2[3] = (append1[3] >> rsh) | (append2[0] << lsh);
        w3[0] = (append2[0] >> rsh);
        break;
    }
  }
}
5060
// before: memcat32_8
/**
 * Append the eight 32-bit words append0[0..3], append1[0..3] (32 bytes) to the
 * buffer formed by w0, w1, w2, w3, starting at byte position `offset`.
 *
 * Byte k of the buffer is stored in bits 8*(k%4) .. 8*(k%4)+7 of word k/4,
 * where words are counted w0[0], w0[1], ..., w3[3].
 *
 * Behaviour:
 *  - offset % 4 == 0: the destination words are overwritten with the append
 *    words, starting at word offset/4.
 *  - offset % 4 != 0: the first destination word is OR-ed with the shifted
 *    first append word (so its upper bytes are expected to be zero), the
 *    following words are overwritten, and one extra word receives the bytes
 *    shifted out of the last append word.
 *  - Nothing beyond w3[1] is ever written; append data that would land in
 *    w3[2] or w3[3] is dropped (presumably those words are reserved by the
 *    callers - confirm before relying on it).
 *  - offset > 32 leaves all buffers unchanged.
 *
 * NOTE: all append words are read before any w word is written, so the append
 * arrays are assumed not to alias w0..w3.
 */
static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset)
{
  if (offset > 32) return;

  // Bit shift inside a word (0, 8, 16 or 24) and its complement. For the
  // aligned case the complement would be 32, which is not a valid shift
  // count, so it is wrapped to 0 and the carried bits are masked off by `keep`.

  const u32 shl  = (offset & 3) * 8;
  const u32 shr  = (32 - shl) & 31;
  const u32 keep = (shl != 0) ? 0xffffffff : 0;

  // t0..t7: the append data shifted into destination word alignment.
  // t8: bytes shifted out of the last append word (always 0 when aligned).

  const u32 t0 =                                (append0[0] << shl);
  const u32 t1 = ((append0[0] >> shr) & keep) | (append0[1] << shl);
  const u32 t2 = ((append0[1] >> shr) & keep) | (append0[2] << shl);
  const u32 t3 = ((append0[2] >> shr) & keep) | (append0[3] << shl);
  const u32 t4 = ((append0[3] >> shr) & keep) | (append1[0] << shl);
  const u32 t5 = ((append1[0] >> shr) & keep) | (append1[1] << shl);
  const u32 t6 = ((append1[1] >> shr) & keep) | (append1[2] << shl);
  const u32 t7 = ((append1[2] >> shr) & keep) | (append1[3] << shl);
  const u32 t8 = ((append1[3] >> shr) & keep);

  // In every case the first word keeps its old content only when unaligned
  // (keep == all ones) and the spill word is written only when unaligned.

  switch (offset / 4)
  {
    case 0:
      w0[0] = (w0[0] & keep) | t0;
      w0[1] = t1;
      w0[2] = t2;
      w0[3] = t3;
      w1[0] = t4;
      w1[1] = t5;
      w1[2] = t6;
      w1[3] = t7;
      if (keep) w2[0] = t8;
      break;

    case 1:
      w0[1] = (w0[1] & keep) | t0;
      w0[2] = t1;
      w0[3] = t2;
      w1[0] = t3;
      w1[1] = t4;
      w1[2] = t5;
      w1[3] = t6;
      w2[0] = t7;
      if (keep) w2[1] = t8;
      break;

    case 2:
      w0[2] = (w0[2] & keep) | t0;
      w0[3] = t1;
      w1[0] = t2;
      w1[1] = t3;
      w1[2] = t4;
      w1[3] = t5;
      w2[0] = t6;
      w2[1] = t7;
      if (keep) w2[2] = t8;
      break;

    case 3:
      w0[3] = (w0[3] & keep) | t0;
      w1[0] = t1;
      w1[1] = t2;
      w1[2] = t3;
      w1[3] = t4;
      w2[0] = t5;
      w2[1] = t6;
      w2[2] = t7;
      if (keep) w2[3] = t8;
      break;

    case 4:
      w1[0] = (w1[0] & keep) | t0;
      w1[1] = t1;
      w1[2] = t2;
      w1[3] = t3;
      w2[0] = t4;
      w2[1] = t5;
      w2[2] = t6;
      w2[3] = t7;
      if (keep) w3[0] = t8;
      break;

    case 5:
      w1[1] = (w1[1] & keep) | t0;
      w1[2] = t1;
      w1[3] = t2;
      w2[0] = t3;
      w2[1] = t4;
      w2[2] = t5;
      w2[3] = t6;
      w3[0] = t7;
      if (keep) w3[1] = t8;
      break;

    case 6: // from here on the data is cut off after w3[1]
      w1[2] = (w1[2] & keep) | t0;
      w1[3] = t1;
      w2[0] = t2;
      w2[1] = t3;
      w2[2] = t4;
      w2[3] = t5;
      w3[0] = t6;
      w3[1] = t7;
      break;

    case 7:
      w1[3] = (w1[3] & keep) | t0;
      w2[0] = t1;
      w2[1] = t2;
      w2[2] = t3;
      w2[3] = t4;
      w3[0] = t5;
      w3[1] = t6;
      break;

    case 8: // only reached for offset == 32, i.e. always aligned
      w2[0] = (w2[0] & keep) | t0;
      w2[1] = t1;
      w2[2] = t2;
      w2[3] = t3;
      w3[0] = t4;
      w3[1] = t5;
      break;
  }
}
5442
// before: memcat32_9
/**
 * Append the nine 32-bit words append0[0..3], append1[0..3], append2[0]
 * (36 bytes) to the buffer formed by w0, w1, w2, w3, starting at byte
 * position `offset`. Only element 0 of append2 is read.
 *
 * Byte k of the buffer is stored in bits 8*(k%4) .. 8*(k%4)+7 of word k/4,
 * where words are counted w0[0], w0[1], ..., w3[3].
 *
 * Behaviour:
 *  - offset % 4 == 0: the destination words are overwritten with the append
 *    words, starting at word offset/4.
 *  - offset % 4 != 0: the first destination word is OR-ed with the shifted
 *    first append word (so its upper bytes are expected to be zero), the
 *    following words are overwritten, and one extra word receives the bytes
 *    shifted out of the last append word.
 *  - Nothing beyond w3[1] is ever written; append data that would land in
 *    w3[2] or w3[3] is dropped (presumably those words are reserved by the
 *    callers - confirm before relying on it).
 *  - offset > 32 leaves all buffers unchanged.
 *
 * NOTE: all append words are read before any w word is written, so the append
 * arrays are assumed not to alias w0..w3.
 */
static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
{
  if (offset > 32) return;

  // Bit shift inside a word (0, 8, 16 or 24) and its complement. For the
  // aligned case the complement would be 32, which is not a valid shift
  // count, so it is wrapped to 0 and the carried bits are masked off by `keep`.

  const u32 shl  = (offset & 3) * 8;
  const u32 shr  = (32 - shl) & 31;
  const u32 keep = (shl != 0) ? 0xffffffff : 0;

  // t0..t8: the append data shifted into destination word alignment.
  // t9: bytes shifted out of the last append word (always 0 when aligned).

  const u32 t0 =                                (append0[0] << shl);
  const u32 t1 = ((append0[0] >> shr) & keep) | (append0[1] << shl);
  const u32 t2 = ((append0[1] >> shr) & keep) | (append0[2] << shl);
  const u32 t3 = ((append0[2] >> shr) & keep) | (append0[3] << shl);
  const u32 t4 = ((append0[3] >> shr) & keep) | (append1[0] << shl);
  const u32 t5 = ((append1[0] >> shr) & keep) | (append1[1] << shl);
  const u32 t6 = ((append1[1] >> shr) & keep) | (append1[2] << shl);
  const u32 t7 = ((append1[2] >> shr) & keep) | (append1[3] << shl);
  const u32 t8 = ((append1[3] >> shr) & keep) | (append2[0] << shl);
  const u32 t9 = ((append2[0] >> shr) & keep);

  // In every case the first word keeps its old content only when unaligned
  // (keep == all ones) and the spill word is written only when unaligned.

  switch (offset / 4)
  {
    case 0:
      w0[0] = (w0[0] & keep) | t0;
      w0[1] = t1;
      w0[2] = t2;
      w0[3] = t3;
      w1[0] = t4;
      w1[1] = t5;
      w1[2] = t6;
      w1[3] = t7;
      w2[0] = t8;
      if (keep) w2[1] = t9;
      break;

    case 1:
      w0[1] = (w0[1] & keep) | t0;
      w0[2] = t1;
      w0[3] = t2;
      w1[0] = t3;
      w1[1] = t4;
      w1[2] = t5;
      w1[3] = t6;
      w2[0] = t7;
      w2[1] = t8;
      if (keep) w2[2] = t9;
      break;

    case 2:
      w0[2] = (w0[2] & keep) | t0;
      w0[3] = t1;
      w1[0] = t2;
      w1[1] = t3;
      w1[2] = t4;
      w1[3] = t5;
      w2[0] = t6;
      w2[1] = t7;
      w2[2] = t8;
      if (keep) w2[3] = t9;
      break;

    case 3:
      w0[3] = (w0[3] & keep) | t0;
      w1[0] = t1;
      w1[1] = t2;
      w1[2] = t3;
      w1[3] = t4;
      w2[0] = t5;
      w2[1] = t6;
      w2[2] = t7;
      w2[3] = t8;
      if (keep) w3[0] = t9;
      break;

    case 4:
      w1[0] = (w1[0] & keep) | t0;
      w1[1] = t1;
      w1[2] = t2;
      w1[3] = t3;
      w2[0] = t4;
      w2[1] = t5;
      w2[2] = t6;
      w2[3] = t7;
      w3[0] = t8;
      if (keep) w3[1] = t9;
      break;

    case 5: // from here on the data is cut off after w3[1]
      w1[1] = (w1[1] & keep) | t0;
      w1[2] = t1;
      w1[3] = t2;
      w2[0] = t3;
      w2[1] = t4;
      w2[2] = t5;
      w2[3] = t6;
      w3[0] = t7;
      w3[1] = t8;
      break;

    case 6:
      w1[2] = (w1[2] & keep) | t0;
      w1[3] = t1;
      w2[0] = t2;
      w2[1] = t3;
      w2[2] = t4;
      w2[3] = t5;
      w3[0] = t6;
      w3[1] = t7;
      break;

    case 7:
      w1[3] = (w1[3] & keep) | t0;
      w2[0] = t1;
      w2[1] = t2;
      w2[2] = t3;
      w2[3] = t4;
      w3[0] = t5;
      w3[1] = t6;
      break;

    case 8: // only reached for offset == 32, i.e. always aligned
      w2[0] = (w2[0] & keep) | t0;
      w2[1] = t1;
      w2[2] = t2;
      w2[3] = t3;
      w3[0] = t4;
      w3[1] = t5;
      break;
  }
}
5845
5846 static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
5847 {
5848 #ifdef IS_AMD
5849 const int offset_mod_4 = offset & 3;
5850
5851 const int offset_minus_4 = 4 - offset;
5852
5853 switch (offset / 4)
5854 {
5855 case 0:
5856 w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
5857 w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
5858 w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
5859 w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
5860 w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5861 w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5862 w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5863 w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5864 w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5865 w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5866 w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5867 w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5868 w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5869 w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5870 w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
5871
5872 if (offset_mod_4 == 0)
5873 {
5874 w0[0] = w0[1];
5875 w0[1] = w0[2];
5876 w0[2] = w0[3];
5877 w0[3] = w1[0];
5878 w1[0] = w1[1];
5879 w1[1] = w1[2];
5880 w1[2] = w1[3];
5881 w1[3] = w2[0];
5882 w2[0] = w2[1];
5883 w2[1] = w2[2];
5884 w2[2] = w2[3];
5885 w2[3] = w3[0];
5886 w3[0] = w3[1];
5887 w3[1] = w3[2];
5888 w3[2] = 0;
5889 }
5890
5891 break;
5892
5893 case 1:
5894 w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
5895 w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
5896 w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
5897 w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5898 w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5899 w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5900 w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5901 w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5902 w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5903 w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5904 w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5905 w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5906 w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5907 w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
5908 w0[0] = 0;
5909
5910 if (offset_mod_4 == 0)
5911 {
5912 w0[1] = w0[2];
5913 w0[2] = w0[3];
5914 w0[3] = w1[0];
5915 w1[0] = w1[1];
5916 w1[1] = w1[2];
5917 w1[2] = w1[3];
5918 w1[3] = w2[0];
5919 w2[0] = w2[1];
5920 w2[1] = w2[2];
5921 w2[2] = w2[3];
5922 w2[3] = w3[0];
5923 w3[0] = w3[1];
5924 w3[1] = w3[2];
5925 w3[2] = 0;
5926 }
5927
5928 break;
5929
5930 case 2:
5931 w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
5932 w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
5933 w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5934 w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5935 w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5936 w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5937 w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5938 w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5939 w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5940 w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5941 w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5942 w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5943 w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
5944 w0[1] = 0;
5945 w0[0] = 0;
5946
5947 if (offset_mod_4 == 0)
5948 {
5949 w0[2] = w0[3];
5950 w0[3] = w1[0];
5951 w1[0] = w1[1];
5952 w1[1] = w1[2];
5953 w1[2] = w1[3];
5954 w1[3] = w2[0];
5955 w2[0] = w2[1];
5956 w2[1] = w2[2];
5957 w2[2] = w2[3];
5958 w2[3] = w3[0];
5959 w3[0] = w3[1];
5960 w3[1] = w3[2];
5961 w3[2] = 0;
5962 }
5963
5964 break;
5965
5966 case 3:
5967 w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
5968 w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5969 w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5970 w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5971 w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5972 w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5973 w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5974 w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5975 w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5976 w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5977 w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5978 w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
5979 w0[2] = 0;
5980 w0[1] = 0;
5981 w0[0] = 0;
5982
5983 if (offset_mod_4 == 0)
5984 {
5985 w0[3] = w1[0];
5986 w1[0] = w1[1];
5987 w1[1] = w1[2];
5988 w1[2] = w1[3];
5989 w1[3] = w2[0];
5990 w2[0] = w2[1];
5991 w2[1] = w2[2];
5992 w2[2] = w2[3];
5993 w2[3] = w3[0];
5994 w3[0] = w3[1];
5995 w3[1] = w3[2];
5996 w3[2] = 0;
5997 }
5998
5999 break;
6000
6001 case 4:
6002 w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
6003 w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
6004 w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6005 w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6006 w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6007 w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6008 w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6009 w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6010 w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6011 w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6012 w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
6013 w0[3] = 0;
6014 w0[2] = 0;
6015 w0[1] = 0;
6016 w0[0] = 0;
6017
6018 if (offset_mod_4 == 0)
6019 {
6020 w1[0] = w1[1];
6021 w1[1] = w1[2];
6022 w1[2] = w1[3];
6023 w1[3] = w2[0];
6024 w2[0] = w2[1];
6025 w2[1] = w2[2];
6026 w2[2] = w2[3];
6027 w2[3] = w3[0];
6028 w3[0] = w3[1];
6029 w3[1] = w3[2];
6030 w3[2] = 0;
6031 }
6032
6033 break;
6034
6035 case 5:
6036 w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
6037 w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
6038 w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6039 w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6040 w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6041 w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6042 w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6043 w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6044 w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6045 w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
6046 w1[0] = 0;
6047 w0[3] = 0;
6048 w0[2] = 0;
6049 w0[1] = 0;
6050 w0[0] = 0;
6051
6052 if (offset_mod_4 == 0)
6053 {
6054 w1[1] = w1[2];
6055 w1[2] = w1[3];
6056 w1[3] = w2[0];
6057 w2[0] = w2[1];
6058 w2[1] = w2[2];
6059 w2[2] = w2[3];
6060 w2[3] = w3[0];
6061 w3[0] = w3[1];
6062 w3[1] = w3[2];
6063 w3[2] = 0;
6064 }
6065
6066 break;
6067
6068 case 6:
6069 w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
6070 w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
6071 w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6072 w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6073 w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6074 w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6075 w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6076 w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6077 w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
6078 w1[1] = 0;
6079 w1[0] = 0;
6080 w0[3] = 0;
6081 w0[2] = 0;
6082 w0[1] = 0;
6083 w0[0] = 0;
6084
6085 if (offset_mod_4 == 0)
6086 {
6087 w1[2] = w1[3];
6088 w1[3] = w2[0];
6089 w2[0] = w2[1];
6090 w2[1] = w2[2];
6091 w2[2] = w2[3];
6092 w2[3] = w3[0];
6093 w3[0] = w3[1];
6094 w3[1] = w3[2];
6095 w3[2] = 0;
6096 }
6097
6098 break;
6099
6100 case 7:
6101 w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
6102 w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
6103 w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6104 w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6105 w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6106 w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6107 w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6108 w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
6109 w1[2] = 0;
6110 w1[1] = 0;
6111 w1[0] = 0;
6112 w0[3] = 0;
6113 w0[2] = 0;
6114 w0[1] = 0;
6115 w0[0] = 0;
6116
6117 if (offset_mod_4 == 0)
6118 {
6119 w1[3] = w2[0];
6120 w2[0] = w2[1];
6121 w2[1] = w2[2];
6122 w2[2] = w2[3];
6123 w2[3] = w3[0];
6124 w3[0] = w3[1];
6125 w3[1] = w3[2];
6126 w3[2] = 0;
6127 }
6128
6129 break;
6130
6131 case 8:
6132 w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
6133 w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
6134 w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6135 w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6136 w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6137 w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6138 w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
6139 w1[3] = 0;
6140 w1[2] = 0;
6141 w1[1] = 0;
6142 w1[0] = 0;
6143 w0[3] = 0;
6144 w0[2] = 0;
6145 w0[1] = 0;
6146 w0[0] = 0;
6147
6148 if (offset_mod_4 == 0)
6149 {
6150 w2[0] = w2[1];
6151 w2[1] = w2[2];
6152 w2[2] = w2[3];
6153 w2[3] = w3[0];
6154 w3[0] = w3[1];
6155 w3[1] = w3[2];
6156 w3[2] = 0;
6157 }
6158
6159 break;
6160
6161 case 9:
6162 w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
6163 w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
6164 w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6165 w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6166 w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6167 w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
6168 w2[0] = 0;
6169 w1[3] = 0;
6170 w1[2] = 0;
6171 w1[1] = 0;
6172 w1[0] = 0;
6173 w0[3] = 0;
6174 w0[2] = 0;
6175 w0[1] = 0;
6176 w0[0] = 0;
6177
6178 if (offset_mod_4 == 0)
6179 {
6180 w2[1] = w2[2];
6181 w2[2] = w2[3];
6182 w2[3] = w3[0];
6183 w3[0] = w3[1];
6184 w3[1] = w3[2];
6185 w3[2] = 0;
6186 }
6187
6188 break;
6189
6190 case 10:
6191 w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
6192 w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
6193 w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6194 w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6195 w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
6196 w2[1] = 0;
6197 w2[0] = 0;
6198 w1[3] = 0;
6199 w1[2] = 0;
6200 w1[1] = 0;
6201 w1[0] = 0;
6202 w0[3] = 0;
6203 w0[2] = 0;
6204 w0[1] = 0;
6205 w0[0] = 0;
6206
6207 if (offset_mod_4 == 0)
6208 {
6209 w2[2] = w2[3];
6210 w2[3] = w3[0];
6211 w3[0] = w3[1];
6212 w3[1] = w3[2];
6213 w3[2] = 0;
6214 }
6215
6216 break;
6217
6218 case 11:
6219 w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
6220 w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
6221 w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6222 w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
6223 w2[2] = 0;
6224 w2[1] = 0;
6225 w2[0] = 0;
6226 w1[3] = 0;
6227 w1[2] = 0;
6228 w1[1] = 0;
6229 w1[0] = 0;
6230 w0[3] = 0;
6231 w0[2] = 0;
6232 w0[1] = 0;
6233 w0[0] = 0;
6234
6235 if (offset_mod_4 == 0)
6236 {
6237 w2[3] = w3[0];
6238 w3[0] = w3[1];
6239 w3[1] = w3[2];
6240 w3[2] = 0;
6241 }
6242
6243 break;
6244
6245 case 12:
6246 w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
6247 w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
6248 w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
6249 w2[3] = 0;
6250 w2[2] = 0;
6251 w2[1] = 0;
6252 w2[0] = 0;
6253 w1[3] = 0;
6254 w1[2] = 0;
6255 w1[1] = 0;
6256 w1[0] = 0;
6257 w0[3] = 0;
6258 w0[2] = 0;
6259 w0[1] = 0;
6260 w0[0] = 0;
6261
6262 if (offset_mod_4 == 0)
6263 {
6264 w3[0] = w3[1];
6265 w3[1] = w3[2];
6266 w3[2] = 0;
6267 }
6268
6269 break;
6270
6271 case 13:
6272 w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
6273 w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
6274 w3[0] = 0;
6275 w2[3] = 0;
6276 w2[2] = 0;
6277 w2[1] = 0;
6278 w2[0] = 0;
6279 w1[3] = 0;
6280 w1[2] = 0;
6281 w1[1] = 0;
6282 w1[0] = 0;
6283 w0[3] = 0;
6284 w0[2] = 0;
6285 w0[1] = 0;
6286 w0[0] = 0;
6287
6288 if (offset_mod_4 == 0)
6289 {
6290 w3[1] = w3[2];
6291 w3[2] = 0;
6292 }
6293
6294 break;
6295 }
6296 #endif
6297
6298 #ifdef IS_NV
6299 const int offset_minus_4 = 4 - (offset % 4);
6300
6301 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
6302
6303 switch (offset / 4)
6304 {
6305 case 0:
6306 w3[1] = __byte_perm (w3[0], w3[1], selector);
6307 w3[0] = __byte_perm (w2[3], w3[0], selector);
6308 w2[3] = __byte_perm (w2[2], w2[3], selector);
6309 w2[2] = __byte_perm (w2[1], w2[2], selector);
6310 w2[1] = __byte_perm (w2[0], w2[1], selector);
6311 w2[0] = __byte_perm (w1[3], w2[0], selector);
6312 w1[3] = __byte_perm (w1[2], w1[3], selector);
6313 w1[2] = __byte_perm (w1[1], w1[2], selector);
6314 w1[1] = __byte_perm (w1[0], w1[1], selector);
6315 w1[0] = __byte_perm (w0[3], w1[0], selector);
6316 w0[3] = __byte_perm (w0[2], w0[3], selector);
6317 w0[2] = __byte_perm (w0[1], w0[2], selector);
6318 w0[1] = __byte_perm (w0[0], w0[1], selector);
6319 w0[0] = __byte_perm ( 0, w0[0], selector);
6320
6321 break;
6322
6323 case 1:
6324 w3[1] = __byte_perm (w2[3], w3[0], selector);
6325 w3[0] = __byte_perm (w2[2], w2[3], selector);
6326 w2[3] = __byte_perm (w2[1], w2[2], selector);
6327 w2[2] = __byte_perm (w2[0], w2[1], selector);
6328 w2[1] = __byte_perm (w1[3], w2[0], selector);
6329 w2[0] = __byte_perm (w1[2], w1[3], selector);
6330 w1[3] = __byte_perm (w1[1], w1[2], selector);
6331 w1[2] = __byte_perm (w1[0], w1[1], selector);
6332 w1[1] = __byte_perm (w0[3], w1[0], selector);
6333 w1[0] = __byte_perm (w0[2], w0[3], selector);
6334 w0[3] = __byte_perm (w0[1], w0[2], selector);
6335 w0[2] = __byte_perm (w0[0], w0[1], selector);
6336 w0[1] = __byte_perm ( 0, w0[0], selector);
6337 w0[0] = 0;
6338
6339 break;
6340
6341 case 2:
6342 w3[1] = __byte_perm (w2[2], w2[3], selector);
6343 w3[0] = __byte_perm (w2[1], w2[2], selector);
6344 w2[3] = __byte_perm (w2[0], w2[1], selector);
6345 w2[2] = __byte_perm (w1[3], w2[0], selector);
6346 w2[1] = __byte_perm (w1[2], w1[3], selector);
6347 w2[0] = __byte_perm (w1[1], w1[2], selector);
6348 w1[3] = __byte_perm (w1[0], w1[1], selector);
6349 w1[2] = __byte_perm (w0[3], w1[0], selector);
6350 w1[1] = __byte_perm (w0[2], w0[3], selector);
6351 w1[0] = __byte_perm (w0[1], w0[2], selector);
6352 w0[3] = __byte_perm (w0[0], w0[1], selector);
6353 w0[2] = __byte_perm ( 0, w0[0], selector);
6354 w0[1] = 0;
6355 w0[0] = 0;
6356
6357 break;
6358
6359 case 3:
6360 w3[1] = __byte_perm (w2[1], w2[2], selector);
6361 w3[0] = __byte_perm (w2[0], w2[1], selector);
6362 w2[3] = __byte_perm (w1[3], w2[0], selector);
6363 w2[2] = __byte_perm (w1[2], w1[3], selector);
6364 w2[1] = __byte_perm (w1[1], w1[2], selector);
6365 w2[0] = __byte_perm (w1[0], w1[1], selector);
6366 w1[3] = __byte_perm (w0[3], w1[0], selector);
6367 w1[2] = __byte_perm (w0[2], w0[3], selector);
6368 w1[1] = __byte_perm (w0[1], w0[2], selector);
6369 w1[0] = __byte_perm (w0[0], w0[1], selector);
6370 w0[3] = __byte_perm ( 0, w0[0], selector);
6371 w0[2] = 0;
6372 w0[1] = 0;
6373 w0[0] = 0;
6374
6375 break;
6376
6377 case 4:
6378 w3[1] = __byte_perm (w2[0], w2[1], selector);
6379 w3[0] = __byte_perm (w1[3], w2[0], selector);
6380 w2[3] = __byte_perm (w1[2], w1[3], selector);
6381 w2[2] = __byte_perm (w1[1], w1[2], selector);
6382 w2[1] = __byte_perm (w1[0], w1[1], selector);
6383 w2[0] = __byte_perm (w0[3], w1[0], selector);
6384 w1[3] = __byte_perm (w0[2], w0[3], selector);
6385 w1[2] = __byte_perm (w0[1], w0[2], selector);
6386 w1[1] = __byte_perm (w0[0], w0[1], selector);
6387 w1[0] = __byte_perm ( 0, w0[0], selector);
6388 w0[3] = 0;
6389 w0[2] = 0;
6390 w0[1] = 0;
6391 w0[0] = 0;
6392
6393 break;
6394
6395 case 5:
6396 w3[1] = __byte_perm (w1[3], w2[0], selector);
6397 w3[0] = __byte_perm (w1[2], w1[3], selector);
6398 w2[3] = __byte_perm (w1[1], w1[2], selector);
6399 w2[2] = __byte_perm (w1[0], w1[1], selector);
6400 w2[1] = __byte_perm (w0[3], w1[0], selector);
6401 w2[0] = __byte_perm (w0[2], w0[3], selector);
6402 w1[3] = __byte_perm (w0[1], w0[2], selector);
6403 w1[2] = __byte_perm (w0[0], w0[1], selector);
6404 w1[1] = __byte_perm ( 0, w0[0], selector);
6405 w1[0] = 0;
6406 w0[3] = 0;
6407 w0[2] = 0;
6408 w0[1] = 0;
6409 w0[0] = 0;
6410
6411 break;
6412
6413 case 6:
6414 w3[1] = __byte_perm (w1[2], w1[3], selector);
6415 w3[0] = __byte_perm (w1[1], w1[2], selector);
6416 w2[3] = __byte_perm (w1[0], w1[1], selector);
6417 w2[2] = __byte_perm (w0[3], w1[0], selector);
6418 w2[1] = __byte_perm (w0[2], w0[3], selector);
6419 w2[0] = __byte_perm (w0[1], w0[2], selector);
6420 w1[3] = __byte_perm (w0[0], w0[1], selector);
6421 w1[2] = __byte_perm ( 0, w0[0], selector);
6422 w1[1] = 0;
6423 w1[0] = 0;
6424 w0[3] = 0;
6425 w0[2] = 0;
6426 w0[1] = 0;
6427 w0[0] = 0;
6428
6429 break;
6430
6431 case 7:
6432 w3[1] = __byte_perm (w1[1], w1[2], selector);
6433 w3[0] = __byte_perm (w1[0], w1[1], selector);
6434 w2[3] = __byte_perm (w0[3], w1[0], selector);
6435 w2[2] = __byte_perm (w0[2], w0[3], selector);
6436 w2[1] = __byte_perm (w0[1], w0[2], selector);
6437 w2[0] = __byte_perm (w0[0], w0[1], selector);
6438 w1[3] = __byte_perm ( 0, w0[0], selector);
6439 w1[2] = 0;
6440 w1[1] = 0;
6441 w1[0] = 0;
6442 w0[3] = 0;
6443 w0[2] = 0;
6444 w0[1] = 0;
6445 w0[0] = 0;
6446
6447 break;
6448
6449 case 8:
6450 w3[1] = __byte_perm (w1[0], w1[1], selector);
6451 w3[0] = __byte_perm (w0[3], w1[0], selector);
6452 w2[3] = __byte_perm (w0[2], w0[3], selector);
6453 w2[2] = __byte_perm (w0[1], w0[2], selector);
6454 w2[1] = __byte_perm (w0[0], w0[1], selector);
6455 w2[0] = __byte_perm ( 0, w0[0], selector);
6456 w1[3] = 0;
6457 w1[2] = 0;
6458 w1[1] = 0;
6459 w1[0] = 0;
6460 w0[3] = 0;
6461 w0[2] = 0;
6462 w0[1] = 0;
6463 w0[0] = 0;
6464
6465 break;
6466
6467 case 9:
6468 w3[1] = __byte_perm (w0[3], w1[0], selector);
6469 w3[0] = __byte_perm (w0[2], w0[3], selector);
6470 w2[3] = __byte_perm (w0[1], w0[2], selector);
6471 w2[2] = __byte_perm (w0[0], w0[1], selector);
6472 w2[1] = __byte_perm ( 0, w0[0], selector);
6473 w2[0] = 0;
6474 w1[3] = 0;
6475 w1[2] = 0;
6476 w1[1] = 0;
6477 w1[0] = 0;
6478 w0[3] = 0;
6479 w0[2] = 0;
6480 w0[1] = 0;
6481 w0[0] = 0;
6482
6483 break;
6484
6485 case 10:
6486 w3[1] = __byte_perm (w0[2], w0[3], selector);
6487 w3[0] = __byte_perm (w0[1], w0[2], selector);
6488 w2[3] = __byte_perm (w0[0], w0[1], selector);
6489 w2[2] = __byte_perm ( 0, w0[0], selector);
6490 w2[1] = 0;
6491 w2[0] = 0;
6492 w1[3] = 0;
6493 w1[2] = 0;
6494 w1[1] = 0;
6495 w1[0] = 0;
6496 w0[3] = 0;
6497 w0[2] = 0;
6498 w0[1] = 0;
6499 w0[0] = 0;
6500
6501 break;
6502
6503 case 11:
6504 w3[1] = __byte_perm (w0[1], w0[2], selector);
6505 w3[0] = __byte_perm (w0[0], w0[1], selector);
6506 w2[3] = __byte_perm ( 0, w0[0], selector);
6507 w2[2] = 0;
6508 w2[1] = 0;
6509 w2[0] = 0;
6510 w1[3] = 0;
6511 w1[2] = 0;
6512 w1[1] = 0;
6513 w1[0] = 0;
6514 w0[3] = 0;
6515 w0[2] = 0;
6516 w0[1] = 0;
6517 w0[0] = 0;
6518
6519 break;
6520
6521 case 12:
6522 w3[1] = __byte_perm (w0[0], w0[1], selector);
6523 w3[0] = __byte_perm ( 0, w0[0], selector);
6524 w2[3] = 0;
6525 w2[2] = 0;
6526 w2[1] = 0;
6527 w2[0] = 0;
6528 w1[3] = 0;
6529 w1[2] = 0;
6530 w1[1] = 0;
6531 w1[0] = 0;
6532 w0[3] = 0;
6533 w0[2] = 0;
6534 w0[1] = 0;
6535 w0[0] = 0;
6536
6537 break;
6538
6539 case 13:
6540 w3[1] = __byte_perm ( 0, w0[0], selector);
6541 w3[0] = 0;
6542 w2[3] = 0;
6543 w2[2] = 0;
6544 w2[1] = 0;
6545 w2[0] = 0;
6546 w1[3] = 0;
6547 w1[2] = 0;
6548 w1[1] = 0;
6549 w1[0] = 0;
6550 w0[3] = 0;
6551 w0[2] = 0;
6552 w0[1] = 0;
6553 w0[0] = 0;
6554
6555 break;
6556 }
6557 #endif
6558 }
6559
/**
 * Shifts the content of the word buffer w0[0..3], w1[0..3], w2[0..3], w3[0..3]
 * (handled as one consecutive sequence of u32 words, w0[0] first) towards
 * higher word indices by `offset` bytes and fills the vacated front with zeros.
 *
 * How the shift is split up:
 *  - offset / 4 picks the case, i.e. the number of whole words to skip;
 *    the first (offset / 4) words are set to 0
 *  - the remaining sub-word part is done by combining each pair of
 *    neighbouring source words with amd_bytealign (AMD) or __byte_perm (NV)
 *
 * Things to be aware of:
 *  - only cases 0..13 exist and there is no default label, so for
 *    offset >= 56 none of the buffers is modified
 *  - the AMD path writes words up to w3[2], the NV path only up to w3[1];
 *    w3[3] is never written by either path
 *  - content that would move past the last written word is dropped
 *  - if neither IS_AMD nor IS_NV is defined the function body is empty
 *
 * NOTE(review): the "_be" suffix and the operand order suggest that the words
 * are expected in big-endian byte order (first message byte in the most
 * significant byte) - confirm against the callers.
 *
 * @param w0     words  0..3  of the buffer, modified in place
 * @param w1     words  4..7  of the buffer, modified in place
 * @param w2     words  8..11 of the buffer, modified in place
 * @param w3     words 12..15 of the buffer, modified in place (see above)
 * @param offset number of bytes to shift by
 */
6560 static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
6561 {
6562 #ifdef IS_AMD
// In every case the destinations are assigned from the highest word down to
// the lowest one, so each source word is read before it gets overwritten.
// Do not reorder these statements.
//
// The unmasked byte offset is handed to amd_bytealign as third argument.
// NOTE(review): presumably only its low two bits select the byte shift -
// confirm against the cl_amd_media_ops specification.
6563 switch (offset / 4)
6564 {
6565 case 0:
6566 w3[2] = amd_bytealign (w3[1], 0, offset);
6567 w3[1] = amd_bytealign (w3[0], w3[1], offset);
6568 w3[0] = amd_bytealign (w2[3], w3[0], offset);
6569 w2[3] = amd_bytealign (w2[2], w2[3], offset);
6570 w2[2] = amd_bytealign (w2[1], w2[2], offset);
6571 w2[1] = amd_bytealign (w2[0], w2[1], offset);
6572 w2[0] = amd_bytealign (w1[3], w2[0], offset);
6573 w1[3] = amd_bytealign (w1[2], w1[3], offset);
6574 w1[2] = amd_bytealign (w1[1], w1[2], offset);
6575 w1[1] = amd_bytealign (w1[0], w1[1], offset);
6576 w1[0] = amd_bytealign (w0[3], w1[0], offset);
6577 w0[3] = amd_bytealign (w0[2], w0[3], offset);
6578 w0[2] = amd_bytealign (w0[1], w0[2], offset);
6579 w0[1] = amd_bytealign (w0[0], w0[1], offset);
6580 w0[0] = amd_bytealign ( 0, w0[0], offset);
6581 break;
6582
6583 case 1:
6584 w3[2] = amd_bytealign (w3[0], 0, offset);
6585 w3[1] = amd_bytealign (w2[3], w3[0], offset);
6586 w3[0] = amd_bytealign (w2[2], w2[3], offset);
6587 w2[3] = amd_bytealign (w2[1], w2[2], offset);
6588 w2[2] = amd_bytealign (w2[0], w2[1], offset);
6589 w2[1] = amd_bytealign (w1[3], w2[0], offset);
6590 w2[0] = amd_bytealign (w1[2], w1[3], offset);
6591 w1[3] = amd_bytealign (w1[1], w1[2], offset);
6592 w1[2] = amd_bytealign (w1[0], w1[1], offset);
6593 w1[1] = amd_bytealign (w0[3], w1[0], offset);
6594 w1[0] = amd_bytealign (w0[2], w0[3], offset);
6595 w0[3] = amd_bytealign (w0[1], w0[2], offset);
6596 w0[2] = amd_bytealign (w0[0], w0[1], offset);
6597 w0[1] = amd_bytealign ( 0, w0[0], offset);
6598 w0[0] = 0;
6599 break;
6600
6601 case 2:
6602 w3[2] = amd_bytealign (w2[3], 0, offset);
6603 w3[1] = amd_bytealign (w2[2], w2[3], offset);
6604 w3[0] = amd_bytealign (w2[1], w2[2], offset);
6605 w2[3] = amd_bytealign (w2[0], w2[1], offset);
6606 w2[2] = amd_bytealign (w1[3], w2[0], offset);
6607 w2[1] = amd_bytealign (w1[2], w1[3], offset);
6608 w2[0] = amd_bytealign (w1[1], w1[2], offset);
6609 w1[3] = amd_bytealign (w1[0], w1[1], offset);
6610 w1[2] = amd_bytealign (w0[3], w1[0], offset);
6611 w1[1] = amd_bytealign (w0[2], w0[3], offset);
6612 w1[0] = amd_bytealign (w0[1], w0[2], offset);
6613 w0[3] = amd_bytealign (w0[0], w0[1], offset);
6614 w0[2] = amd_bytealign ( 0, w0[0], offset);
6615 w0[1] = 0;
6616 w0[0] = 0;
6617 break;
6618
6619 case 3:
6620 w3[2] = amd_bytealign (w2[2], 0, offset);
6621 w3[1] = amd_bytealign (w2[1], w2[2], offset);
6622 w3[0] = amd_bytealign (w2[0], w2[1], offset);
6623 w2[3] = amd_bytealign (w1[3], w2[0], offset);
6624 w2[2] = amd_bytealign (w1[2], w1[3], offset);
6625 w2[1] = amd_bytealign (w1[1], w1[2], offset);
6626 w2[0] = amd_bytealign (w1[0], w1[1], offset);
6627 w1[3] = amd_bytealign (w0[3], w1[0], offset);
6628 w1[2] = amd_bytealign (w0[2], w0[3], offset);
6629 w1[1] = amd_bytealign (w0[1], w0[2], offset);
6630 w1[0] = amd_bytealign (w0[0], w0[1], offset);
6631 w0[3] = amd_bytealign ( 0, w0[0], offset);
6632 w0[2] = 0;
6633 w0[1] = 0;
6634 w0[0] = 0;
6635 break;
6636
6637 case 4:
6638 w3[2] = amd_bytealign (w2[1], 0, offset);
6639 w3[1] = amd_bytealign (w2[0], w2[1], offset);
6640 w3[0] = amd_bytealign (w1[3], w2[0], offset);
6641 w2[3] = amd_bytealign (w1[2], w1[3], offset);
6642 w2[2] = amd_bytealign (w1[1], w1[2], offset);
6643 w2[1] = amd_bytealign (w1[0], w1[1], offset);
6644 w2[0] = amd_bytealign (w0[3], w1[0], offset);
6645 w1[3] = amd_bytealign (w0[2], w0[3], offset);
6646 w1[2] = amd_bytealign (w0[1], w0[2], offset);
6647 w1[1] = amd_bytealign (w0[0], w0[1], offset);
6648 w1[0] = amd_bytealign ( 0, w0[0], offset);
6649 w0[3] = 0;
6650 w0[2] = 0;
6651 w0[1] = 0;
6652 w0[0] = 0;
6653 break;
6654
6655 case 5:
6656 w3[2] = amd_bytealign (w2[0], 0, offset);
6657 w3[1] = amd_bytealign (w1[3], w2[0], offset);
6658 w3[0] = amd_bytealign (w1[2], w1[3], offset);
6659 w2[3] = amd_bytealign (w1[1], w1[2], offset);
6660 w2[2] = amd_bytealign (w1[0], w1[1], offset);
6661 w2[1] = amd_bytealign (w0[3], w1[0], offset);
6662 w2[0] = amd_bytealign (w0[2], w0[3], offset);
6663 w1[3] = amd_bytealign (w0[1], w0[2], offset);
6664 w1[2] = amd_bytealign (w0[0], w0[1], offset);
6665 w1[1] = amd_bytealign ( 0, w0[0], offset);
6666 w1[0] = 0;
6667 w0[3] = 0;
6668 w0[2] = 0;
6669 w0[1] = 0;
6670 w0[0] = 0;
6671 break;
6672
6673 case 6:
6674 w3[2] = amd_bytealign (w1[3], 0, offset);
6675 w3[1] = amd_bytealign (w1[2], w1[3], offset);
6676 w3[0] = amd_bytealign (w1[1], w1[2], offset);
6677 w2[3] = amd_bytealign (w1[0], w1[1], offset);
6678 w2[2] = amd_bytealign (w0[3], w1[0], offset);
6679 w2[1] = amd_bytealign (w0[2], w0[3], offset);
6680 w2[0] = amd_bytealign (w0[1], w0[2], offset);
6681 w1[3] = amd_bytealign (w0[0], w0[1], offset);
6682 w1[2] = amd_bytealign ( 0, w0[0], offset);
6683 w1[1] = 0;
6684 w1[0] = 0;
6685 w0[3] = 0;
6686 w0[2] = 0;
6687 w0[1] = 0;
6688 w0[0] = 0;
6689 break;
6690
6691 case 7:
6692 w3[2] = amd_bytealign (w1[2], 0, offset);
6693 w3[1] = amd_bytealign (w1[1], w1[2], offset);
6694 w3[0] = amd_bytealign (w1[0], w1[1], offset);
6695 w2[3] = amd_bytealign (w0[3], w1[0], offset);
6696 w2[2] = amd_bytealign (w0[2], w0[3], offset);
6697 w2[1] = amd_bytealign (w0[1], w0[2], offset);
6698 w2[0] = amd_bytealign (w0[0], w0[1], offset);
6699 w1[3] = amd_bytealign ( 0, w0[0], offset);
6700 w1[2] = 0;
6701 w1[1] = 0;
6702 w1[0] = 0;
6703 w0[3] = 0;
6704 w0[2] = 0;
6705 w0[1] = 0;
6706 w0[0] = 0;
6707 break;
6708
6709 case 8:
6710 w3[2] = amd_bytealign (w1[1], 0, offset);
6711 w3[1] = amd_bytealign (w1[0], w1[1], offset);
6712 w3[0] = amd_bytealign (w0[3], w1[0], offset);
6713 w2[3] = amd_bytealign (w0[2], w0[3], offset);
6714 w2[2] = amd_bytealign (w0[1], w0[2], offset);
6715 w2[1] = amd_bytealign (w0[0], w0[1], offset);
6716 w2[0] = amd_bytealign ( 0, w0[0], offset);
6717 w1[3] = 0;
6718 w1[2] = 0;
6719 w1[1] = 0;
6720 w1[0] = 0;
6721 w0[3] = 0;
6722 w0[2] = 0;
6723 w0[1] = 0;
6724 w0[0] = 0;
6725 break;
6726
6727 case 9:
6728 w3[2] = amd_bytealign (w1[0], 0, offset);
6729 w3[1] = amd_bytealign (w0[3], w1[0], offset);
6730 w3[0] = amd_bytealign (w0[2], w0[3], offset);
6731 w2[3] = amd_bytealign (w0[1], w0[2], offset);
6732 w2[2] = amd_bytealign (w0[0], w0[1], offset);
6733 w2[1] = amd_bytealign ( 0, w0[0], offset);
6734 w2[0] = 0;
6735 w1[3] = 0;
6736 w1[2] = 0;
6737 w1[1] = 0;
6738 w1[0] = 0;
6739 w0[3] = 0;
6740 w0[2] = 0;
6741 w0[1] = 0;
6742 w0[0] = 0;
6743 break;
6744
6745 case 10:
6746 w3[2] = amd_bytealign (w0[3], 0, offset);
6747 w3[1] = amd_bytealign (w0[2], w0[3], offset);
6748 w3[0] = amd_bytealign (w0[1], w0[2], offset);
6749 w2[3] = amd_bytealign (w0[0], w0[1], offset);
6750 w2[2] = amd_bytealign ( 0, w0[0], offset);
6751 w2[1] = 0;
6752 w2[0] = 0;
6753 w1[3] = 0;
6754 w1[2] = 0;
6755 w1[1] = 0;
6756 w1[0] = 0;
6757 w0[3] = 0;
6758 w0[2] = 0;
6759 w0[1] = 0;
6760 w0[0] = 0;
6761 break;
6762
6763 case 11:
6764 w3[2] = amd_bytealign (w0[2], 0, offset);
6765 w3[1] = amd_bytealign (w0[1], w0[2], offset);
6766 w3[0] = amd_bytealign (w0[0], w0[1], offset);
6767 w2[3] = amd_bytealign ( 0, w0[0], offset);
6768 w2[2] = 0;
6769 w2[1] = 0;
6770 w2[0] = 0;
6771 w1[3] = 0;
6772 w1[2] = 0;
6773 w1[1] = 0;
6774 w1[0] = 0;
6775 w0[3] = 0;
6776 w0[2] = 0;
6777 w0[1] = 0;
6778 w0[0] = 0;
6779 break;
6780
6781 case 12:
6782 w3[2] = amd_bytealign (w0[1], 0, offset);
6783 w3[1] = amd_bytealign (w0[0], w0[1], offset);
6784 w3[0] = amd_bytealign ( 0, w0[0], offset);
6785 w2[3] = 0;
6786 w2[2] = 0;
6787 w2[1] = 0;
6788 w2[0] = 0;
6789 w1[3] = 0;
6790 w1[2] = 0;
6791 w1[1] = 0;
6792 w1[0] = 0;
6793 w0[3] = 0;
6794 w0[2] = 0;
6795 w0[1] = 0;
6796 w0[0] = 0;
6797 break;
6798
6799 case 13:
6800 w3[2] = amd_bytealign (w0[0], 0, offset);
6801 w3[1] = amd_bytealign ( 0, w0[0], offset);
6802 w3[0] = 0;
6803 w2[3] = 0;
6804 w2[2] = 0;
6805 w2[1] = 0;
6806 w2[0] = 0;
6807 w1[3] = 0;
6808 w1[2] = 0;
6809 w1[1] = 0;
6810 w1[0] = 0;
6811 w0[3] = 0;
6812 w0[2] = 0;
6813 w0[1] = 0;
6814 w0[0] = 0;
6815 break;
// no default: offset / 4 >= 14 leaves all words untouched
6816 }
6817 #endif
6818
6819 #ifdef IS_NV
// selector evaluates to 0x3210, 0x4321, 0x5432 or 0x6543 for
// (offset & 3) == 0, 1, 2 or 3.
// NOTE(review): per the CUDA definition of __byte_perm each selector nibble
// presumably picks one byte out of the 8-byte (first, second) argument pair -
// confirm against the CUDA math API documentation.
6820 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
6821
// Same high-to-low assignment order as in the AMD path, but the word being
// replaced is passed first and its lower neighbour second. Unlike the AMD
// path, w3[2] is not written here.
6822 switch (offset / 4)
6823 {
6824 case 0:
6825 w3[1] = __byte_perm (w3[1], w3[0], selector);
6826 w3[0] = __byte_perm (w3[0], w2[3], selector);
6827 w2[3] = __byte_perm (w2[3], w2[2], selector);
6828 w2[2] = __byte_perm (w2[2], w2[1], selector);
6829 w2[1] = __byte_perm (w2[1], w2[0], selector);
6830 w2[0] = __byte_perm (w2[0], w1[3], selector);
6831 w1[3] = __byte_perm (w1[3], w1[2], selector);
6832 w1[2] = __byte_perm (w1[2], w1[1], selector);
6833 w1[1] = __byte_perm (w1[1], w1[0], selector);
6834 w1[0] = __byte_perm (w1[0], w0[3], selector);
6835 w0[3] = __byte_perm (w0[3], w0[2], selector);
6836 w0[2] = __byte_perm (w0[2], w0[1], selector);
6837 w0[1] = __byte_perm (w0[1], w0[0], selector);
6838 w0[0] = __byte_perm (w0[0], 0, selector);
6839 break;
6840
6841 case 1:
6842 w3[1] = __byte_perm (w3[0], w2[3], selector);
6843 w3[0] = __byte_perm (w2[3], w2[2], selector);
6844 w2[3] = __byte_perm (w2[2], w2[1], selector);
6845 w2[2] = __byte_perm (w2[1], w2[0], selector);
6846 w2[1] = __byte_perm (w2[0], w1[3], selector);
6847 w2[0] = __byte_perm (w1[3], w1[2], selector);
6848 w1[3] = __byte_perm (w1[2], w1[1], selector);
6849 w1[2] = __byte_perm (w1[1], w1[0], selector);
6850 w1[1] = __byte_perm (w1[0], w0[3], selector);
6851 w1[0] = __byte_perm (w0[3], w0[2], selector);
6852 w0[3] = __byte_perm (w0[2], w0[1], selector);
6853 w0[2] = __byte_perm (w0[1], w0[0], selector);
6854 w0[1] = __byte_perm (w0[0], 0, selector);
6855 w0[0] = 0;
6856 break;
6857
6858 case 2:
6859 w3[1] = __byte_perm (w2[3], w2[2], selector);
6860 w3[0] = __byte_perm (w2[2], w2[1], selector);
6861 w2[3] = __byte_perm (w2[1], w2[0], selector);
6862 w2[2] = __byte_perm (w2[0], w1[3], selector);
6863 w2[1] = __byte_perm (w1[3], w1[2], selector);
6864 w2[0] = __byte_perm (w1[2], w1[1], selector);
6865 w1[3] = __byte_perm (w1[1], w1[0], selector);
6866 w1[2] = __byte_perm (w1[0], w0[3], selector);
6867 w1[1] = __byte_perm (w0[3], w0[2], selector);
6868 w1[0] = __byte_perm (w0[2], w0[1], selector);
6869 w0[3] = __byte_perm (w0[1], w0[0], selector);
6870 w0[2] = __byte_perm (w0[0], 0, selector);
6871 w0[1] = 0;
6872 w0[0] = 0;
6873 break;
6874
6875 case 3:
6876 w3[1] = __byte_perm (w2[2], w2[1], selector);
6877 w3[0] = __byte_perm (w2[1], w2[0], selector);
6878 w2[3] = __byte_perm (w2[0], w1[3], selector);
6879 w2[2] = __byte_perm (w1[3], w1[2], selector);
6880 w2[1] = __byte_perm (w1[2], w1[1], selector);
6881 w2[0] = __byte_perm (w1[1], w1[0], selector);
6882 w1[3] = __byte_perm (w1[0], w0[3], selector);
6883 w1[2] = __byte_perm (w0[3], w0[2], selector);
6884 w1[1] = __byte_perm (w0[2], w0[1], selector);
6885 w1[0] = __byte_perm (w0[1], w0[0], selector);
6886 w0[3] = __byte_perm (w0[0], 0, selector);
6887 w0[2] = 0;
6888 w0[1] = 0;
6889 w0[0] = 0;
6890 break;
6891
6892 case 4:
6893 w3[1] = __byte_perm (w2[1], w2[0], selector);
6894 w3[0] = __byte_perm (w2[0], w1[3], selector);
6895 w2[3] = __byte_perm (w1[3], w1[2], selector);
6896 w2[2] = __byte_perm (w1[2], w1[1], selector);
6897 w2[1] = __byte_perm (w1[1], w1[0], selector);
6898 w2[0] = __byte_perm (w1[0], w0[3], selector);
6899 w1[3] = __byte_perm (w0[3], w0[2], selector);
6900 w1[2] = __byte_perm (w0[2], w0[1], selector);
6901 w1[1] = __byte_perm (w0[1], w0[0], selector);
6902 w1[0] = __byte_perm (w0[0], 0, selector);
6903 w0[3] = 0;
6904 w0[2] = 0;
6905 w0[1] = 0;
6906 w0[0] = 0;
6907 break;
6908
6909 case 5:
6910 w3[1] = __byte_perm (w2[0], w1[3], selector);
6911 w3[0] = __byte_perm (w1[3], w1[2], selector);
6912 w2[3] = __byte_perm (w1[2], w1[1], selector);
6913 w2[2] = __byte_perm (w1[1], w1[0], selector);
6914 w2[1] = __byte_perm (w1[0], w0[3], selector);
6915 w2[0] = __byte_perm (w0[3], w0[2], selector);
6916 w1[3] = __byte_perm (w0[2], w0[1], selector);
6917 w1[2] = __byte_perm (w0[1], w0[0], selector);
6918 w1[1] = __byte_perm (w0[0], 0, selector);
6919 w1[0] = 0;
6920 w0[3] = 0;
6921 w0[2] = 0;
6922 w0[1] = 0;
6923 w0[0] = 0;
6924 break;
6925
6926 case 6:
6927 w3[1] = __byte_perm (w1[3], w1[2], selector);
6928 w3[0] = __byte_perm (w1[2], w1[1], selector);
6929 w2[3] = __byte_perm (w1[1], w1[0], selector);
6930 w2[2] = __byte_perm (w1[0], w0[3], selector);
6931 w2[1] = __byte_perm (w0[3], w0[2], selector);
6932 w2[0] = __byte_perm (w0[2], w0[1], selector);
6933 w1[3] = __byte_perm (w0[1], w0[0], selector);
6934 w1[2] = __byte_perm (w0[0], 0, selector);
6935 w1[1] = 0;
6936 w1[0] = 0;
6937 w0[3] = 0;
6938 w0[2] = 0;
6939 w0[1] = 0;
6940 w0[0] = 0;
6941 break;
6942
6943 case 7:
6944 w3[1] = __byte_perm (w1[2], w1[1], selector);
6945 w3[0] = __byte_perm (w1[1], w1[0], selector);
6946 w2[3] = __byte_perm (w1[0], w0[3], selector);
6947 w2[2] = __byte_perm (w0[3], w0[2], selector);
6948 w2[1] = __byte_perm (w0[2], w0[1], selector);
6949 w2[0] = __byte_perm (w0[1], w0[0], selector);
6950 w1[3] = __byte_perm (w0[0], 0, selector);
6951 w1[2] = 0;
6952 w1[1] = 0;
6953 w1[0] = 0;
6954 w0[3] = 0;
6955 w0[2] = 0;
6956 w0[1] = 0;
6957 w0[0] = 0;
6958 break;
6959
6960 case 8:
6961 w3[1] = __byte_perm (w1[1], w1[0], selector);
6962 w3[0] = __byte_perm (w1[0], w0[3], selector);
6963 w2[3] = __byte_perm (w0[3], w0[2], selector);
6964 w2[2] = __byte_perm (w0[2], w0[1], selector);
6965 w2[1] = __byte_perm (w0[1], w0[0], selector);
6966 w2[0] = __byte_perm (w0[0], 0, selector);
6967 w1[3] = 0;
6968 w1[2] = 0;
6969 w1[1] = 0;
6970 w1[0] = 0;
6971 w0[3] = 0;
6972 w0[2] = 0;
6973 w0[1] = 0;
6974 w0[0] = 0;
6975 break;
6976
6977 case 9:
6978 w3[1] = __byte_perm (w1[0], w0[3], selector);
6979 w3[0] = __byte_perm (w0[3], w0[2], selector);
6980 w2[3] = __byte_perm (w0[2], w0[1], selector);
6981 w2[2] = __byte_perm (w0[1], w0[0], selector);
6982 w2[1] = __byte_perm (w0[0], 0, selector);
6983 w2[0] = 0;
6984 w1[3] = 0;
6985 w1[2] = 0;
6986 w1[1] = 0;
6987 w1[0] = 0;
6988 w0[3] = 0;
6989 w0[2] = 0;
6990 w0[1] = 0;
6991 w0[0] = 0;
6992 break;
6993
6994 case 10:
6995 w3[1] = __byte_perm (w0[3], w0[2], selector);
6996 w3[0] = __byte_perm (w0[2], w0[1], selector);
6997 w2[3] = __byte_perm (w0[1], w0[0], selector);
6998 w2[2] = __byte_perm (w0[0], 0, selector);
6999 w2[1] = 0;
7000 w2[0] = 0;
7001 w1[3] = 0;
7002 w1[2] = 0;
7003 w1[1] = 0;
7004 w1[0] = 0;
7005 w0[3] = 0;
7006 w0[2] = 0;
7007 w0[1] = 0;
7008 w0[0] = 0;
7009 break;
7010
7011 case 11:
7012 w3[1] = __byte_perm (w0[2], w0[1], selector);
7013 w3[0] = __byte_perm (w0[1], w0[0], selector);
7014 w2[3] = __byte_perm (w0[0], 0, selector);
7015 w2[2] = 0;
7016 w2[1] = 0;
7017 w2[0] = 0;
7018 w1[3] = 0;
7019 w1[2] = 0;
7020 w1[1] = 0;
7021 w1[0] = 0;
7022 w0[3] = 0;
7023 w0[2] = 0;
7024 w0[1] = 0;
7025 w0[0] = 0;
7026 break;
7027
7028 case 12:
7029 w3[1] = __byte_perm (w0[1], w0[0], selector);
7030 w3[0] = __byte_perm (w0[0], 0, selector);
7031 w2[3] = 0;
7032 w2[2] = 0;
7033 w2[1] = 0;
7034 w2[0] = 0;
7035 w1[3] = 0;
7036 w1[2] = 0;
7037 w1[1] = 0;
7038 w1[0] = 0;
7039 w0[3] = 0;
7040 w0[2] = 0;
7041 w0[1] = 0;
7042 w0[0] = 0;
7043 break;
7044
7045 case 13:
7046 w3[1] = __byte_perm (w0[0], 0, selector);
7047 w3[0] = 0;
7048 w2[3] = 0;
7049 w2[2] = 0;
7050 w2[1] = 0;
7051 w2[0] = 0;
7052 w1[3] = 0;
7053 w1[2] = 0;
7054 w1[1] = 0;
7055 w1[0] = 0;
7056 w0[3] = 0;
7057 w0[2] = 0;
7058 w0[1] = 0;
7059 w0[0] = 0;
7060 break;
// no default: offset / 4 >= 14 leaves all words untouched
7061 }
7062 #endif
7063 }
7064
7065 /* not needed anymore?
7066 // before: append_0x80_2_be
7067 static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
7068 {
7069 switch (offset)
7070 {
7071 case 0:
7072 w0[0] |= 0x80000000;
7073 break;
7074
7075 case 1:
7076 w0[0] |= 0x800000;
7077 break;
7078
7079 case 2:
7080 w0[0] |= 0x8000;
7081 break;
7082
7083 case 3:
7084 w0[0] |= 0x80;
7085 break;
7086
7087 case 4:
7088 w0[1] |= 0x80000000;
7089 break;
7090
7091 case 5:
7092 w0[1] |= 0x800000;
7093 break;
7094
7095 case 6:
7096 w0[1] |= 0x8000;
7097 break;
7098
7099 case 7:
7100 w0[1] |= 0x80;
7101 break;
7102
7103 case 8:
7104 w0[2] |= 0x80000000;
7105 break;
7106
7107 case 9:
7108 w0[2] |= 0x800000;
7109 break;
7110
7111 case 10:
7112 w0[2] |= 0x8000;
7113 break;
7114
7115 case 11:
7116 w0[2] |= 0x80;
7117 break;
7118
7119 case 12:
7120 w0[3] |= 0x80000000;
7121 break;
7122
7123 case 13:
7124 w0[3] |= 0x800000;
7125 break;
7126
7127 case 14:
7128 w0[3] |= 0x8000;
7129 break;
7130
7131 case 15:
7132 w0[3] |= 0x80;
7133 break;
7134
7135 case 16:
7136 w1[0] |= 0x80000000;
7137 break;
7138
7139 case 17:
7140 w1[0] |= 0x800000;
7141 break;
7142
7143 case 18:
7144 w1[0] |= 0x8000;
7145 break;
7146
7147 case 19:
7148 w1[0] |= 0x80;
7149 break;
7150
7151 case 20:
7152 w1[1] |= 0x80000000;
7153 break;
7154
7155 case 21:
7156 w1[1] |= 0x800000;
7157 break;
7158
7159 case 22:
7160 w1[1] |= 0x8000;
7161 break;
7162
7163 case 23:
7164 w1[1] |= 0x80;
7165 break;
7166
7167 case 24:
7168 w1[2] |= 0x80000000;
7169 break;
7170
7171 case 25:
7172 w1[2] |= 0x800000;
7173 break;
7174
7175 case 26:
7176 w1[2] |= 0x8000;
7177 break;
7178
7179 case 27:
7180 w1[2] |= 0x80;
7181 break;
7182
7183 case 28:
7184 w1[3] |= 0x80000000;
7185 break;
7186
7187 case 29:
7188 w1[3] |= 0x800000;
7189 break;
7190
7191 case 30:
7192 w1[3] |= 0x8000;
7193 break;
7194
7195 case 31:
7196 w1[3] |= 0x80;
7197 break;
7198 }
7199 }
7200
7201 // before: append_0x80_4
7202 static void append_0x80_1x16 (u32 w[16], const u32 offset)
7203 {
7204 switch (offset)
7205 {
7206 case 0:
7207 w[ 0] = 0x80;
7208 break;
7209
7210 case 1:
7211 w[ 0] = w[ 0] | 0x8000;
7212 break;
7213
7214 case 2:
7215 w[ 0] = w[ 0] | 0x800000;
7216 break;
7217
7218 case 3:
7219 w[ 0] = w[ 0] | 0x80000000;
7220 break;
7221
7222 case 4:
7223 w[ 1] = 0x80;
7224 break;
7225
7226 case 5:
7227 w[ 1] = w[ 1] | 0x8000;
7228 break;
7229
7230 case 6:
7231 w[ 1] = w[ 1] | 0x800000;
7232 break;
7233
7234 case 7:
7235 w[ 1] = w[ 1] | 0x80000000;
7236 break;
7237
7238 case 8:
7239 w[ 2] = 0x80;
7240 break;
7241
7242 case 9:
7243 w[ 2] = w[ 2] | 0x8000;
7244 break;
7245
7246 case 10:
7247 w[ 2] = w[ 2] | 0x800000;
7248 break;
7249
7250 case 11:
7251 w[ 2] = w[ 2] | 0x80000000;
7252 break;
7253
7254 case 12:
7255 w[ 3] = 0x80;
7256 break;
7257
7258 case 13:
7259 w[ 3] = w[ 3] | 0x8000;
7260 break;
7261
7262 case 14:
7263 w[ 3] = w[ 3] | 0x800000;
7264 break;
7265
7266 case 15:
7267 w[ 3] = w[ 3] | 0x80000000;
7268 break;
7269
7270 case 16:
7271 w[ 4] = 0x80;
7272 break;
7273
7274 case 17:
7275 w[ 4] = w[ 4] | 0x8000;
7276 break;
7277
7278 case 18:
7279 w[ 4] = w[ 4] | 0x800000;
7280 break;
7281
7282 case 19:
7283 w[ 4] = w[ 4] | 0x80000000;
7284 break;
7285
7286 case 20:
7287 w[ 5] = 0x80;
7288 break;
7289
7290 case 21:
7291 w[ 5] = w[ 5] | 0x8000;
7292 break;
7293
7294 case 22:
7295 w[ 5] = w[ 5] | 0x800000;
7296 break;
7297
7298 case 23:
7299 w[ 5] = w[ 5] | 0x80000000;
7300 break;
7301
7302 case 24:
7303 w[ 6] = 0x80;
7304 break;
7305
7306 case 25:
7307 w[ 6] = w[ 6] | 0x8000;
7308 break;
7309
7310 case 26:
7311 w[ 6] = w[ 6] | 0x800000;
7312 break;
7313
7314 case 27:
7315 w[ 6] = w[ 6] | 0x80000000;
7316 break;
7317
7318 case 28:
7319 w[ 7] = 0x80;
7320 break;
7321
7322 case 29:
7323 w[ 7] = w[ 7] | 0x8000;
7324 break;
7325
7326 case 30:
7327 w[ 7] = w[ 7] | 0x800000;
7328 break;
7329
7330 case 31:
7331 w[ 7] = w[ 7] | 0x80000000;
7332 break;
7333
7334 case 32:
7335 w[ 8] = 0x80;
7336 break;
7337
7338 case 33:
7339 w[ 8] = w[ 8] | 0x8000;
7340 break;
7341
7342 case 34:
7343 w[ 8] = w[ 8] | 0x800000;
7344 break;
7345
7346 case 35:
7347 w[ 8] = w[ 8] | 0x80000000;
7348 break;
7349
7350 case 36:
7351 w[ 9] = 0x80;
7352 break;
7353
7354 case 37:
7355 w[ 9] = w[ 9] | 0x8000;
7356 break;
7357
7358 case 38:
7359 w[ 9] = w[ 9] | 0x800000;
7360 break;
7361
7362 case 39:
7363 w[ 9] = w[ 9] | 0x80000000;
7364 break;
7365
7366 case 40:
7367 w[10] = 0x80;
7368 break;
7369
7370 case 41:
7371 w[10] = w[10] | 0x8000;
7372 break;
7373
7374 case 42:
7375 w[10] = w[10] | 0x800000;
7376 break;
7377
7378 case 43:
7379 w[10] = w[10] | 0x80000000;
7380 break;
7381
7382 case 44:
7383 w[11] = 0x80;
7384 break;
7385
7386 case 45:
7387 w[11] = w[11] | 0x8000;
7388 break;
7389
7390 case 46:
7391 w[11] = w[11] | 0x800000;
7392 break;
7393
7394 case 47:
7395 w[11] = w[11] | 0x80000000;
7396 break;
7397
7398 case 48:
7399 w[12] = 0x80;
7400 break;
7401
7402 case 49:
7403 w[12] = w[12] | 0x8000;
7404 break;
7405
7406 case 50:
7407 w[12] = w[12] | 0x800000;
7408 break;
7409
7410 case 51:
7411 w[12] = w[12] | 0x80000000;
7412 break;
7413
7414 case 52:
7415 w[13] = 0x80;
7416 break;
7417
7418 case 53:
7419 w[13] = w[13] | 0x8000;
7420 break;
7421
7422 case 54:
7423 w[13] = w[13] | 0x800000;
7424 break;
7425
7426 case 55:
7427 w[13] = w[13] | 0x80000000;
7428 break;
7429
7430 case 56:
7431 w[14] = 0x80;
7432 break;
7433
7434 case 57:
7435 w[14] = w[14] | 0x8000;
7436 break;
7437
7438 case 58:
7439 w[14] = w[14] | 0x800000;
7440 break;
7441
7442 case 59:
7443 w[14] = w[14] | 0x80000000;
7444 break;
7445
7446 case 60:
7447 w[15] = 0x80;
7448 break;
7449
7450 case 61:
7451 w[15] = w[15] | 0x8000;
7452 break;
7453
7454 case 62:
7455 w[15] = w[15] | 0x800000;
7456 break;
7457
7458 case 63:
7459 w[15] = w[15] | 0x80000000;
7460 break;
7461 }
7462 }
7463
7464 // before: append_0x80_8
7465 static void append_0x80_1x32 (u32 w[32], const u32 offset)
7466 {
7467 switch (offset)
7468 {
7469 case 0:
7470 w[ 0] = 0x80;
7471 break;
7472
7473 case 1:
7474 w[ 0] = w[ 0] | 0x8000;
7475 break;
7476
7477 case 2:
7478 w[ 0] = w[ 0] | 0x800000;
7479 break;
7480
7481 case 3:
7482 w[ 0] = w[ 0] | 0x80000000;
7483 break;
7484
7485 case 4:
7486 w[ 1] = 0x80;
7487 break;
7488
7489 case 5:
7490 w[ 1] = w[ 1] | 0x8000;
7491 break;
7492
7493 case 6:
7494 w[ 1] = w[ 1] | 0x800000;
7495 break;
7496
7497 case 7:
7498 w[ 1] = w[ 1] | 0x80000000;
7499 break;
7500
7501 case 8:
7502 w[ 2] = 0x80;
7503 break;
7504
7505 case 9:
7506 w[ 2] = w[ 2] | 0x8000;
7507 break;
7508
7509 case 10:
7510 w[ 2] = w[ 2] | 0x800000;
7511 break;
7512
7513 case 11:
7514 w[ 2] = w[ 2] | 0x80000000;
7515 break;
7516
7517 case 12:
7518 w[ 3] = 0x80;
7519 break;
7520
7521 case 13:
7522 w[ 3] = w[ 3] | 0x8000;
7523 break;
7524
7525 case 14:
7526 w[ 3] = w[ 3] | 0x800000;
7527 break;
7528
7529 case 15:
7530 w[ 3] = w[ 3] | 0x80000000;
7531 break;
7532
7533 case 16:
7534 w[ 4] = 0x80;
7535 break;
7536
7537 case 17:
7538 w[ 4] = w[ 4] | 0x8000;
7539 break;
7540
7541 case 18:
7542 w[ 4] = w[ 4] | 0x800000;
7543 break;
7544
7545 case 19:
7546 w[ 4] = w[ 4] | 0x80000000;
7547 break;
7548
7549 case 20:
7550 w[ 5] = 0x80;
7551 break;
7552
7553 case 21:
7554 w[ 5] = w[ 5] | 0x8000;
7555 break;
7556
7557 case 22:
7558 w[ 5] = w[ 5] | 0x800000;
7559 break;
7560
7561 case 23:
7562 w[ 5] = w[ 5] | 0x80000000;
7563 break;
7564
7565 case 24:
7566 w[ 6] = 0x80;
7567 break;
7568
7569 case 25:
7570 w[ 6] = w[ 6] | 0x8000;
7571 break;
7572
7573 case 26:
7574 w[ 6] = w[ 6] | 0x800000;
7575 break;
7576
7577 case 27:
7578 w[ 6] = w[ 6] | 0x80000000;
7579 break;
7580
7581 case 28:
7582 w[ 7] = 0x80;
7583 break;
7584
7585 case 29:
7586 w[ 7] = w[ 7] | 0x8000;
7587 break;
7588
7589 case 30:
7590 w[ 7] = w[ 7] | 0x800000;
7591 break;
7592
7593 case 31:
7594 w[ 7] = w[ 7] | 0x80000000;
7595 break;
7596
7597 case 32:
7598 w[ 8] = 0x80;
7599 break;
7600
7601 case 33:
7602 w[ 8] = w[ 8] | 0x8000;
7603 break;
7604
7605 case 34:
7606 w[ 8] = w[ 8] | 0x800000;
7607 break;
7608
7609 case 35:
7610 w[ 8] = w[ 8] | 0x80000000;
7611 break;
7612
7613 case 36:
7614 w[ 9] = 0x80;
7615 break;
7616
7617 case 37:
7618 w[ 9] = w[ 9] | 0x8000;
7619 break;
7620
7621 case 38:
7622 w[ 9] = w[ 9] | 0x800000;
7623 break;
7624
7625 case 39:
7626 w[ 9] = w[ 9] | 0x80000000;
7627 break;
7628
7629 case 40:
7630 w[10] = 0x80;
7631 break;
7632
7633 case 41:
7634 w[10] = w[10] | 0x8000;
7635 break;
7636
7637 case 42:
7638 w[10] = w[10] | 0x800000;
7639 break;
7640
7641 case 43:
7642 w[10] = w[10] | 0x80000000;
7643 break;
7644
7645 case 44:
7646 w[11] = 0x80;
7647 break;
7648
7649 case 45:
7650 w[11] = w[11] | 0x8000;
7651 break;
7652
7653 case 46:
7654 w[11] = w[11] | 0x800000;
7655 break;
7656
7657 case 47:
7658 w[11] = w[11] | 0x80000000;
7659 break;
7660
7661 case 48:
7662 w[12] = 0x80;
7663 break;
7664
7665 case 49:
7666 w[12] = w[12] | 0x8000;
7667 break;
7668
7669 case 50:
7670 w[12] = w[12] | 0x800000;
7671 break;
7672
7673 case 51:
7674 w[12] = w[12] | 0x80000000;
7675 break;
7676
7677 case 52:
7678 w[13] = 0x80;
7679 break;
7680
7681 case 53:
7682 w[13] = w[13] | 0x8000;
7683 break;
7684
7685 case 54:
7686 w[13] = w[13] | 0x800000;
7687 break;
7688
7689 case 55:
7690 w[13] = w[13] | 0x80000000;
7691 break;
7692
7693 case 56:
7694 w[14] = 0x80;
7695 break;
7696
7697 case 57:
7698 w[14] = w[14] | 0x8000;
7699 break;
7700
7701 case 58:
7702 w[14] = w[14] | 0x800000;
7703 break;
7704
7705 case 59:
7706 w[14] = w[14] | 0x80000000;
7707 break;
7708
7709 case 60:
7710 w[15] = 0x80;
7711 break;
7712
7713 case 61:
7714 w[15] = w[15] | 0x8000;
7715 break;
7716
7717 case 62:
7718 w[15] = w[15] | 0x800000;
7719 break;
7720
7721 case 63:
7722 w[15] = w[15] | 0x80000000;
7723 break;
7724
7725 case 64:
7726 w[16] = 0x80;
7727 break;
7728
7729 case 65:
7730 w[16] = w[16] | 0x8000;
7731 break;
7732
7733 case 66:
7734 w[16] = w[16] | 0x800000;
7735 break;
7736
7737 case 67:
7738 w[16] = w[16] | 0x80000000;
7739 break;
7740
7741 case 68:
7742 w[17] = 0x80;
7743 break;
7744
7745 case 69:
7746 w[17] = w[17] | 0x8000;
7747 break;
7748
7749 case 70:
7750 w[17] = w[17] | 0x800000;
7751 break;
7752
7753 case 71:
7754 w[17] = w[17] | 0x80000000;
7755 break;
7756
7757 case 72:
7758 w[18] = 0x80;
7759 break;
7760
7761 case 73:
7762 w[18] = w[18] | 0x8000;
7763 break;
7764
7765 case 74:
7766 w[18] = w[18] | 0x800000;
7767 break;
7768
7769 case 75:
7770 w[18] = w[18] | 0x80000000;
7771 break;
7772
7773 case 76:
7774 w[19] = 0x80;
7775 break;
7776
7777 case 77:
7778 w[19] = w[19] | 0x8000;
7779 break;
7780
7781 case 78:
7782 w[19] = w[19] | 0x800000;
7783 break;
7784
7785 case 79:
7786 w[19] = w[19] | 0x80000000;
7787 break;
7788
7789 case 80:
7790 w[20] = 0x80;
7791 break;
7792
7793 case 81:
7794 w[20] = w[20] | 0x8000;
7795 break;
7796
7797 case 82:
7798 w[20] = w[20] | 0x800000;
7799 break;
7800
7801 case 83:
7802 w[20] = w[20] | 0x80000000;
7803 break;
7804
7805 case 84:
7806 w[21] = 0x80;
7807 break;
7808
7809 case 85:
7810 w[21] = w[21] | 0x8000;
7811 break;
7812
7813 case 86:
7814 w[21] = w[21] | 0x800000;
7815 break;
7816
7817 case 87:
7818 w[21] = w[21] | 0x80000000;
7819 break;
7820
7821 case 88:
7822 w[22] = 0x80;
7823 break;
7824
7825 case 89:
7826 w[22] = w[22] | 0x8000;
7827 break;
7828
7829 case 90:
7830 w[22] = w[22] | 0x800000;
7831 break;
7832
7833 case 91:
7834 w[22] = w[22] | 0x80000000;
7835 break;
7836
7837 case 92:
7838 w[23] = 0x80;
7839 break;
7840
7841 case 93:
7842 w[23] = w[23] | 0x8000;
7843 break;
7844
7845 case 94:
7846 w[23] = w[23] | 0x800000;
7847 break;
7848
7849 case 95:
7850 w[23] = w[23] | 0x80000000;
7851 break;
7852
7853 case 96:
7854 w[24] = 0x80;
7855 break;
7856
7857 case 97:
7858 w[24] = w[24] | 0x8000;
7859 break;
7860
7861 case 98:
7862 w[24] = w[24] | 0x800000;
7863 break;
7864
7865 case 99:
7866 w[24] = w[24] | 0x80000000;
7867 break;
7868
7869 case 100:
7870 w[25] = 0x80;
7871 break;
7872
7873 case 101:
7874 w[25] = w[25] | 0x8000;
7875 break;
7876
7877 case 102:
7878 w[25] = w[25] | 0x800000;
7879 break;
7880
7881 case 103:
7882 w[25] = w[25] | 0x80000000;
7883 break;
7884
7885 case 104:
7886 w[26] = 0x80;
7887 break;
7888
7889 case 105:
7890 w[26] = w[26] | 0x8000;
7891 break;
7892
7893 case 106:
7894 w[26] = w[26] | 0x800000;
7895 break;
7896
7897 case 107:
7898 w[26] = w[26] | 0x80000000;
7899 break;
7900
7901 case 108:
7902 w[27] = 0x80;
7903 break;
7904
7905 case 109:
7906 w[27] = w[27] | 0x8000;
7907 break;
7908
7909 case 110:
7910 w[27] = w[27] | 0x800000;
7911 break;
7912
7913 case 111:
7914 w[27] = w[27] | 0x80000000;
7915 break;
7916
7917 case 112:
7918 w[28] = 0x80;
7919 break;
7920
7921 case 113:
7922 w[28] = w[28] | 0x8000;
7923 break;
7924
7925 case 114:
7926 w[28] = w[28] | 0x800000;
7927 break;
7928
7929 case 115:
7930 w[28] = w[28] | 0x80000000;
7931 break;
7932
7933 case 116:
7934 w[29] = 0x80;
7935 break;
7936
7937 case 117:
7938 w[29] = w[29] | 0x8000;
7939 break;
7940
7941 case 118:
7942 w[29] = w[29] | 0x800000;
7943 break;
7944
7945 case 119:
7946 w[29] = w[29] | 0x80000000;
7947 break;
7948
7949 case 120:
7950 w[30] = 0x80;
7951 break;
7952
7953 case 121:
7954 w[30] = w[30] | 0x8000;
7955 break;
7956
7957 case 122:
7958 w[30] = w[30] | 0x800000;
7959 break;
7960
7961 case 123:
7962 w[30] = w[30] | 0x80000000;
7963 break;
7964
7965 case 124:
7966 w[31] = 0x80;
7967 break;
7968
7969 case 125:
7970 w[31] = w[31] | 0x8000;
7971 break;
7972
7973 case 126:
7974 w[31] = w[31] | 0x800000;
7975 break;
7976
7977 case 127:
7978 w[31] = w[31] | 0x80000000;
7979 break;
7980 }
7981 }
7982 */