2 * Author......: Jens Steube <jens.steube@gmail.com>
6 static int device_memcmp (const u32 d1
[4], __global u32
*d2
)
8 if (d1
[3] > d2
[DGST_R3
]) return ( 1);
9 if (d1
[3] < d2
[DGST_R3
]) return (-1);
10 if (d1
[2] > d2
[DGST_R2
]) return ( 1);
11 if (d1
[2] < d2
[DGST_R2
]) return (-1);
12 if (d1
[1] > d2
[DGST_R1
]) return ( 1);
13 if (d1
[1] < d2
[DGST_R1
]) return (-1);
14 if (d1
[0] > d2
[DGST_R0
]) return ( 1);
15 if (d1
[0] < d2
[DGST_R0
]) return (-1);
20 static int find_hash (const u32 digest
[4], const u32 digests_cnt
, __global digest_t
*digests_buf
)
22 for (u32 l
= 0, r
= digests_cnt
; r
; r
>>= 1)
28 const int cmp
= device_memcmp (digest
, digests_buf
[c
].digest_buf
);
37 if (cmp
== 0) return (c
);
43 static u32
check_bitmap (__global u32
*bitmap
, const u32 bitmap_mask
, const u32 bitmap_shift
, const u32 digest
)
45 return (bitmap
[(digest
>> bitmap_shift
) & bitmap_mask
] & (1 << (digest
& 0x1f)));
48 static u32
check (const u32 digest
[2], __global u32
*bitmap_s1_a
, __global u32
*bitmap_s1_b
, __global u32
*bitmap_s1_c
, __global u32
*bitmap_s1_d
, __global u32
*bitmap_s2_a
, __global u32
*bitmap_s2_b
, __global u32
*bitmap_s2_c
, __global u32
*bitmap_s2_d
, const u32 bitmap_mask
, const u32 bitmap_shift1
, const u32 bitmap_shift2
)
50 if (check_bitmap (bitmap_s1_a
, bitmap_mask
, bitmap_shift1
, digest
[0]) == 0) return (0);
51 if (check_bitmap (bitmap_s1_b
, bitmap_mask
, bitmap_shift1
, digest
[1]) == 0) return (0);
52 if (check_bitmap (bitmap_s1_c
, bitmap_mask
, bitmap_shift1
, digest
[2]) == 0) return (0);
53 if (check_bitmap (bitmap_s1_d
, bitmap_mask
, bitmap_shift1
, digest
[3]) == 0) return (0);
55 if (check_bitmap (bitmap_s2_a
, bitmap_mask
, bitmap_shift2
, digest
[0]) == 0) return (0);
56 if (check_bitmap (bitmap_s2_b
, bitmap_mask
, bitmap_shift2
, digest
[1]) == 0) return (0);
57 if (check_bitmap (bitmap_s2_c
, bitmap_mask
, bitmap_shift2
, digest
[2]) == 0) return (0);
58 if (check_bitmap (bitmap_s2_d
, bitmap_mask
, bitmap_shift2
, digest
[3]) == 0) return (0);
64 static void mark_hash_s0 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
66 hashes_shown
[hash_pos
] = 1;
68 plains_buf
[hash_pos
].gidvid
= (gid
* 1) + 0;
69 plains_buf
[hash_pos
].il_pos
= il_pos
;
72 static void mark_hash_s0_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
74 hashes_shown
[hash_pos
] = 1;
76 plains_buf
[hash_pos
].gidvid
= gid
;
77 plains_buf
[hash_pos
].il_pos
= (il_pos
* 1) + 0;
82 static void mark_hash_s0 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
84 hashes_shown
[hash_pos
] = 1;
86 plains_buf
[hash_pos
].gidvid
= (gid
* 2) + 0;
87 plains_buf
[hash_pos
].il_pos
= il_pos
;
90 static void mark_hash_s1 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
92 hashes_shown
[hash_pos
] = 1;
94 plains_buf
[hash_pos
].gidvid
= (gid
* 2) + 1;
95 plains_buf
[hash_pos
].il_pos
= il_pos
;
98 static void mark_hash_s0_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
100 hashes_shown
[hash_pos
] = 1;
102 plains_buf
[hash_pos
].gidvid
= gid
;
103 plains_buf
[hash_pos
].il_pos
= (il_pos
* 2) + 0;
106 static void mark_hash_s1_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
108 hashes_shown
[hash_pos
] = 1;
110 plains_buf
[hash_pos
].gidvid
= gid
;
111 plains_buf
[hash_pos
].il_pos
= (il_pos
* 2) + 1;
116 static void mark_hash_s0 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
118 hashes_shown
[hash_pos
] = 1;
120 plains_buf
[hash_pos
].gidvid
= (gid
* 4) + 0;
121 plains_buf
[hash_pos
].il_pos
= il_pos
;
124 static void mark_hash_s1 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
126 hashes_shown
[hash_pos
] = 1;
128 plains_buf
[hash_pos
].gidvid
= (gid
* 4) + 1;
129 plains_buf
[hash_pos
].il_pos
= il_pos
;
132 static void mark_hash_s2 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
134 hashes_shown
[hash_pos
] = 1;
136 plains_buf
[hash_pos
].gidvid
= (gid
* 4) + 2;
137 plains_buf
[hash_pos
].il_pos
= il_pos
;
140 static void mark_hash_s3 (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
142 hashes_shown
[hash_pos
] = 1;
144 plains_buf
[hash_pos
].gidvid
= (gid
* 4) + 3;
145 plains_buf
[hash_pos
].il_pos
= il_pos
;
148 static void mark_hash_s0_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
150 hashes_shown
[hash_pos
] = 1;
152 plains_buf
[hash_pos
].gidvid
= gid
;
153 plains_buf
[hash_pos
].il_pos
= (il_pos
* 4) + 0;
156 static void mark_hash_s1_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
158 hashes_shown
[hash_pos
] = 1;
160 plains_buf
[hash_pos
].gidvid
= gid
;
161 plains_buf
[hash_pos
].il_pos
= (il_pos
* 4) + 1;
164 static void mark_hash_s2_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
166 hashes_shown
[hash_pos
] = 1;
168 plains_buf
[hash_pos
].gidvid
= gid
;
169 plains_buf
[hash_pos
].il_pos
= (il_pos
* 4) + 2;
172 static void mark_hash_s3_warp (__global plain_t
*plains_buf
, __global u32
*hashes_shown
, const int hash_pos
, const u32 gid
, const u32 il_pos
)
174 hashes_shown
[hash_pos
] = 1;
176 plains_buf
[hash_pos
].gidvid
= gid
;
177 plains_buf
[hash_pos
].il_pos
= (il_pos
* 4) + 3;
185 static u32
swap_workaround (const u32 v
)
187 return (as_uint (as_uchar4 (v
).s3210
));
190 static u64
swap_workaround (const u64 v
)
192 return (as_ulong (as_uchar8 (v
).s76543210
));
195 static void truncate_block (u32 w
[4], const u32 len
)
204 case 1: w
[0] &= 0x000000FF;
209 case 2: w
[0] &= 0x0000FFFF;
214 case 3: w
[0] &= 0x00FFFFFF;
223 case 5: w
[1] &= 0x000000FF;
227 case 6: w
[1] &= 0x0000FFFF;
231 case 7: w
[1] &= 0x00FFFFFF;
238 case 9: w
[2] &= 0x000000FF;
241 case 10: w
[2] &= 0x0000FFFF;
244 case 11: w
[2] &= 0x00FFFFFF;
249 case 13: w
[3] &= 0x000000FF;
251 case 14: w
[3] &= 0x0000FFFF;
253 case 15: w
[3] &= 0x00FFFFFF;
258 static void make_unicode (const u32 in
[4], u32 out1
[4], u32 out2
[4])
260 out2
[3] = ((in
[3] >> 8) & 0x00FF0000) | ((in
[3] >> 16) & 0x000000FF);
261 out2
[2] = ((in
[3] << 8) & 0x00FF0000) | ((in
[3] >> 0) & 0x000000FF);
262 out2
[1] = ((in
[2] >> 8) & 0x00FF0000) | ((in
[2] >> 16) & 0x000000FF);
263 out2
[0] = ((in
[2] << 8) & 0x00FF0000) | ((in
[2] >> 0) & 0x000000FF);
264 out1
[3] = ((in
[1] >> 8) & 0x00FF0000) | ((in
[1] >> 16) & 0x000000FF);
265 out1
[2] = ((in
[1] << 8) & 0x00FF0000) | ((in
[1] >> 0) & 0x000000FF);
266 out1
[1] = ((in
[0] >> 8) & 0x00FF0000) | ((in
[0] >> 16) & 0x000000FF);
267 out1
[0] = ((in
[0] << 8) & 0x00FF0000) | ((in
[0] >> 0) & 0x000000FF);
270 static void undo_unicode (const u32 in1
[4], const u32 in2
[4], u32 out
[4])
272 out
[0] = ((in1
[0] & 0x000000ff) >> 0) | ((in1
[0] & 0x00ff0000) >> 8)
273 | ((in1
[1] & 0x000000ff) << 16) | ((in1
[1] & 0x00ff0000) << 8);
274 out
[1] = ((in1
[2] & 0x000000ff) >> 0) | ((in1
[2] & 0x00ff0000) >> 8)
275 | ((in1
[3] & 0x000000ff) << 16) | ((in1
[3] & 0x00ff0000) << 8);
276 out
[2] = ((in2
[0] & 0x000000ff) >> 0) | ((in2
[0] & 0x00ff0000) >> 8)
277 | ((in2
[1] & 0x000000ff) << 16) | ((in2
[1] & 0x00ff0000) << 8);
278 out
[3] = ((in2
[2] & 0x000000ff) >> 0) | ((in2
[2] & 0x00ff0000) >> 8)
279 | ((in2
[3] & 0x000000ff) << 16) | ((in2
[3] & 0x00ff0000) << 8);
282 static void append_0x01_1 (u32 w0
[4], const u32 offset
)
291 w0
[0] = w0
[0] | 0x0100;
295 w0
[0] = w0
[0] | 0x010000;
299 w0
[0] = w0
[0] | 0x01000000;
307 w0
[1] = w0
[1] | 0x0100;
311 w0
[1] = w0
[1] | 0x010000;
315 w0
[1] = w0
[1] | 0x01000000;
323 w0
[2] = w0
[2] | 0x0100;
327 w0
[2] = w0
[2] | 0x010000;
331 w0
[2] = w0
[2] | 0x01000000;
339 w0
[3] = w0
[3] | 0x0100;
343 w0
[3] = w0
[3] | 0x010000;
347 w0
[3] = w0
[3] | 0x01000000;
352 static void append_0x01_2 (u32 w0
[4], u32 w1
[4], const u32 offset
)
361 w0
[0] = w0
[0] | 0x0100;
365 w0
[0] = w0
[0] | 0x010000;
369 w0
[0] = w0
[0] | 0x01000000;
377 w0
[1] = w0
[1] | 0x0100;
381 w0
[1] = w0
[1] | 0x010000;
385 w0
[1] = w0
[1] | 0x01000000;
393 w0
[2] = w0
[2] | 0x0100;
397 w0
[2] = w0
[2] | 0x010000;
401 w0
[2] = w0
[2] | 0x01000000;
409 w0
[3] = w0
[3] | 0x0100;
413 w0
[3] = w0
[3] | 0x010000;
417 w0
[3] = w0
[3] | 0x01000000;
425 w1
[0] = w1
[0] | 0x0100;
429 w1
[0] = w1
[0] | 0x010000;
433 w1
[0] = w1
[0] | 0x01000000;
441 w1
[1] = w1
[1] | 0x0100;
445 w1
[1] = w1
[1] | 0x010000;
449 w1
[1] = w1
[1] | 0x01000000;
457 w1
[2] = w1
[2] | 0x0100;
461 w1
[2] = w1
[2] | 0x010000;
465 w1
[2] = w1
[2] | 0x01000000;
473 w1
[3] = w1
[3] | 0x0100;
477 w1
[3] = w1
[3] | 0x010000;
481 w1
[3] = w1
[3] | 0x01000000;
486 static void append_0x01_3 (u32 w0
[4], u32 w1
[4], u32 w2
[4], const u32 offset
)
495 w0
[0] = w0
[0] | 0x0100;
499 w0
[0] = w0
[0] | 0x010000;
503 w0
[0] = w0
[0] | 0x01000000;
511 w0
[1] = w0
[1] | 0x0100;
515 w0
[1] = w0
[1] | 0x010000;
519 w0
[1] = w0
[1] | 0x01000000;
527 w0
[2] = w0
[2] | 0x0100;
531 w0
[2] = w0
[2] | 0x010000;
535 w0
[2] = w0
[2] | 0x01000000;
543 w0
[3] = w0
[3] | 0x0100;
547 w0
[3] = w0
[3] | 0x010000;
551 w0
[3] = w0
[3] | 0x01000000;
559 w1
[0] = w1
[0] | 0x0100;
563 w1
[0] = w1
[0] | 0x010000;
567 w1
[0] = w1
[0] | 0x01000000;
575 w1
[1] = w1
[1] | 0x0100;
579 w1
[1] = w1
[1] | 0x010000;
583 w1
[1] = w1
[1] | 0x01000000;
591 w1
[2] = w1
[2] | 0x0100;
595 w1
[2] = w1
[2] | 0x010000;
599 w1
[2] = w1
[2] | 0x01000000;
607 w1
[3] = w1
[3] | 0x0100;
611 w1
[3] = w1
[3] | 0x010000;
615 w1
[3] = w1
[3] | 0x01000000;
623 w2
[0] = w2
[0] | 0x0100;
627 w2
[0] = w2
[0] | 0x010000;
631 w2
[0] = w2
[0] | 0x01000000;
639 w2
[1] = w2
[1] | 0x0100;
643 w2
[1] = w2
[1] | 0x010000;
647 w2
[1] = w2
[1] | 0x01000000;
655 w2
[2] = w2
[2] | 0x0100;
659 w2
[2] = w2
[2] | 0x010000;
663 w2
[2] = w2
[2] | 0x01000000;
671 w2
[3] = w2
[3] | 0x0100;
675 w2
[3] = w2
[3] | 0x010000;
679 w2
[3] = w2
[3] | 0x01000000;
684 static void append_0x01_4 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 offset
)
693 w0
[0] = w0
[0] | 0x0100;
697 w0
[0] = w0
[0] | 0x010000;
701 w0
[0] = w0
[0] | 0x01000000;
709 w0
[1] = w0
[1] | 0x0100;
713 w0
[1] = w0
[1] | 0x010000;
717 w0
[1] = w0
[1] | 0x01000000;
725 w0
[2] = w0
[2] | 0x0100;
729 w0
[2] = w0
[2] | 0x010000;
733 w0
[2] = w0
[2] | 0x01000000;
741 w0
[3] = w0
[3] | 0x0100;
745 w0
[3] = w0
[3] | 0x010000;
749 w0
[3] = w0
[3] | 0x01000000;
757 w1
[0] = w1
[0] | 0x0100;
761 w1
[0] = w1
[0] | 0x010000;
765 w1
[0] = w1
[0] | 0x01000000;
773 w1
[1] = w1
[1] | 0x0100;
777 w1
[1] = w1
[1] | 0x010000;
781 w1
[1] = w1
[1] | 0x01000000;
789 w1
[2] = w1
[2] | 0x0100;
793 w1
[2] = w1
[2] | 0x010000;
797 w1
[2] = w1
[2] | 0x01000000;
805 w1
[3] = w1
[3] | 0x0100;
809 w1
[3] = w1
[3] | 0x010000;
813 w1
[3] = w1
[3] | 0x01000000;
821 w2
[0] = w2
[0] | 0x0100;
825 w2
[0] = w2
[0] | 0x010000;
829 w2
[0] = w2
[0] | 0x01000000;
837 w2
[1] = w2
[1] | 0x0100;
841 w2
[1] = w2
[1] | 0x010000;
845 w2
[1] = w2
[1] | 0x01000000;
853 w2
[2] = w2
[2] | 0x0100;
857 w2
[2] = w2
[2] | 0x010000;
861 w2
[2] = w2
[2] | 0x01000000;
869 w2
[3] = w2
[3] | 0x0100;
873 w2
[3] = w2
[3] | 0x010000;
877 w2
[3] = w2
[3] | 0x01000000;
885 w3
[0] = w3
[0] | 0x0100;
889 w3
[0] = w3
[0] | 0x010000;
893 w3
[0] = w3
[0] | 0x01000000;
901 w3
[1] = w3
[1] | 0x0100;
905 w3
[1] = w3
[1] | 0x010000;
909 w3
[1] = w3
[1] | 0x01000000;
917 w3
[2] = w3
[2] | 0x0100;
921 w3
[2] = w3
[2] | 0x010000;
925 w3
[2] = w3
[2] | 0x01000000;
933 w3
[3] = w3
[3] | 0x0100;
937 w3
[3] = w3
[3] | 0x010000;
941 w3
[3] = w3
[3] | 0x01000000;
946 static void append_0x01_8 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], u32 w4
[4], u32 w5
[4], u32 w6
[4], u32 w7
[4], const u32 offset
)
955 w0
[0] = w0
[0] | 0x0100;
959 w0
[0] = w0
[0] | 0x010000;
963 w0
[0] = w0
[0] | 0x01000000;
971 w0
[1] = w0
[1] | 0x0100;
975 w0
[1] = w0
[1] | 0x010000;
979 w0
[1] = w0
[1] | 0x01000000;
987 w0
[2] = w0
[2] | 0x0100;
991 w0
[2] = w0
[2] | 0x010000;
995 w0
[2] = w0
[2] | 0x01000000;
1003 w0
[3] = w0
[3] | 0x0100;
1007 w0
[3] = w0
[3] | 0x010000;
1011 w0
[3] = w0
[3] | 0x01000000;
1019 w1
[0] = w1
[0] | 0x0100;
1023 w1
[0] = w1
[0] | 0x010000;
1027 w1
[0] = w1
[0] | 0x01000000;
1035 w1
[1] = w1
[1] | 0x0100;
1039 w1
[1] = w1
[1] | 0x010000;
1043 w1
[1] = w1
[1] | 0x01000000;
1051 w1
[2] = w1
[2] | 0x0100;
1055 w1
[2] = w1
[2] | 0x010000;
1059 w1
[2] = w1
[2] | 0x01000000;
1067 w1
[3] = w1
[3] | 0x0100;
1071 w1
[3] = w1
[3] | 0x010000;
1075 w1
[3] = w1
[3] | 0x01000000;
1083 w2
[0] = w2
[0] | 0x0100;
1087 w2
[0] = w2
[0] | 0x010000;
1091 w2
[0] = w2
[0] | 0x01000000;
1099 w2
[1] = w2
[1] | 0x0100;
1103 w2
[1] = w2
[1] | 0x010000;
1107 w2
[1] = w2
[1] | 0x01000000;
1115 w2
[2] = w2
[2] | 0x0100;
1119 w2
[2] = w2
[2] | 0x010000;
1123 w2
[2] = w2
[2] | 0x01000000;
1131 w2
[3] = w2
[3] | 0x0100;
1135 w2
[3] = w2
[3] | 0x010000;
1139 w2
[3] = w2
[3] | 0x01000000;
1147 w3
[0] = w3
[0] | 0x0100;
1151 w3
[0] = w3
[0] | 0x010000;
1155 w3
[0] = w3
[0] | 0x01000000;
1163 w3
[1] = w3
[1] | 0x0100;
1167 w3
[1] = w3
[1] | 0x010000;
1171 w3
[1] = w3
[1] | 0x01000000;
1179 w3
[2] = w3
[2] | 0x0100;
1183 w3
[2] = w3
[2] | 0x010000;
1187 w3
[2] = w3
[2] | 0x01000000;
1195 w3
[3] = w3
[3] | 0x0100;
1199 w3
[3] = w3
[3] | 0x010000;
1203 w3
[3] = w3
[3] | 0x01000000;
1211 w4
[0] = w4
[0] | 0x0100;
1215 w4
[0] = w4
[0] | 0x010000;
1219 w4
[0] = w4
[0] | 0x01000000;
1227 w4
[1] = w4
[1] | 0x0100;
1231 w4
[1] = w4
[1] | 0x010000;
1235 w4
[1] = w4
[1] | 0x01000000;
1243 w4
[2] = w4
[2] | 0x0100;
1247 w4
[2] = w4
[2] | 0x010000;
1251 w4
[2] = w4
[2] | 0x01000000;
1259 w4
[3] = w4
[3] | 0x0100;
1263 w4
[3] = w4
[3] | 0x010000;
1267 w4
[3] = w4
[3] | 0x01000000;
1275 w5
[0] = w5
[0] | 0x0100;
1279 w5
[0] = w5
[0] | 0x010000;
1283 w5
[0] = w5
[0] | 0x01000000;
1291 w5
[1] = w5
[1] | 0x0100;
1295 w5
[1] = w5
[1] | 0x010000;
1299 w5
[1] = w5
[1] | 0x01000000;
1307 w5
[2] = w5
[2] | 0x0100;
1311 w5
[2] = w5
[2] | 0x010000;
1315 w5
[2] = w5
[2] | 0x01000000;
1323 w5
[3] = w5
[3] | 0x0100;
1327 w5
[3] = w5
[3] | 0x010000;
1331 w5
[3] = w5
[3] | 0x01000000;
1339 w6
[0] = w6
[0] | 0x0100;
1343 w6
[0] = w6
[0] | 0x010000;
1347 w6
[0] = w6
[0] | 0x01000000;
1355 w6
[1] = w6
[1] | 0x0100;
1359 w6
[1] = w6
[1] | 0x010000;
1363 w6
[1] = w6
[1] | 0x01000000;
1371 w6
[2] = w6
[2] | 0x0100;
1375 w6
[2] = w6
[2] | 0x010000;
1379 w6
[2] = w6
[2] | 0x01000000;
1387 w6
[3] = w6
[3] | 0x0100;
1391 w6
[3] = w6
[3] | 0x010000;
1395 w6
[3] = w6
[3] | 0x01000000;
1403 w7
[0] = w7
[0] | 0x0100;
1407 w7
[0] = w7
[0] | 0x010000;
1411 w7
[0] = w7
[0] | 0x01000000;
1419 w7
[1] = w7
[1] | 0x0100;
1423 w7
[1] = w7
[1] | 0x010000;
1427 w7
[1] = w7
[1] | 0x01000000;
1435 w7
[2] = w7
[2] | 0x0100;
1439 w7
[2] = w7
[2] | 0x010000;
1443 w7
[2] = w7
[2] | 0x01000000;
1451 w7
[3] = w7
[3] | 0x0100;
1455 w7
[3] = w7
[3] | 0x010000;
1459 w7
[3] = w7
[3] | 0x01000000;
1464 static void append_0x02_1 (u32 w0
[4], const u32 offset
)
1473 w0
[0] = w0
[0] | 0x0200;
1477 w0
[0] = w0
[0] | 0x020000;
1481 w0
[0] = w0
[0] | 0x02000000;
1489 w0
[1] = w0
[1] | 0x0200;
1493 w0
[1] = w0
[1] | 0x020000;
1497 w0
[1] = w0
[1] | 0x02000000;
1505 w0
[2] = w0
[2] | 0x0200;
1509 w0
[2] = w0
[2] | 0x020000;
1513 w0
[2] = w0
[2] | 0x02000000;
1521 w0
[3] = w0
[3] | 0x0200;
1525 w0
[3] = w0
[3] | 0x020000;
1529 w0
[3] = w0
[3] | 0x02000000;
1534 static void append_0x02_2 (u32 w0
[4], u32 w1
[4], const u32 offset
)
1543 w0
[0] = w0
[0] | 0x0200;
1547 w0
[0] = w0
[0] | 0x020000;
1551 w0
[0] = w0
[0] | 0x02000000;
1559 w0
[1] = w0
[1] | 0x0200;
1563 w0
[1] = w0
[1] | 0x020000;
1567 w0
[1] = w0
[1] | 0x02000000;
1575 w0
[2] = w0
[2] | 0x0200;
1579 w0
[2] = w0
[2] | 0x020000;
1583 w0
[2] = w0
[2] | 0x02000000;
1591 w0
[3] = w0
[3] | 0x0200;
1595 w0
[3] = w0
[3] | 0x020000;
1599 w0
[3] = w0
[3] | 0x02000000;
1607 w1
[0] = w1
[0] | 0x0200;
1611 w1
[0] = w1
[0] | 0x020000;
1615 w1
[0] = w1
[0] | 0x02000000;
1623 w1
[1] = w1
[1] | 0x0200;
1627 w1
[1] = w1
[1] | 0x020000;
1631 w1
[1] = w1
[1] | 0x02000000;
1639 w1
[2] = w1
[2] | 0x0200;
1643 w1
[2] = w1
[2] | 0x020000;
1647 w1
[2] = w1
[2] | 0x02000000;
1655 w1
[3] = w1
[3] | 0x0200;
1659 w1
[3] = w1
[3] | 0x020000;
1663 w1
[3] = w1
[3] | 0x02000000;
1668 static void append_0x02_3 (u32 w0
[4], u32 w1
[4], u32 w2
[4], const u32 offset
)
1677 w0
[0] = w0
[0] | 0x0200;
1681 w0
[0] = w0
[0] | 0x020000;
1685 w0
[0] = w0
[0] | 0x02000000;
1693 w0
[1] = w0
[1] | 0x0200;
1697 w0
[1] = w0
[1] | 0x020000;
1701 w0
[1] = w0
[1] | 0x02000000;
1709 w0
[2] = w0
[2] | 0x0200;
1713 w0
[2] = w0
[2] | 0x020000;
1717 w0
[2] = w0
[2] | 0x02000000;
1725 w0
[3] = w0
[3] | 0x0200;
1729 w0
[3] = w0
[3] | 0x020000;
1733 w0
[3] = w0
[3] | 0x02000000;
1741 w1
[0] = w1
[0] | 0x0200;
1745 w1
[0] = w1
[0] | 0x020000;
1749 w1
[0] = w1
[0] | 0x02000000;
1757 w1
[1] = w1
[1] | 0x0200;
1761 w1
[1] = w1
[1] | 0x020000;
1765 w1
[1] = w1
[1] | 0x02000000;
1773 w1
[2] = w1
[2] | 0x0200;
1777 w1
[2] = w1
[2] | 0x020000;
1781 w1
[2] = w1
[2] | 0x02000000;
1789 w1
[3] = w1
[3] | 0x0200;
1793 w1
[3] = w1
[3] | 0x020000;
1797 w1
[3] = w1
[3] | 0x02000000;
1805 w2
[0] = w2
[0] | 0x0200;
1809 w2
[0] = w2
[0] | 0x020000;
1813 w2
[0] = w2
[0] | 0x02000000;
1821 w2
[1] = w2
[1] | 0x0200;
1825 w2
[1] = w2
[1] | 0x020000;
1829 w2
[1] = w2
[1] | 0x02000000;
1837 w2
[2] = w2
[2] | 0x0200;
1841 w2
[2] = w2
[2] | 0x020000;
1845 w2
[2] = w2
[2] | 0x02000000;
1853 w2
[3] = w2
[3] | 0x0200;
1857 w2
[3] = w2
[3] | 0x020000;
1861 w2
[3] = w2
[3] | 0x02000000;
1866 static void append_0x02_4 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 offset
)
1875 w0
[0] = w0
[0] | 0x0200;
1879 w0
[0] = w0
[0] | 0x020000;
1883 w0
[0] = w0
[0] | 0x02000000;
1891 w0
[1] = w0
[1] | 0x0200;
1895 w0
[1] = w0
[1] | 0x020000;
1899 w0
[1] = w0
[1] | 0x02000000;
1907 w0
[2] = w0
[2] | 0x0200;
1911 w0
[2] = w0
[2] | 0x020000;
1915 w0
[2] = w0
[2] | 0x02000000;
1923 w0
[3] = w0
[3] | 0x0200;
1927 w0
[3] = w0
[3] | 0x020000;
1931 w0
[3] = w0
[3] | 0x02000000;
1939 w1
[0] = w1
[0] | 0x0200;
1943 w1
[0] = w1
[0] | 0x020000;
1947 w1
[0] = w1
[0] | 0x02000000;
1955 w1
[1] = w1
[1] | 0x0200;
1959 w1
[1] = w1
[1] | 0x020000;
1963 w1
[1] = w1
[1] | 0x02000000;
1971 w1
[2] = w1
[2] | 0x0200;
1975 w1
[2] = w1
[2] | 0x020000;
1979 w1
[2] = w1
[2] | 0x02000000;
1987 w1
[3] = w1
[3] | 0x0200;
1991 w1
[3] = w1
[3] | 0x020000;
1995 w1
[3] = w1
[3] | 0x02000000;
2003 w2
[0] = w2
[0] | 0x0200;
2007 w2
[0] = w2
[0] | 0x020000;
2011 w2
[0] = w2
[0] | 0x02000000;
2019 w2
[1] = w2
[1] | 0x0200;
2023 w2
[1] = w2
[1] | 0x020000;
2027 w2
[1] = w2
[1] | 0x02000000;
2035 w2
[2] = w2
[2] | 0x0200;
2039 w2
[2] = w2
[2] | 0x020000;
2043 w2
[2] = w2
[2] | 0x02000000;
2051 w2
[3] = w2
[3] | 0x0200;
2055 w2
[3] = w2
[3] | 0x020000;
2059 w2
[3] = w2
[3] | 0x02000000;
2067 w3
[0] = w3
[0] | 0x0200;
2071 w3
[0] = w3
[0] | 0x020000;
2075 w3
[0] = w3
[0] | 0x02000000;
2083 w3
[1] = w3
[1] | 0x0200;
2087 w3
[1] = w3
[1] | 0x020000;
2091 w3
[1] = w3
[1] | 0x02000000;
2099 w3
[2] = w3
[2] | 0x0200;
2103 w3
[2] = w3
[2] | 0x020000;
2107 w3
[2] = w3
[2] | 0x02000000;
2115 w3
[3] = w3
[3] | 0x0200;
2119 w3
[3] = w3
[3] | 0x020000;
2123 w3
[3] = w3
[3] | 0x02000000;
2128 static void append_0x02_8 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], u32 w4
[4], u32 w5
[4], u32 w6
[4], u32 w7
[4], const u32 offset
)
2137 w0
[0] = w0
[0] | 0x0200;
2141 w0
[0] = w0
[0] | 0x020000;
2145 w0
[0] = w0
[0] | 0x02000000;
2153 w0
[1] = w0
[1] | 0x0200;
2157 w0
[1] = w0
[1] | 0x020000;
2161 w0
[1] = w0
[1] | 0x02000000;
2169 w0
[2] = w0
[2] | 0x0200;
2173 w0
[2] = w0
[2] | 0x020000;
2177 w0
[2] = w0
[2] | 0x02000000;
2185 w0
[3] = w0
[3] | 0x0200;
2189 w0
[3] = w0
[3] | 0x020000;
2193 w0
[3] = w0
[3] | 0x02000000;
2201 w1
[0] = w1
[0] | 0x0200;
2205 w1
[0] = w1
[0] | 0x020000;
2209 w1
[0] = w1
[0] | 0x02000000;
2217 w1
[1] = w1
[1] | 0x0200;
2221 w1
[1] = w1
[1] | 0x020000;
2225 w1
[1] = w1
[1] | 0x02000000;
2233 w1
[2] = w1
[2] | 0x0200;
2237 w1
[2] = w1
[2] | 0x020000;
2241 w1
[2] = w1
[2] | 0x02000000;
2249 w1
[3] = w1
[3] | 0x0200;
2253 w1
[3] = w1
[3] | 0x020000;
2257 w1
[3] = w1
[3] | 0x02000000;
2265 w2
[0] = w2
[0] | 0x0200;
2269 w2
[0] = w2
[0] | 0x020000;
2273 w2
[0] = w2
[0] | 0x02000000;
2281 w2
[1] = w2
[1] | 0x0200;
2285 w2
[1] = w2
[1] | 0x020000;
2289 w2
[1] = w2
[1] | 0x02000000;
2297 w2
[2] = w2
[2] | 0x0200;
2301 w2
[2] = w2
[2] | 0x020000;
2305 w2
[2] = w2
[2] | 0x02000000;
2313 w2
[3] = w2
[3] | 0x0200;
2317 w2
[3] = w2
[3] | 0x020000;
2321 w2
[3] = w2
[3] | 0x02000000;
2329 w3
[0] = w3
[0] | 0x0200;
2333 w3
[0] = w3
[0] | 0x020000;
2337 w3
[0] = w3
[0] | 0x02000000;
2345 w3
[1] = w3
[1] | 0x0200;
2349 w3
[1] = w3
[1] | 0x020000;
2353 w3
[1] = w3
[1] | 0x02000000;
2361 w3
[2] = w3
[2] | 0x0200;
2365 w3
[2] = w3
[2] | 0x020000;
2369 w3
[2] = w3
[2] | 0x02000000;
2377 w3
[3] = w3
[3] | 0x0200;
2381 w3
[3] = w3
[3] | 0x020000;
2385 w3
[3] = w3
[3] | 0x02000000;
2393 w4
[0] = w4
[0] | 0x0200;
2397 w4
[0] = w4
[0] | 0x020000;
2401 w4
[0] = w4
[0] | 0x02000000;
2409 w4
[1] = w4
[1] | 0x0200;
2413 w4
[1] = w4
[1] | 0x020000;
2417 w4
[1] = w4
[1] | 0x02000000;
2425 w4
[2] = w4
[2] | 0x0200;
2429 w4
[2] = w4
[2] | 0x020000;
2433 w4
[2] = w4
[2] | 0x02000000;
2441 w4
[3] = w4
[3] | 0x0200;
2445 w4
[3] = w4
[3] | 0x020000;
2449 w4
[3] = w4
[3] | 0x02000000;
2457 w5
[0] = w5
[0] | 0x0200;
2461 w5
[0] = w5
[0] | 0x020000;
2465 w5
[0] = w5
[0] | 0x02000000;
2473 w5
[1] = w5
[1] | 0x0200;
2477 w5
[1] = w5
[1] | 0x020000;
2481 w5
[1] = w5
[1] | 0x02000000;
2489 w5
[2] = w5
[2] | 0x0200;
2493 w5
[2] = w5
[2] | 0x020000;
2497 w5
[2] = w5
[2] | 0x02000000;
2505 w5
[3] = w5
[3] | 0x0200;
2509 w5
[3] = w5
[3] | 0x020000;
2513 w5
[3] = w5
[3] | 0x02000000;
2521 w6
[0] = w6
[0] | 0x0200;
2525 w6
[0] = w6
[0] | 0x020000;
2529 w6
[0] = w6
[0] | 0x02000000;
2537 w6
[1] = w6
[1] | 0x0200;
2541 w6
[1] = w6
[1] | 0x020000;
2545 w6
[1] = w6
[1] | 0x02000000;
2553 w6
[2] = w6
[2] | 0x0200;
2557 w6
[2] = w6
[2] | 0x020000;
2561 w6
[2] = w6
[2] | 0x02000000;
2569 w6
[3] = w6
[3] | 0x0200;
2573 w6
[3] = w6
[3] | 0x020000;
2577 w6
[3] = w6
[3] | 0x02000000;
2585 w7
[0] = w7
[0] | 0x0200;
2589 w7
[0] = w7
[0] | 0x020000;
2593 w7
[0] = w7
[0] | 0x02000000;
2601 w7
[1] = w7
[1] | 0x0200;
2605 w7
[1] = w7
[1] | 0x020000;
2609 w7
[1] = w7
[1] | 0x02000000;
2617 w7
[2] = w7
[2] | 0x0200;
2621 w7
[2] = w7
[2] | 0x020000;
2625 w7
[2] = w7
[2] | 0x02000000;
2633 w7
[3] = w7
[3] | 0x0200;
2637 w7
[3] = w7
[3] | 0x020000;
2641 w7
[3] = w7
[3] | 0x02000000;
2646 static void append_0x80_1 (u32 w0
[4], const u32 offset
)
2655 w0
[0] = w0
[0] | 0x8000;
2659 w0
[0] = w0
[0] | 0x800000;
2663 w0
[0] = w0
[0] | 0x80000000;
2671 w0
[1] = w0
[1] | 0x8000;
2675 w0
[1] = w0
[1] | 0x800000;
2679 w0
[1] = w0
[1] | 0x80000000;
2687 w0
[2] = w0
[2] | 0x8000;
2691 w0
[2] = w0
[2] | 0x800000;
2695 w0
[2] = w0
[2] | 0x80000000;
2703 w0
[3] = w0
[3] | 0x8000;
2707 w0
[3] = w0
[3] | 0x800000;
2711 w0
[3] = w0
[3] | 0x80000000;
2716 static void append_0x80_2 (u32 w0
[4], u32 w1
[4], const u32 offset
)
2725 w0
[0] = w0
[0] | 0x8000;
2729 w0
[0] = w0
[0] | 0x800000;
2733 w0
[0] = w0
[0] | 0x80000000;
2741 w0
[1] = w0
[1] | 0x8000;
2745 w0
[1] = w0
[1] | 0x800000;
2749 w0
[1] = w0
[1] | 0x80000000;
2757 w0
[2] = w0
[2] | 0x8000;
2761 w0
[2] = w0
[2] | 0x800000;
2765 w0
[2] = w0
[2] | 0x80000000;
2773 w0
[3] = w0
[3] | 0x8000;
2777 w0
[3] = w0
[3] | 0x800000;
2781 w0
[3] = w0
[3] | 0x80000000;
2789 w1
[0] = w1
[0] | 0x8000;
2793 w1
[0] = w1
[0] | 0x800000;
2797 w1
[0] = w1
[0] | 0x80000000;
2805 w1
[1] = w1
[1] | 0x8000;
2809 w1
[1] = w1
[1] | 0x800000;
2813 w1
[1] = w1
[1] | 0x80000000;
2821 w1
[2] = w1
[2] | 0x8000;
2825 w1
[2] = w1
[2] | 0x800000;
2829 w1
[2] = w1
[2] | 0x80000000;
2837 w1
[3] = w1
[3] | 0x8000;
2841 w1
[3] = w1
[3] | 0x800000;
2845 w1
[3] = w1
[3] | 0x80000000;
2850 static void append_0x80_2_be (u32 w0
[4], u32 w1
[4], const u32 offset
)
2855 w0
[0] |= 0x80000000;
2871 w0
[1] |= 0x80000000;
2887 w0
[2] |= 0x80000000;
2903 w0
[3] |= 0x80000000;
2919 w1
[0] |= 0x80000000;
2935 w1
[1] |= 0x80000000;
2951 w1
[2] |= 0x80000000;
2967 w1
[3] |= 0x80000000;
2984 static void append_0x80_3 (u32 w0
[4], u32 w1
[4], u32 w2
[4], const u32 offset
)
2993 w0
[0] = w0
[0] | 0x8000;
2997 w0
[0] = w0
[0] | 0x800000;
3001 w0
[0] = w0
[0] | 0x80000000;
3009 w0
[1] = w0
[1] | 0x8000;
3013 w0
[1] = w0
[1] | 0x800000;
3017 w0
[1] = w0
[1] | 0x80000000;
3025 w0
[2] = w0
[2] | 0x8000;
3029 w0
[2] = w0
[2] | 0x800000;
3033 w0
[2] = w0
[2] | 0x80000000;
3041 w0
[3] = w0
[3] | 0x8000;
3045 w0
[3] = w0
[3] | 0x800000;
3049 w0
[3] = w0
[3] | 0x80000000;
3057 w1
[0] = w1
[0] | 0x8000;
3061 w1
[0] = w1
[0] | 0x800000;
3065 w1
[0] = w1
[0] | 0x80000000;
3073 w1
[1] = w1
[1] | 0x8000;
3077 w1
[1] = w1
[1] | 0x800000;
3081 w1
[1] = w1
[1] | 0x80000000;
3089 w1
[2] = w1
[2] | 0x8000;
3093 w1
[2] = w1
[2] | 0x800000;
3097 w1
[2] = w1
[2] | 0x80000000;
3105 w1
[3] = w1
[3] | 0x8000;
3109 w1
[3] = w1
[3] | 0x800000;
3113 w1
[3] = w1
[3] | 0x80000000;
3121 w2
[0] = w2
[0] | 0x8000;
3125 w2
[0] = w2
[0] | 0x800000;
3129 w2
[0] = w2
[0] | 0x80000000;
3137 w2
[1] = w2
[1] | 0x8000;
3141 w2
[1] = w2
[1] | 0x800000;
3145 w2
[1] = w2
[1] | 0x80000000;
3153 w2
[2] = w2
[2] | 0x8000;
3157 w2
[2] = w2
[2] | 0x800000;
3161 w2
[2] = w2
[2] | 0x80000000;
3169 w2
[3] = w2
[3] | 0x8000;
3173 w2
[3] = w2
[3] | 0x800000;
3177 w2
[3] = w2
[3] | 0x80000000;
3182 static void append_0x80_4 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 offset
)
3191 w0
[0] = w0
[0] | 0x8000;
3195 w0
[0] = w0
[0] | 0x800000;
3199 w0
[0] = w0
[0] | 0x80000000;
3207 w0
[1] = w0
[1] | 0x8000;
3211 w0
[1] = w0
[1] | 0x800000;
3215 w0
[1] = w0
[1] | 0x80000000;
3223 w0
[2] = w0
[2] | 0x8000;
3227 w0
[2] = w0
[2] | 0x800000;
3231 w0
[2] = w0
[2] | 0x80000000;
3239 w0
[3] = w0
[3] | 0x8000;
3243 w0
[3] = w0
[3] | 0x800000;
3247 w0
[3] = w0
[3] | 0x80000000;
3255 w1
[0] = w1
[0] | 0x8000;
3259 w1
[0] = w1
[0] | 0x800000;
3263 w1
[0] = w1
[0] | 0x80000000;
3271 w1
[1] = w1
[1] | 0x8000;
3275 w1
[1] = w1
[1] | 0x800000;
3279 w1
[1] = w1
[1] | 0x80000000;
3287 w1
[2] = w1
[2] | 0x8000;
3291 w1
[2] = w1
[2] | 0x800000;
3295 w1
[2] = w1
[2] | 0x80000000;
3303 w1
[3] = w1
[3] | 0x8000;
3307 w1
[3] = w1
[3] | 0x800000;
3311 w1
[3] = w1
[3] | 0x80000000;
3319 w2
[0] = w2
[0] | 0x8000;
3323 w2
[0] = w2
[0] | 0x800000;
3327 w2
[0] = w2
[0] | 0x80000000;
3335 w2
[1] = w2
[1] | 0x8000;
3339 w2
[1] = w2
[1] | 0x800000;
3343 w2
[1] = w2
[1] | 0x80000000;
3351 w2
[2] = w2
[2] | 0x8000;
3355 w2
[2] = w2
[2] | 0x800000;
3359 w2
[2] = w2
[2] | 0x80000000;
3367 w2
[3] = w2
[3] | 0x8000;
3371 w2
[3] = w2
[3] | 0x800000;
3375 w2
[3] = w2
[3] | 0x80000000;
3383 w3
[0] = w3
[0] | 0x8000;
3387 w3
[0] = w3
[0] | 0x800000;
3391 w3
[0] = w3
[0] | 0x80000000;
3399 w3
[1] = w3
[1] | 0x8000;
3403 w3
[1] = w3
[1] | 0x800000;
3407 w3
[1] = w3
[1] | 0x80000000;
3415 w3
[2] = w3
[2] | 0x8000;
3419 w3
[2] = w3
[2] | 0x800000;
3423 w3
[2] = w3
[2] | 0x80000000;
3431 w3
[3] = w3
[3] | 0x8000;
3435 w3
[3] = w3
[3] | 0x800000;
3439 w3
[3] = w3
[3] | 0x80000000;
3444 static void append_0x80_8 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], u32 w4
[4], u32 w5
[4], u32 w6
[4], u32 w7
[4], const u32 offset
)
3453 w0
[0] = w0
[0] | 0x8000;
3457 w0
[0] = w0
[0] | 0x800000;
3461 w0
[0] = w0
[0] | 0x80000000;
3469 w0
[1] = w0
[1] | 0x8000;
3473 w0
[1] = w0
[1] | 0x800000;
3477 w0
[1] = w0
[1] | 0x80000000;
3485 w0
[2] = w0
[2] | 0x8000;
3489 w0
[2] = w0
[2] | 0x800000;
3493 w0
[2] = w0
[2] | 0x80000000;
3501 w0
[3] = w0
[3] | 0x8000;
3505 w0
[3] = w0
[3] | 0x800000;
3509 w0
[3] = w0
[3] | 0x80000000;
3517 w1
[0] = w1
[0] | 0x8000;
3521 w1
[0] = w1
[0] | 0x800000;
3525 w1
[0] = w1
[0] | 0x80000000;
3533 w1
[1] = w1
[1] | 0x8000;
3537 w1
[1] = w1
[1] | 0x800000;
3541 w1
[1] = w1
[1] | 0x80000000;
3549 w1
[2] = w1
[2] | 0x8000;
3553 w1
[2] = w1
[2] | 0x800000;
3557 w1
[2] = w1
[2] | 0x80000000;
3565 w1
[3] = w1
[3] | 0x8000;
3569 w1
[3] = w1
[3] | 0x800000;
3573 w1
[3] = w1
[3] | 0x80000000;
3581 w2
[0] = w2
[0] | 0x8000;
3585 w2
[0] = w2
[0] | 0x800000;
3589 w2
[0] = w2
[0] | 0x80000000;
3597 w2
[1] = w2
[1] | 0x8000;
3601 w2
[1] = w2
[1] | 0x800000;
3605 w2
[1] = w2
[1] | 0x80000000;
3613 w2
[2] = w2
[2] | 0x8000;
3617 w2
[2] = w2
[2] | 0x800000;
3621 w2
[2] = w2
[2] | 0x80000000;
3629 w2
[3] = w2
[3] | 0x8000;
3633 w2
[3] = w2
[3] | 0x800000;
3637 w2
[3] = w2
[3] | 0x80000000;
3645 w3
[0] = w3
[0] | 0x8000;
3649 w3
[0] = w3
[0] | 0x800000;
3653 w3
[0] = w3
[0] | 0x80000000;
3661 w3
[1] = w3
[1] | 0x8000;
3665 w3
[1] = w3
[1] | 0x800000;
3669 w3
[1] = w3
[1] | 0x80000000;
3677 w3
[2] = w3
[2] | 0x8000;
3681 w3
[2] = w3
[2] | 0x800000;
3685 w3
[2] = w3
[2] | 0x80000000;
3693 w3
[3] = w3
[3] | 0x8000;
3697 w3
[3] = w3
[3] | 0x800000;
3701 w3
[3] = w3
[3] | 0x80000000;
3709 w4
[0] = w4
[0] | 0x8000;
3713 w4
[0] = w4
[0] | 0x800000;
3717 w4
[0] = w4
[0] | 0x80000000;
3725 w4
[1] = w4
[1] | 0x8000;
3729 w4
[1] = w4
[1] | 0x800000;
3733 w4
[1] = w4
[1] | 0x80000000;
3741 w4
[2] = w4
[2] | 0x8000;
3745 w4
[2] = w4
[2] | 0x800000;
3749 w4
[2] = w4
[2] | 0x80000000;
3757 w4
[3] = w4
[3] | 0x8000;
3761 w4
[3] = w4
[3] | 0x800000;
3765 w4
[3] = w4
[3] | 0x80000000;
3773 w5
[0] = w5
[0] | 0x8000;
3777 w5
[0] = w5
[0] | 0x800000;
3781 w5
[0] = w5
[0] | 0x80000000;
3789 w5
[1] = w5
[1] | 0x8000;
3793 w5
[1] = w5
[1] | 0x800000;
3797 w5
[1] = w5
[1] | 0x80000000;
3805 w5
[2] = w5
[2] | 0x8000;
3809 w5
[2] = w5
[2] | 0x800000;
3813 w5
[2] = w5
[2] | 0x80000000;
3821 w5
[3] = w5
[3] | 0x8000;
3825 w5
[3] = w5
[3] | 0x800000;
3829 w5
[3] = w5
[3] | 0x80000000;
3837 w6
[0] = w6
[0] | 0x8000;
3841 w6
[0] = w6
[0] | 0x800000;
3845 w6
[0] = w6
[0] | 0x80000000;
3853 w6
[1] = w6
[1] | 0x8000;
3857 w6
[1] = w6
[1] | 0x800000;
3861 w6
[1] = w6
[1] | 0x80000000;
3869 w6
[2] = w6
[2] | 0x8000;
3873 w6
[2] = w6
[2] | 0x800000;
3877 w6
[2] = w6
[2] | 0x80000000;
3885 w6
[3] = w6
[3] | 0x8000;
3889 w6
[3] = w6
[3] | 0x800000;
3893 w6
[3] = w6
[3] | 0x80000000;
3901 w7
[0] = w7
[0] | 0x8000;
3905 w7
[0] = w7
[0] | 0x800000;
3909 w7
[0] = w7
[0] | 0x80000000;
3917 w7
[1] = w7
[1] | 0x8000;
3921 w7
[1] = w7
[1] | 0x800000;
3925 w7
[1] = w7
[1] | 0x80000000;
3933 w7
[2] = w7
[2] | 0x8000;
3937 w7
[2] = w7
[2] | 0x800000;
3941 w7
[2] = w7
[2] | 0x80000000;
3949 w7
[3] = w7
[3] | 0x8000;
3953 w7
[3] = w7
[3] | 0x800000;
3957 w7
[3] = w7
[3] | 0x80000000;
3962 static void append_0x80_4 (u32 w
[16], const u32 offset
)
3971 w
[ 0] = w
[ 0] | 0x8000;
3975 w
[ 0] = w
[ 0] | 0x800000;
3979 w
[ 0] = w
[ 0] | 0x80000000;
3987 w
[ 1] = w
[ 1] | 0x8000;
3991 w
[ 1] = w
[ 1] | 0x800000;
3995 w
[ 1] = w
[ 1] | 0x80000000;
4003 w
[ 2] = w
[ 2] | 0x8000;
4007 w
[ 2] = w
[ 2] | 0x800000;
4011 w
[ 2] = w
[ 2] | 0x80000000;
4019 w
[ 3] = w
[ 3] | 0x8000;
4023 w
[ 3] = w
[ 3] | 0x800000;
4027 w
[ 3] = w
[ 3] | 0x80000000;
4035 w
[ 4] = w
[ 4] | 0x8000;
4039 w
[ 4] = w
[ 4] | 0x800000;
4043 w
[ 4] = w
[ 4] | 0x80000000;
4051 w
[ 5] = w
[ 5] | 0x8000;
4055 w
[ 5] = w
[ 5] | 0x800000;
4059 w
[ 5] = w
[ 5] | 0x80000000;
4067 w
[ 6] = w
[ 6] | 0x8000;
4071 w
[ 6] = w
[ 6] | 0x800000;
4075 w
[ 6] = w
[ 6] | 0x80000000;
4083 w
[ 7] = w
[ 7] | 0x8000;
4087 w
[ 7] = w
[ 7] | 0x800000;
4091 w
[ 7] = w
[ 7] | 0x80000000;
4099 w
[ 8] = w
[ 8] | 0x8000;
4103 w
[ 8] = w
[ 8] | 0x800000;
4107 w
[ 8] = w
[ 8] | 0x80000000;
4115 w
[ 9] = w
[ 9] | 0x8000;
4119 w
[ 9] = w
[ 9] | 0x800000;
4123 w
[ 9] = w
[ 9] | 0x80000000;
4131 w
[10] = w
[10] | 0x8000;
4135 w
[10] = w
[10] | 0x800000;
4139 w
[10] = w
[10] | 0x80000000;
4147 w
[11] = w
[11] | 0x8000;
4151 w
[11] = w
[11] | 0x800000;
4155 w
[11] = w
[11] | 0x80000000;
4163 w
[12] = w
[12] | 0x8000;
4167 w
[12] = w
[12] | 0x800000;
4171 w
[12] = w
[12] | 0x80000000;
4179 w
[13] = w
[13] | 0x8000;
4183 w
[13] = w
[13] | 0x800000;
4187 w
[13] = w
[13] | 0x80000000;
4195 w
[14] = w
[14] | 0x8000;
4199 w
[14] = w
[14] | 0x800000;
4203 w
[14] = w
[14] | 0x80000000;
4211 w
[15] = w
[15] | 0x8000;
4215 w
[15] = w
[15] | 0x800000;
4219 w
[15] = w
[15] | 0x80000000;
4224 static void append_0x80_8 (u32 w
[32], const u32 offset
)
4233 w
[ 0] = w
[ 0] | 0x8000;
4237 w
[ 0] = w
[ 0] | 0x800000;
4241 w
[ 0] = w
[ 0] | 0x80000000;
4249 w
[ 1] = w
[ 1] | 0x8000;
4253 w
[ 1] = w
[ 1] | 0x800000;
4257 w
[ 1] = w
[ 1] | 0x80000000;
4265 w
[ 2] = w
[ 2] | 0x8000;
4269 w
[ 2] = w
[ 2] | 0x800000;
4273 w
[ 2] = w
[ 2] | 0x80000000;
4281 w
[ 3] = w
[ 3] | 0x8000;
4285 w
[ 3] = w
[ 3] | 0x800000;
4289 w
[ 3] = w
[ 3] | 0x80000000;
4297 w
[ 4] = w
[ 4] | 0x8000;
4301 w
[ 4] = w
[ 4] | 0x800000;
4305 w
[ 4] = w
[ 4] | 0x80000000;
4313 w
[ 5] = w
[ 5] | 0x8000;
4317 w
[ 5] = w
[ 5] | 0x800000;
4321 w
[ 5] = w
[ 5] | 0x80000000;
4329 w
[ 6] = w
[ 6] | 0x8000;
4333 w
[ 6] = w
[ 6] | 0x800000;
4337 w
[ 6] = w
[ 6] | 0x80000000;
4345 w
[ 7] = w
[ 7] | 0x8000;
4349 w
[ 7] = w
[ 7] | 0x800000;
4353 w
[ 7] = w
[ 7] | 0x80000000;
4361 w
[ 8] = w
[ 8] | 0x8000;
4365 w
[ 8] = w
[ 8] | 0x800000;
4369 w
[ 8] = w
[ 8] | 0x80000000;
4377 w
[ 9] = w
[ 9] | 0x8000;
4381 w
[ 9] = w
[ 9] | 0x800000;
4385 w
[ 9] = w
[ 9] | 0x80000000;
4393 w
[10] = w
[10] | 0x8000;
4397 w
[10] = w
[10] | 0x800000;
4401 w
[10] = w
[10] | 0x80000000;
4409 w
[11] = w
[11] | 0x8000;
4413 w
[11] = w
[11] | 0x800000;
4417 w
[11] = w
[11] | 0x80000000;
4425 w
[12] = w
[12] | 0x8000;
4429 w
[12] = w
[12] | 0x800000;
4433 w
[12] = w
[12] | 0x80000000;
4441 w
[13] = w
[13] | 0x8000;
4445 w
[13] = w
[13] | 0x800000;
4449 w
[13] = w
[13] | 0x80000000;
4457 w
[14] = w
[14] | 0x8000;
4461 w
[14] = w
[14] | 0x800000;
4465 w
[14] = w
[14] | 0x80000000;
4473 w
[15] = w
[15] | 0x8000;
4477 w
[15] = w
[15] | 0x800000;
4481 w
[15] = w
[15] | 0x80000000;
4489 w
[16] = w
[16] | 0x8000;
4493 w
[16] = w
[16] | 0x800000;
4497 w
[16] = w
[16] | 0x80000000;
4505 w
[17] = w
[17] | 0x8000;
4509 w
[17] = w
[17] | 0x800000;
4513 w
[17] = w
[17] | 0x80000000;
4521 w
[18] = w
[18] | 0x8000;
4525 w
[18] = w
[18] | 0x800000;
4529 w
[18] = w
[18] | 0x80000000;
4537 w
[19] = w
[19] | 0x8000;
4541 w
[19] = w
[19] | 0x800000;
4545 w
[19] = w
[19] | 0x80000000;
4553 w
[20] = w
[20] | 0x8000;
4557 w
[20] = w
[20] | 0x800000;
4561 w
[20] = w
[20] | 0x80000000;
4569 w
[21] = w
[21] | 0x8000;
4573 w
[21] = w
[21] | 0x800000;
4577 w
[21] = w
[21] | 0x80000000;
4585 w
[22] = w
[22] | 0x8000;
4589 w
[22] = w
[22] | 0x800000;
4593 w
[22] = w
[22] | 0x80000000;
4601 w
[23] = w
[23] | 0x8000;
4605 w
[23] = w
[23] | 0x800000;
4609 w
[23] = w
[23] | 0x80000000;
4617 w
[24] = w
[24] | 0x8000;
4621 w
[24] = w
[24] | 0x800000;
4625 w
[24] = w
[24] | 0x80000000;
4633 w
[25] = w
[25] | 0x8000;
4637 w
[25] = w
[25] | 0x800000;
4641 w
[25] = w
[25] | 0x80000000;
4649 w
[26] = w
[26] | 0x8000;
4653 w
[26] = w
[26] | 0x800000;
4657 w
[26] = w
[26] | 0x80000000;
4665 w
[27] = w
[27] | 0x8000;
4669 w
[27] = w
[27] | 0x800000;
4673 w
[27] = w
[27] | 0x80000000;
4681 w
[28] = w
[28] | 0x8000;
4685 w
[28] = w
[28] | 0x800000;
4689 w
[28] = w
[28] | 0x80000000;
4697 w
[29] = w
[29] | 0x8000;
4701 w
[29] = w
[29] | 0x800000;
4705 w
[29] = w
[29] | 0x80000000;
4713 w
[30] = w
[30] | 0x8000;
4717 w
[30] = w
[30] | 0x800000;
4721 w
[30] = w
[30] | 0x80000000;
4729 w
[31] = w
[31] | 0x8000;
4733 w
[31] = w
[31] | 0x800000;
4737 w
[31] = w
[31] | 0x80000000;
4742 static void device_memcat2L (const u32 offset
, u32 dst0
[2], u32 src_l0
[2], u32 src_r0
[2])
4747 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
4748 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4752 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
4753 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4757 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
4758 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4762 dst0
[1] = src_r0
[0];
4766 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
4770 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
4774 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
4779 static void device_memcat4L (const u32 offset
, u32 dst0
[4], u32 src_l0
[4], u32 src_r0
[4])
4784 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
4785 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4786 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4787 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
4791 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
4792 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4793 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
4794 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
4798 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
4799 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4800 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
4801 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
4805 dst0
[1] = src_r0
[0];
4806 dst0
[2] = src_r0
[1];
4807 dst0
[3] = src_r0
[2];
4811 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
4812 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4813 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4817 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
4818 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4819 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
4823 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
4824 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4825 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
4829 dst0
[2] = src_r0
[0];
4830 dst0
[3] = src_r0
[1];
4834 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
4835 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4839 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
4840 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4844 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
4845 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4849 dst0
[3] = src_r0
[0];
4853 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
4857 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
4861 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
4866 static void device_memcat8L (const u32 offset
, u32 dst0
[4], u32 dst1
[4], u32 src_l0
[4], u32 src_l1
[4], u32 src_r0
[4])
4871 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
4872 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4873 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4874 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
4875 dst1
[0] = src_r0
[3] >> 24;
4879 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
4880 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4881 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
4882 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
4883 dst1
[0] = src_r0
[3] >> 16;
4887 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
4888 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4889 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
4890 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
4891 dst1
[0] = src_r0
[3] >> 8;
4895 dst0
[1] = src_r0
[0];
4896 dst0
[2] = src_r0
[1];
4897 dst0
[3] = src_r0
[2];
4898 dst1
[0] = src_r0
[3];
4902 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
4903 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4904 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4905 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
4906 dst1
[1] = src_r0
[3] >> 24;
4910 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
4911 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4912 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
4913 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
4914 dst1
[1] = src_r0
[3] >> 16;
4918 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
4919 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4920 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
4921 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
4922 dst1
[1] = src_r0
[3] >> 8;
4926 dst0
[2] = src_r0
[0];
4927 dst0
[3] = src_r0
[1];
4928 dst1
[0] = src_r0
[2];
4929 dst1
[1] = src_r0
[3];
4933 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
4934 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4935 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4936 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
4937 dst1
[2] = src_r0
[3] >> 24;
4941 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
4942 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4943 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
4944 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
4945 dst1
[2] = src_r0
[3] >> 16;
4949 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
4950 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4951 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
4952 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
4953 dst1
[2] = src_r0
[3] >> 8;
4957 dst0
[3] = src_r0
[0];
4958 dst1
[0] = src_r0
[1];
4959 dst1
[1] = src_r0
[2];
4960 dst1
[2] = src_r0
[3];
4964 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
4965 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4966 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4967 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
4968 dst1
[3] = src_r0
[3] >> 24;
4972 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
4973 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
4974 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
4975 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
4976 dst1
[3] = src_r0
[3] >> 16;
4980 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
4981 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
4982 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
4983 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
4984 dst1
[3] = src_r0
[3] >> 8;
4988 dst1
[0] = src_r0
[0];
4989 dst1
[1] = src_r0
[1];
4990 dst1
[2] = src_r0
[2];
4991 dst1
[3] = src_r0
[3];
4995 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
4996 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
4997 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
4998 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5002 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
5003 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5004 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5005 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5009 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
5010 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5011 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5012 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5016 dst1
[1] = src_r0
[0];
5017 dst1
[2] = src_r0
[1];
5018 dst1
[3] = src_r0
[2];
5022 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
5023 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5024 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5028 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
5029 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5030 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5034 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
5035 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5036 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5040 dst1
[2] = src_r0
[0];
5041 dst1
[3] = src_r0
[1];
5045 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
5046 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5050 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
5051 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5055 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
5056 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5060 dst1
[3] = src_r0
[0];
5064 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
5068 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
5072 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
5077 static void device_memcat12L (const u32 offset
, u32 dst0
[4], u32 dst1
[4], u32 dst2
[4], u32 src_l0
[4], u32 src_l1
[4], u32 src_l2
[4], u32 src_r0
[4])
5082 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
5083 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5084 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5085 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5086 dst1
[0] = src_r0
[3] >> 24;
5090 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
5091 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5092 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5093 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5094 dst1
[0] = src_r0
[3] >> 16;
5098 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
5099 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5100 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5101 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5102 dst1
[0] = src_r0
[3] >> 8;
5106 dst0
[1] = src_r0
[0];
5107 dst0
[2] = src_r0
[1];
5108 dst0
[3] = src_r0
[2];
5109 dst1
[0] = src_r0
[3];
5113 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
5114 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5115 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5116 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5117 dst1
[1] = src_r0
[3] >> 24;
5121 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
5122 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5123 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5124 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5125 dst1
[1] = src_r0
[3] >> 16;
5129 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
5130 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5131 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5132 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5133 dst1
[1] = src_r0
[3] >> 8;
5137 dst0
[2] = src_r0
[0];
5138 dst0
[3] = src_r0
[1];
5139 dst1
[0] = src_r0
[2];
5140 dst1
[1] = src_r0
[3];
5144 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
5145 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5146 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5147 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5148 dst1
[2] = src_r0
[3] >> 24;
5152 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
5153 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5154 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5155 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5156 dst1
[2] = src_r0
[3] >> 16;
5160 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
5161 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5162 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5163 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5164 dst1
[2] = src_r0
[3] >> 8;
5168 dst0
[3] = src_r0
[0];
5169 dst1
[0] = src_r0
[1];
5170 dst1
[1] = src_r0
[2];
5171 dst1
[2] = src_r0
[3];
5175 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
5176 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5177 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5178 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5179 dst1
[3] = src_r0
[3] >> 24;
5183 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
5184 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5185 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5186 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5187 dst1
[3] = src_r0
[3] >> 16;
5191 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
5192 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5193 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5194 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5195 dst1
[3] = src_r0
[3] >> 8;
5199 dst1
[0] = src_r0
[0];
5200 dst1
[1] = src_r0
[1];
5201 dst1
[2] = src_r0
[2];
5202 dst1
[3] = src_r0
[3];
5206 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
5207 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5208 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5209 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5210 dst2
[0] = src_r0
[3] >> 24;
5214 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
5215 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5216 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5217 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5218 dst2
[0] = src_r0
[3] >> 16;
5222 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
5223 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5224 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5225 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5226 dst2
[0] = src_r0
[3] >> 8;
5230 dst1
[1] = src_r0
[0];
5231 dst1
[2] = src_r0
[1];
5232 dst1
[3] = src_r0
[2];
5233 dst2
[0] = src_r0
[3];
5237 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
5238 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5239 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5240 dst2
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5241 dst2
[1] = src_r0
[3] >> 24;
5245 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
5246 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5247 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5248 dst2
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5249 dst2
[1] = src_r0
[3] >> 16;
5253 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
5254 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5255 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5256 dst2
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5257 dst2
[1] = src_r0
[3] >> 8;
5261 dst1
[2] = src_r0
[0];
5262 dst1
[3] = src_r0
[1];
5263 dst2
[0] = src_r0
[2];
5264 dst2
[1] = src_r0
[3];
5268 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
5269 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5270 dst2
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5271 dst2
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5272 dst2
[2] = src_r0
[3] >> 24;
5276 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
5277 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5278 dst2
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5279 dst2
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5280 dst2
[2] = src_r0
[3] >> 16;
5284 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
5285 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5286 dst2
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5287 dst2
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5288 dst2
[2] = src_r0
[3] >> 8;
5292 dst1
[3] = src_r0
[0];
5293 dst2
[0] = src_r0
[1];
5294 dst2
[1] = src_r0
[2];
5295 dst2
[2] = src_r0
[3];
5299 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
5300 dst2
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5301 dst2
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5302 dst2
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5303 dst2
[3] = src_r0
[3] >> 24;
5307 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
5308 dst2
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5309 dst2
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5310 dst2
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5311 dst2
[3] = src_r0
[3] >> 16;
5315 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
5316 dst2
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5317 dst2
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5318 dst2
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5319 dst2
[3] = src_r0
[3] >> 8;
5323 dst2
[0] = src_r0
[0];
5324 dst2
[1] = src_r0
[1];
5325 dst2
[2] = src_r0
[2];
5326 dst2
[3] = src_r0
[3];
5330 dst2
[0] = src_l2
[0] | src_r0
[0] << 8;
5331 dst2
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5332 dst2
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5333 dst2
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5337 dst2
[0] = src_l2
[0] | src_r0
[0] << 16;
5338 dst2
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5339 dst2
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5340 dst2
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5344 dst2
[0] = src_l2
[0] | src_r0
[0] << 24;
5345 dst2
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5346 dst2
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5347 dst2
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5351 dst2
[1] = src_r0
[0];
5352 dst2
[2] = src_r0
[1];
5353 dst2
[3] = src_r0
[2];
5357 dst2
[1] = src_l2
[1] | src_r0
[0] << 8;
5358 dst2
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5359 dst2
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5363 dst2
[1] = src_l2
[1] | src_r0
[0] << 16;
5364 dst2
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5365 dst2
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5369 dst2
[1] = src_l2
[1] | src_r0
[0] << 24;
5370 dst2
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5371 dst2
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5375 dst2
[2] = src_r0
[0];
5376 dst2
[3] = src_r0
[1];
5380 dst2
[2] = src_l2
[2] | src_r0
[0] << 8;
5381 dst2
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5385 dst2
[2] = src_l2
[2] | src_r0
[0] << 16;
5386 dst2
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5390 dst2
[2] = src_l2
[2] | src_r0
[0] << 24;
5391 dst2
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5395 dst2
[3] = src_r0
[0];
5399 dst2
[3] = src_l2
[3] | src_r0
[0] << 8;
5403 dst2
[3] = src_l2
[3] | src_r0
[0] << 16;
5407 dst2
[3] = src_l2
[3] | src_r0
[0] << 24;
5412 static void device_memcat12L (const u32 offset
, u32 dst0
[4], u32 dst1
[4], u32 dst2
[4], u32 src_l0
[4], u32 src_l1
[4], u32 src_l2
[4], u32 src_r0
[4], u32 src_r1
[4])
5417 dst0
[0] = src_r0
[0];
5418 dst0
[1] = src_r0
[1];
5419 dst0
[2] = src_r0
[2];
5420 dst0
[3] = src_r0
[3];
5421 dst1
[0] = src_r1
[0];
5422 dst1
[1] = src_r1
[1];
5423 dst1
[2] = src_r1
[2];
5424 dst1
[3] = src_r1
[3];
5428 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
5429 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5430 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5431 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5432 dst1
[0] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5433 dst1
[1] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5434 dst1
[2] = src_r1
[1] >> 24 | src_r1
[2] << 8;
5435 dst1
[3] = src_r1
[2] >> 24 | src_r1
[3] << 8;
5436 dst2
[0] = src_r1
[3] >> 24;
5440 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
5441 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5442 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5443 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5444 dst1
[0] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5445 dst1
[1] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5446 dst1
[2] = src_r1
[1] >> 16 | src_r1
[2] << 16;
5447 dst1
[3] = src_r1
[2] >> 16 | src_r1
[3] << 16;
5448 dst2
[0] = src_r1
[3] >> 16;
5452 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
5453 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5454 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5455 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5456 dst1
[0] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5457 dst1
[1] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5458 dst1
[2] = src_r1
[1] >> 8 | src_r1
[2] << 24;
5459 dst1
[3] = src_r1
[2] >> 8 | src_r1
[3] << 24;
5460 dst2
[0] = src_r1
[3] >> 8;
5464 dst0
[1] = src_r0
[0];
5465 dst0
[2] = src_r0
[1];
5466 dst0
[3] = src_r0
[2];
5467 dst1
[0] = src_r0
[3];
5468 dst1
[1] = src_r1
[0];
5469 dst1
[2] = src_r1
[1];
5470 dst1
[3] = src_r1
[2];
5471 dst2
[0] = src_r1
[3];
5475 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
5476 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5477 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5478 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5479 dst1
[1] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5480 dst1
[2] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5481 dst1
[3] = src_r1
[1] >> 24 | src_r1
[2] << 8;
5482 dst2
[0] = src_r1
[2] >> 24 | src_r1
[3] << 8;
5483 dst2
[1] = src_r1
[3] >> 24;
5487 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
5488 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5489 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5490 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5491 dst1
[1] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5492 dst1
[2] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5493 dst1
[3] = src_r1
[1] >> 16 | src_r1
[2] << 16;
5494 dst2
[0] = src_r1
[2] >> 16 | src_r1
[3] << 16;
5495 dst2
[1] = src_r1
[3] >> 16;
5499 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
5500 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5501 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5502 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5503 dst1
[1] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5504 dst1
[2] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5505 dst1
[3] = src_r1
[1] >> 8 | src_r1
[2] << 24;
5506 dst2
[0] = src_r1
[2] >> 8 | src_r1
[3] << 24;
5507 dst2
[1] = src_r1
[3] >> 8;
5511 dst0
[2] = src_r0
[0];
5512 dst0
[3] = src_r0
[1];
5513 dst1
[0] = src_r0
[2];
5514 dst1
[1] = src_r0
[3];
5515 dst1
[2] = src_r1
[0];
5516 dst1
[3] = src_r1
[1];
5517 dst2
[0] = src_r1
[2];
5518 dst2
[1] = src_r1
[3];
5522 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
5523 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5524 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5525 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5526 dst1
[2] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5527 dst1
[3] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5528 dst2
[0] = src_r1
[1] >> 24 | src_r1
[2] << 8;
5529 dst2
[1] = src_r1
[2] >> 24 | src_r1
[3] << 8;
5530 dst2
[2] = src_r1
[3] >> 24;
5534 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
5535 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5536 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5537 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5538 dst1
[2] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5539 dst1
[3] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5540 dst2
[0] = src_r1
[1] >> 16 | src_r1
[2] << 16;
5541 dst2
[1] = src_r1
[2] >> 16 | src_r1
[3] << 16;
5542 dst2
[2] = src_r1
[3] >> 16;
5546 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
5547 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5548 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5549 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5550 dst1
[2] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5551 dst1
[3] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5552 dst2
[0] = src_r1
[1] >> 8 | src_r1
[2] << 24;
5553 dst2
[1] = src_r1
[2] >> 8 | src_r1
[3] << 24;
5554 dst2
[2] = src_r1
[3] >> 8;
5558 dst0
[3] = src_r0
[0];
5559 dst1
[0] = src_r0
[1];
5560 dst1
[1] = src_r0
[2];
5561 dst1
[2] = src_r0
[3];
5562 dst1
[3] = src_r1
[0];
5563 dst2
[0] = src_r1
[1];
5564 dst2
[1] = src_r1
[2];
5565 dst2
[2] = src_r1
[3];
5569 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
5570 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5571 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5572 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5573 dst1
[3] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5574 dst2
[0] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5575 dst2
[1] = src_r1
[1] >> 24 | src_r1
[2] << 8;
5576 dst2
[2] = src_r1
[2] >> 24 | src_r1
[3] << 8;
5577 dst2
[3] = src_r1
[3] >> 24;
5581 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
5582 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5583 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5584 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5585 dst1
[3] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5586 dst2
[0] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5587 dst2
[1] = src_r1
[1] >> 16 | src_r1
[2] << 16;
5588 dst2
[2] = src_r1
[2] >> 16 | src_r1
[3] << 16;
5589 dst2
[3] = src_r1
[3] >> 16;
5593 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
5594 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5595 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5596 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5597 dst1
[3] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5598 dst2
[0] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5599 dst2
[1] = src_r1
[1] >> 8 | src_r1
[2] << 24;
5600 dst2
[2] = src_r1
[2] >> 8 | src_r1
[3] << 24;
5601 dst2
[3] = src_r1
[3] >> 8;
5605 dst1
[0] = src_r0
[0];
5606 dst1
[1] = src_r0
[1];
5607 dst1
[2] = src_r0
[2];
5608 dst1
[3] = src_r0
[3];
5609 dst2
[0] = src_r1
[0];
5610 dst2
[1] = src_r1
[1];
5611 dst2
[2] = src_r1
[2];
5612 dst2
[3] = src_r1
[3];
5616 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
5617 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5618 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5619 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5620 dst2
[0] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5621 dst2
[1] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5622 dst2
[2] = src_r1
[1] >> 24 | src_r1
[2] << 8;
5623 dst2
[3] = src_r1
[2] >> 24 | src_r1
[3] << 8;
5627 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
5628 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5629 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5630 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5631 dst2
[0] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5632 dst2
[1] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5633 dst2
[2] = src_r1
[1] >> 16 | src_r1
[2] << 16;
5634 dst2
[3] = src_r1
[2] >> 16 | src_r1
[3] << 16;
5638 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
5639 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5640 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5641 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5642 dst2
[0] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5643 dst2
[1] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5644 dst2
[2] = src_r1
[1] >> 8 | src_r1
[2] << 24;
5645 dst2
[3] = src_r1
[2] >> 8 | src_r1
[3] << 24;
5649 dst1
[1] = src_r1
[0];
5650 dst1
[2] = src_r0
[1];
5651 dst1
[3] = src_r0
[2];
5652 dst2
[0] = src_r0
[3];
5653 dst2
[1] = src_r1
[0];
5654 dst2
[2] = src_r1
[1];
5655 dst2
[3] = src_r1
[2];
5659 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
5660 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5661 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5662 dst2
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5663 dst2
[1] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5664 dst2
[2] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5665 dst2
[3] = src_r1
[1] >> 24 | src_r1
[2] << 8;
5669 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
5670 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5671 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5672 dst2
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5673 dst2
[1] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5674 dst2
[2] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5675 dst2
[3] = src_r1
[1] >> 16 | src_r1
[2] << 16;
5679 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
5680 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5681 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5682 dst2
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5683 dst2
[1] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5684 dst2
[2] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5685 dst2
[3] = src_r1
[1] >> 8 | src_r1
[2] << 24;
5689 dst1
[2] = src_r1
[0];
5690 dst1
[3] = src_r0
[1];
5691 dst2
[0] = src_r0
[2];
5692 dst2
[1] = src_r0
[3];
5693 dst2
[2] = src_r1
[0];
5694 dst2
[3] = src_r1
[1];
5698 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
5699 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5700 dst2
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5701 dst2
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5702 dst2
[2] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5703 dst2
[3] = src_r1
[0] >> 24 | src_r1
[1] << 8;
5707 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
5708 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5709 dst2
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5710 dst2
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5711 dst2
[2] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5712 dst2
[3] = src_r1
[0] >> 16 | src_r1
[1] << 16;
5716 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
5717 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5718 dst2
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5719 dst2
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5720 dst2
[2] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5721 dst2
[3] = src_r1
[0] >> 8 | src_r1
[1] << 24;
5725 dst1
[3] = src_r1
[0];
5726 dst2
[0] = src_r0
[1];
5727 dst2
[1] = src_r0
[2];
5728 dst2
[2] = src_r0
[3];
5729 dst2
[3] = src_r1
[0];
5733 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
5734 dst2
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5735 dst2
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5736 dst2
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5737 dst2
[3] = src_r0
[3] >> 24 | src_r1
[0] << 8;
5741 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
5742 dst2
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5743 dst2
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5744 dst2
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5745 dst2
[3] = src_r0
[3] >> 16 | src_r1
[0] << 16;
5749 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
5750 dst2
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5751 dst2
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5752 dst2
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5753 dst2
[3] = src_r0
[3] >> 8 | src_r1
[0] << 24;
5757 dst2
[0] = src_r0
[0];
5758 dst2
[1] = src_r0
[1];
5759 dst2
[2] = src_r0
[2];
5760 dst2
[3] = src_r0
[3];
5764 dst2
[0] = src_l2
[0] | src_r0
[0] << 8;
5765 dst2
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5766 dst2
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5767 dst2
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
5771 dst2
[0] = src_l2
[0] | src_r0
[0] << 16;
5772 dst2
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5773 dst2
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5774 dst2
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
5778 dst2
[0] = src_l2
[0] | src_r0
[0] << 24;
5779 dst2
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5780 dst2
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5781 dst2
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
5785 dst2
[1] = src_r0
[0];
5786 dst2
[2] = src_r0
[1];
5787 dst2
[3] = src_r0
[2];
5791 dst2
[1] = src_l2
[1] | src_r0
[0] << 8;
5792 dst2
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5793 dst2
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
5797 dst2
[1] = src_l2
[1] | src_r0
[0] << 16;
5798 dst2
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5799 dst2
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
5803 dst2
[1] = src_l2
[1] | src_r0
[0] << 24;
5804 dst2
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5805 dst2
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
5809 dst2
[2] = src_r0
[0];
5810 dst2
[3] = src_r0
[1];
5814 dst2
[2] = src_l2
[2] | src_r0
[0] << 8;
5815 dst2
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
5819 dst2
[2] = src_l2
[2] | src_r0
[0] << 16;
5820 dst2
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
5824 dst2
[2] = src_l2
[2] | src_r0
[0] << 24;
5825 dst2
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
5829 dst2
[3] = src_r0
[0];
5833 dst2
[3] = src_l2
[3] | src_r0
[0] << 8;
5837 dst2
[3] = src_l2
[3] | src_r0
[0] << 16;
5841 dst2
[3] = src_l2
[3] | src_r0
[0] << 24;
/**
 * memcat16_9: merge the word stream append0[0..3], append1[0..3], append2[0]
 * into the buffers w0..w3 at a sub-word (byte-granular) position.
 *
 * Parameters:
 *   w0, w1, w2, w3 - four 4-word u32 buffers, modified in place.
 *   append0/1/2    - read-only 4-word u32 inputs; only append2[0] is read in
 *                    the statements visible here.
 *   offset         - const u32; not referenced by any visible statement.
 *
 * Every visible statement group has the same shape:
 *   - the first target word keeps its old value and is OR-ed with
 *     append0[0] << s            (s = 8, 16 or 24);
 *   - each following target word is overwritten with
 *     prev >> (32 - s) | next << s, chaining through the append words;
 *   - the last target word is overwritten with append2[0] >> (32 - s).
 * Groups differ only in s and in the first target word (w0[0] .. w0[3]).
 *
 * NOTE(review): the leading integers on statement lines appear to be line
 * numbers carried over from a listing, and the jumps between them suggest
 * lines are missing (presumably braces, a switch on `offset`, case labels,
 * breaks and the word-aligned copies) - confirm against the full source.
 */
5846 static void memcat16_9 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 append0
[4], const u32 append1
[4], const u32 append2
[4], const u32 offset
)
/* first target word w0[0], s = 8 (carry via >> 24) */
5863 w0
[0] = w0
[0] | append0
[0] << 8;
5864 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
5865 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
5866 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
5867 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
5868 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
5869 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
5870 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
5871 w2
[0] = append1
[3] >> 24 | append2
[0] << 8;
5872 w2
[1] = append2
[0] >> 24;
/* first target word w0[0], s = 16 (carry via >> 16) */
5876 w0
[0] = w0
[0] | append0
[0] << 16;
5877 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
5878 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
5879 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
5880 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
5881 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
5882 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
5883 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
5884 w2
[0] = append1
[3] >> 16 | append2
[0] << 16;
5885 w2
[1] = append2
[0] >> 16;
/* first target word w0[0], s = 24 (carry via >> 8) */
5889 w0
[0] = w0
[0] | append0
[0] << 24;
5890 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
5891 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
5892 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
5893 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
5894 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
5895 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
5896 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
5897 w2
[0] = append1
[3] >> 8 | append2
[0] << 24;
5898 w2
[1] = append2
[0] >> 8;
/* first target word w0[1], s = 8 (carry via >> 24) */
5914 w0
[1] = w0
[1] | append0
[0] << 8;
5915 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
5916 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
5917 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
5918 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
5919 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
5920 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
5921 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
5922 w2
[1] = append1
[3] >> 24 | append2
[0] << 8;
5923 w2
[2] = append2
[0] >> 24;
/* first target word w0[1], s = 16 (carry via >> 16) */
5927 w0
[1] = w0
[1] | append0
[0] << 16;
5928 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
5929 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
5930 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
5931 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
5932 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
5933 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
5934 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
5935 w2
[1] = append1
[3] >> 16 | append2
[0] << 16;
5936 w2
[2] = append2
[0] >> 16;
/* first target word w0[1], s = 24 (carry via >> 8) */
5940 w0
[1] = w0
[1] | append0
[0] << 24;
5941 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
5942 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
5943 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
5944 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
5945 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
5946 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
5947 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
5948 w2
[1] = append1
[3] >> 8 | append2
[0] << 24;
5949 w2
[2] = append2
[0] >> 8;
/* first target word w0[2], s = 8 (carry via >> 24) */
5965 w0
[2] = w0
[2] | append0
[0] << 8;
5966 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
5967 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
5968 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
5969 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
5970 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
5971 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
5972 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
5973 w2
[2] = append1
[3] >> 24 | append2
[0] << 8;
5974 w2
[3] = append2
[0] >> 24;
/* first target word w0[2], s = 16 (carry via >> 16) */
5978 w0
[2] = w0
[2] | append0
[0] << 16;
5979 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
5980 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
5981 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
5982 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
5983 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
5984 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
5985 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
5986 w2
[2] = append1
[3] >> 16 | append2
[0] << 16;
5987 w2
[3] = append2
[0] >> 16;
/* first target word w0[2], s = 24 (carry via >> 8) */
5991 w0
[2] = w0
[2] | append0
[0] << 24;
5992 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
5993 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
5994 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
5995 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
5996 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
5997 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
5998 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
5999 w2
[2] = append1
[3] >> 8 | append2
[0] << 24;
6000 w2
[3] = append2
[0] >> 8;
/* first target word w0[3], s = 8 (carry via >> 24); chain ends in w3[0] */
6016 w0
[3] = w0
[3] | append0
[0] << 8;
6017 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
6018 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
6019 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
6020 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
6021 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
6022 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
6023 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
6024 w2
[3] = append1
[3] >> 24 | append2
[0] << 8;
6025 w3
[0] = append2
[0] >> 24;
/* first target word w0[3], s = 16 (carry via >> 16); chain ends in w3[0] */
6029 w0
[3] = w0
[3] | append0
[0] << 16;
6030 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
6031 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
6032 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
6033 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
6034 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
6035 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
6036 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
6037 w2
[3] = append1
[3] >> 16 | append2
[0] << 16;
6038 w3
[0] = append2
[0] >> 16;
/* first target word w0[3], s = 24 (carry via >> 8); chain ends in w3[0] */
6042 w0
[3] = w0
[3] | append0
[0] << 24;
6043 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
6044 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
6045 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
6046 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
6047 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
6048 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
6049 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
6050 w2
[3] = append1
[3] >> 8 | append2
[0] << 24;
6051 w3
[0] = append2
[0] >> 8;
/**
 * memcat32_8: merge the word stream append0[0..3], append1[0..3] into the
 * buffers w0..w3 at a sub-word (byte-granular) position.
 *
 * Parameters:
 *   w0, w1, w2, w3 - four 4-word u32 buffers, modified in place.
 *   append0/1      - read-only 4-word u32 inputs.
 *   offset         - const u32; not referenced by any visible statement.
 *
 * Every visible statement group has the same shape:
 *   - the first target word keeps its old value and is OR-ed with
 *     append0[0] << s            (s = 8, 16 or 24);
 *   - each following target word is overwritten with
 *     prev >> (32 - s) | next << s, chaining through the append words;
 *   - where the chain fits, the last target word is overwritten with
 *     append1[3] >> (32 - s).
 * Groups differ in s and in the first target word (w0[0] .. w1[3]).
 * In the groups starting at w1[2] and w1[3] the chain stops at w3[1]:
 * no visible statement writes w3[2] or w3[3], so the remaining append1
 * bits are not stored there.
 *
 * NOTE(review): the leading integers on statement lines appear to be line
 * numbers carried over from a listing, and the jumps between them suggest
 * lines are missing (presumably braces, a switch on `offset`, case labels,
 * breaks and the word-aligned copies) - confirm against the full source.
 */
6056 static void memcat32_8 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 append0
[4], const u32 append1
[4], const u32 offset
)
/* first target word w0[0], s = 8 (carry via >> 24) */
6072 w0
[0] = w0
[0] | append0
[0] << 8;
6073 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
6074 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
6075 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
6076 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
6077 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
6078 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
6079 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
6080 w2
[0] = append1
[3] >> 24;
/* first target word w0[0], s = 16 (carry via >> 16) */
6084 w0
[0] = w0
[0] | append0
[0] << 16;
6085 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
6086 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
6087 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
6088 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
6089 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
6090 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
6091 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
6092 w2
[0] = append1
[3] >> 16;
/* first target word w0[0], s = 24 (carry via >> 8) */
6096 w0
[0] = w0
[0] | append0
[0] << 24;
6097 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
6098 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
6099 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
6100 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
6101 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
6102 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
6103 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
6104 w2
[0] = append1
[3] >> 8;
/* first target word w0[1], s = 8 (carry via >> 24) */
6119 w0
[1] = w0
[1] | append0
[0] << 8;
6120 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
6121 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
6122 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
6123 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
6124 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
6125 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
6126 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
6127 w2
[1] = append1
[3] >> 24;
/* first target word w0[1], s = 16 (carry via >> 16) */
6131 w0
[1] = w0
[1] | append0
[0] << 16;
6132 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
6133 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
6134 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
6135 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
6136 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
6137 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
6138 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
6139 w2
[1] = append1
[3] >> 16;
/* first target word w0[1], s = 24 (carry via >> 8) */
6143 w0
[1] = w0
[1] | append0
[0] << 24;
6144 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
6145 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
6146 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
6147 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
6148 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
6149 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
6150 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
6151 w2
[1] = append1
[3] >> 8;
/* first target word w0[2], s = 8 (carry via >> 24) */
6166 w0
[2] = w0
[2] | append0
[0] << 8;
6167 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
6168 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
6169 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
6170 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
6171 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
6172 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
6173 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
6174 w2
[2] = append1
[3] >> 24;
/* first target word w0[2], s = 16 (carry via >> 16) */
6178 w0
[2] = w0
[2] | append0
[0] << 16;
6179 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
6180 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
6181 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
6182 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
6183 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
6184 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
6185 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
6186 w2
[2] = append1
[3] >> 16;
/* first target word w0[2], s = 24 (carry via >> 8) */
6190 w0
[2] = w0
[2] | append0
[0] << 24;
6191 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
6192 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
6193 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
6194 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
6195 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
6196 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
6197 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
6198 w2
[2] = append1
[3] >> 8;
/* first target word w0[3], s = 8 (carry via >> 24) */
6213 w0
[3] = w0
[3] | append0
[0] << 8;
6214 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
6215 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
6216 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
6217 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
6218 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
6219 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
6220 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
6221 w2
[3] = append1
[3] >> 24;
/* first target word w0[3], s = 16 (carry via >> 16) */
6225 w0
[3] = w0
[3] | append0
[0] << 16;
6226 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
6227 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
6228 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
6229 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
6230 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
6231 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
6232 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
6233 w2
[3] = append1
[3] >> 16;
/* first target word w0[3], s = 24 (carry via >> 8) */
6237 w0
[3] = w0
[3] | append0
[0] << 24;
6238 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
6239 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
6240 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
6241 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
6242 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
6243 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
6244 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
6245 w2
[3] = append1
[3] >> 8;
/* first target word w1[0], s = 8 (carry via >> 24); chain ends in w3[0] */
6260 w1
[0] = w1
[0] | append0
[0] << 8;
6261 w1
[1] = append0
[0] >> 24 | append0
[1] << 8;
6262 w1
[2] = append0
[1] >> 24 | append0
[2] << 8;
6263 w1
[3] = append0
[2] >> 24 | append0
[3] << 8;
6264 w2
[0] = append0
[3] >> 24 | append1
[0] << 8;
6265 w2
[1] = append1
[0] >> 24 | append1
[1] << 8;
6266 w2
[2] = append1
[1] >> 24 | append1
[2] << 8;
6267 w2
[3] = append1
[2] >> 24 | append1
[3] << 8;
6268 w3
[0] = append1
[3] >> 24;
/* first target word w1[0], s = 16 (carry via >> 16); chain ends in w3[0] */
6272 w1
[0] = w1
[0] | append0
[0] << 16;
6273 w1
[1] = append0
[0] >> 16 | append0
[1] << 16;
6274 w1
[2] = append0
[1] >> 16 | append0
[2] << 16;
6275 w1
[3] = append0
[2] >> 16 | append0
[3] << 16;
6276 w2
[0] = append0
[3] >> 16 | append1
[0] << 16;
6277 w2
[1] = append1
[0] >> 16 | append1
[1] << 16;
6278 w2
[2] = append1
[1] >> 16 | append1
[2] << 16;
6279 w2
[3] = append1
[2] >> 16 | append1
[3] << 16;
6280 w3
[0] = append1
[3] >> 16;
/* first target word w1[0], s = 24 (carry via >> 8); chain ends in w3[0] */
6284 w1
[0] = w1
[0] | append0
[0] << 24;
6285 w1
[1] = append0
[0] >> 8 | append0
[1] << 24;
6286 w1
[2] = append0
[1] >> 8 | append0
[2] << 24;
6287 w1
[3] = append0
[2] >> 8 | append0
[3] << 24;
6288 w2
[0] = append0
[3] >> 8 | append1
[0] << 24;
6289 w2
[1] = append1
[0] >> 8 | append1
[1] << 24;
6290 w2
[2] = append1
[1] >> 8 | append1
[2] << 24;
6291 w2
[3] = append1
[2] >> 8 | append1
[3] << 24;
6292 w3
[0] = append1
[3] >> 8;
/* first target word w1[1], s = 8 (carry via >> 24); chain ends in w3[1] */
6307 w1
[1] = w1
[1] | append0
[0] << 8;
6308 w1
[2] = append0
[0] >> 24 | append0
[1] << 8;
6309 w1
[3] = append0
[1] >> 24 | append0
[2] << 8;
6310 w2
[0] = append0
[2] >> 24 | append0
[3] << 8;
6311 w2
[1] = append0
[3] >> 24 | append1
[0] << 8;
6312 w2
[2] = append1
[0] >> 24 | append1
[1] << 8;
6313 w2
[3] = append1
[1] >> 24 | append1
[2] << 8;
6314 w3
[0] = append1
[2] >> 24 | append1
[3] << 8;
6315 w3
[1] = append1
[3] >> 24;
/* first target word w1[1], s = 16 (carry via >> 16); chain ends in w3[1] */
6319 w1
[1] = w1
[1] | append0
[0] << 16;
6320 w1
[2] = append0
[0] >> 16 | append0
[1] << 16;
6321 w1
[3] = append0
[1] >> 16 | append0
[2] << 16;
6322 w2
[0] = append0
[2] >> 16 | append0
[3] << 16;
6323 w2
[1] = append0
[3] >> 16 | append1
[0] << 16;
6324 w2
[2] = append1
[0] >> 16 | append1
[1] << 16;
6325 w2
[3] = append1
[1] >> 16 | append1
[2] << 16;
6326 w3
[0] = append1
[2] >> 16 | append1
[3] << 16;
6327 w3
[1] = append1
[3] >> 16;
/* first target word w1[1], s = 24 (carry via >> 8); chain ends in w3[1] */
6331 w1
[1] = w1
[1] | append0
[0] << 24;
6332 w1
[2] = append0
[0] >> 8 | append0
[1] << 24;
6333 w1
[3] = append0
[1] >> 8 | append0
[2] << 24;
6334 w2
[0] = append0
[2] >> 8 | append0
[3] << 24;
6335 w2
[1] = append0
[3] >> 8 | append1
[0] << 24;
6336 w2
[2] = append1
[0] >> 8 | append1
[1] << 24;
6337 w2
[3] = append1
[1] >> 8 | append1
[2] << 24;
6338 w3
[0] = append1
[2] >> 8 | append1
[3] << 24;
6339 w3
[1] = append1
[3] >> 8;
/* first target word w1[2], s = 8; chain stops at w3[1], append1[3] >> 24 is not stored */
6354 w1
[2] = w1
[2] | append0
[0] << 8;
6355 w1
[3] = append0
[0] >> 24 | append0
[1] << 8;
6356 w2
[0] = append0
[1] >> 24 | append0
[2] << 8;
6357 w2
[1] = append0
[2] >> 24 | append0
[3] << 8;
6358 w2
[2] = append0
[3] >> 24 | append1
[0] << 8;
6359 w2
[3] = append1
[0] >> 24 | append1
[1] << 8;
6360 w3
[0] = append1
[1] >> 24 | append1
[2] << 8;
6361 w3
[1] = append1
[2] >> 24 | append1
[3] << 8;
/* first target word w1[2], s = 16; chain stops at w3[1], append1[3] >> 16 is not stored */
6365 w1
[2] = w1
[2] | append0
[0] << 16;
6366 w1
[3] = append0
[0] >> 16 | append0
[1] << 16;
6367 w2
[0] = append0
[1] >> 16 | append0
[2] << 16;
6368 w2
[1] = append0
[2] >> 16 | append0
[3] << 16;
6369 w2
[2] = append0
[3] >> 16 | append1
[0] << 16;
6370 w2
[3] = append1
[0] >> 16 | append1
[1] << 16;
6371 w3
[0] = append1
[1] >> 16 | append1
[2] << 16;
6372 w3
[1] = append1
[2] >> 16 | append1
[3] << 16;
/* first target word w1[2], s = 24; chain stops at w3[1], append1[3] >> 8 is not stored */
6376 w1
[2] = w1
[2] | append0
[0] << 24;
6377 w1
[3] = append0
[0] >> 8 | append0
[1] << 24;
6378 w2
[0] = append0
[1] >> 8 | append0
[2] << 24;
6379 w2
[1] = append0
[2] >> 8 | append0
[3] << 24;
6380 w2
[2] = append0
[3] >> 8 | append1
[0] << 24;
6381 w2
[3] = append1
[0] >> 8 | append1
[1] << 24;
6382 w3
[0] = append1
[1] >> 8 | append1
[2] << 24;
6383 w3
[1] = append1
[2] >> 8 | append1
[3] << 24;
/* first target word w1[3], s = 8; chain stops at w3[1], append1[3] is not read */
6397 w1
[3] = w1
[3] | append0
[0] << 8;
6398 w2
[0] = append0
[0] >> 24 | append0
[1] << 8;
6399 w2
[1] = append0
[1] >> 24 | append0
[2] << 8;
6400 w2
[2] = append0
[2] >> 24 | append0
[3] << 8;
6401 w2
[3] = append0
[3] >> 24 | append1
[0] << 8;
6402 w3
[0] = append1
[0] >> 24 | append1
[1] << 8;
6403 w3
[1] = append1
[1] >> 24 | append1
[2] << 8;
/* first target word w1[3], s = 16; chain stops at w3[1], append1[3] is not read */
6407 w1
[3] = w1
[3] | append0
[0] << 16;
6408 w2
[0] = append0
[0] >> 16 | append0
[1] << 16;
6409 w2
[1] = append0
[1] >> 16 | append0
[2] << 16;
6410 w2
[2] = append0
[2] >> 16 | append0
[3] << 16;
6411 w2
[3] = append0
[3] >> 16 | append1
[0] << 16;
6412 w3
[0] = append1
[0] >> 16 | append1
[1] << 16;
6413 w3
[1] = append1
[1] >> 16 | append1
[2] << 16;
/* first target word w1[3], s = 24; chain stops at w3[1], append1[3] is not read */
6417 w1
[3] = w1
[3] | append0
[0] << 24;
6418 w2
[0] = append0
[0] >> 8 | append0
[1] << 24;
6419 w2
[1] = append0
[1] >> 8 | append0
[2] << 24;
6420 w2
[2] = append0
[2] >> 8 | append0
[3] << 24;
6421 w2
[3] = append0
[3] >> 8 | append1
[0] << 24;
6422 w3
[0] = append1
[0] >> 8 | append1
[1] << 24;
6423 w3
[1] = append1
[1] >> 8 | append1
[2] << 24;
/**
 * memcat32_9: merge the word stream append0[0..3], append1[0..3], append2[0]
 * into the buffers w0..w3 at a sub-word (byte-granular) position.
 *
 * Parameters:
 *   w0, w1, w2, w3 - four 4-word u32 buffers, modified in place.
 *   append0/1/2    - read-only 4-word u32 inputs; only append2[0] is read in
 *                    the statements visible here.
 *   offset         - const u32; not referenced by any visible statement.
 *
 * Every visible statement group has the same shape:
 *   - the first target word keeps its old value and is OR-ed with
 *     append0[0] << s            (s = 8, 16 or 24);
 *   - each following target word is overwritten with
 *     prev >> (32 - s) | next << s, chaining through the append words;
 *   - where the chain fits, the last target word is overwritten with
 *     append2[0] >> (32 - s).
 * Groups differ in s and in the first target word (w0[0] .. w1[3]).
 * In the groups starting at w1[1], w1[2] and w1[3] the chain stops at
 * w3[1]: no visible statement writes w3[2] or w3[3], so the trailing
 * append bits are not stored there.
 *
 * NOTE(review): the leading integers on statement lines appear to be line
 * numbers carried over from a listing, and the jumps between them suggest
 * lines are missing (presumably braces, a switch on `offset`, case labels,
 * breaks and the word-aligned copies) - confirm against the full source.
 */
6437 static void memcat32_9 (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 append0
[4], const u32 append1
[4], const u32 append2
[4], const u32 offset
)
/* first target word w0[0], s = 8 (carry via >> 24) */
6454 w0
[0] = w0
[0] | append0
[0] << 8;
6455 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
6456 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
6457 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
6458 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
6459 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
6460 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
6461 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
6462 w2
[0] = append1
[3] >> 24 | append2
[0] << 8;
6463 w2
[1] = append2
[0] >> 24;
/* first target word w0[0], s = 16 (carry via >> 16) */
6467 w0
[0] = w0
[0] | append0
[0] << 16;
6468 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
6469 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
6470 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
6471 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
6472 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
6473 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
6474 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
6475 w2
[0] = append1
[3] >> 16 | append2
[0] << 16;
6476 w2
[1] = append2
[0] >> 16;
/* first target word w0[0], s = 24 (carry via >> 8) */
6480 w0
[0] = w0
[0] | append0
[0] << 24;
6481 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
6482 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
6483 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
6484 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
6485 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
6486 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
6487 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
6488 w2
[0] = append1
[3] >> 8 | append2
[0] << 24;
6489 w2
[1] = append2
[0] >> 8;
/* first target word w0[1], s = 8 (carry via >> 24) */
6505 w0
[1] = w0
[1] | append0
[0] << 8;
6506 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
6507 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
6508 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
6509 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
6510 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
6511 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
6512 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
6513 w2
[1] = append1
[3] >> 24 | append2
[0] << 8;
6514 w2
[2] = append2
[0] >> 24;
/* first target word w0[1], s = 16 (carry via >> 16) */
6518 w0
[1] = w0
[1] | append0
[0] << 16;
6519 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
6520 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
6521 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
6522 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
6523 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
6524 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
6525 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
6526 w2
[1] = append1
[3] >> 16 | append2
[0] << 16;
6527 w2
[2] = append2
[0] >> 16;
/* first target word w0[1], s = 24 (carry via >> 8) */
6531 w0
[1] = w0
[1] | append0
[0] << 24;
6532 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
6533 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
6534 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
6535 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
6536 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
6537 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
6538 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
6539 w2
[1] = append1
[3] >> 8 | append2
[0] << 24;
6540 w2
[2] = append2
[0] >> 8;
/* first target word w0[2], s = 8 (carry via >> 24) */
6556 w0
[2] = w0
[2] | append0
[0] << 8;
6557 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
6558 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
6559 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
6560 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
6561 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
6562 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
6563 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
6564 w2
[2] = append1
[3] >> 24 | append2
[0] << 8;
6565 w2
[3] = append2
[0] >> 24;
/* first target word w0[2], s = 16 (carry via >> 16) */
6569 w0
[2] = w0
[2] | append0
[0] << 16;
6570 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
6571 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
6572 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
6573 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
6574 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
6575 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
6576 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
6577 w2
[2] = append1
[3] >> 16 | append2
[0] << 16;
6578 w2
[3] = append2
[0] >> 16;
/* first target word w0[2], s = 24 (carry via >> 8) */
6582 w0
[2] = w0
[2] | append0
[0] << 24;
6583 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
6584 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
6585 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
6586 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
6587 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
6588 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
6589 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
6590 w2
[2] = append1
[3] >> 8 | append2
[0] << 24;
6591 w2
[3] = append2
[0] >> 8;
/* first target word w0[3], s = 8 (carry via >> 24); chain ends in w3[0] */
6607 w0
[3] = w0
[3] | append0
[0] << 8;
6608 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
6609 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
6610 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
6611 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
6612 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
6613 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
6614 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
6615 w2
[3] = append1
[3] >> 24 | append2
[0] << 8;
6616 w3
[0] = append2
[0] >> 24;
/* first target word w0[3], s = 16 (carry via >> 16); chain ends in w3[0] */
6620 w0
[3] = w0
[3] | append0
[0] << 16;
6621 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
6622 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
6623 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
6624 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
6625 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
6626 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
6627 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
6628 w2
[3] = append1
[3] >> 16 | append2
[0] << 16;
6629 w3
[0] = append2
[0] >> 16;
/* first target word w0[3], s = 24 (carry via >> 8); chain ends in w3[0] */
6633 w0
[3] = w0
[3] | append0
[0] << 24;
6634 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
6635 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
6636 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
6637 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
6638 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
6639 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
6640 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
6641 w2
[3] = append1
[3] >> 8 | append2
[0] << 24;
6642 w3
[0] = append2
[0] >> 8;
/* first target word w1[0], s = 8 (carry via >> 24); chain ends in w3[1] */
6658 w1
[0] = w1
[0] | append0
[0] << 8;
6659 w1
[1] = append0
[0] >> 24 | append0
[1] << 8;
6660 w1
[2] = append0
[1] >> 24 | append0
[2] << 8;
6661 w1
[3] = append0
[2] >> 24 | append0
[3] << 8;
6662 w2
[0] = append0
[3] >> 24 | append1
[0] << 8;
6663 w2
[1] = append1
[0] >> 24 | append1
[1] << 8;
6664 w2
[2] = append1
[1] >> 24 | append1
[2] << 8;
6665 w2
[3] = append1
[2] >> 24 | append1
[3] << 8;
6666 w3
[0] = append1
[3] >> 24 | append2
[0] << 8;
6667 w3
[1] = append2
[0] >> 24;
/* first target word w1[0], s = 16 (carry via >> 16); chain ends in w3[1] */
6671 w1
[0] = w1
[0] | append0
[0] << 16;
6672 w1
[1] = append0
[0] >> 16 | append0
[1] << 16;
6673 w1
[2] = append0
[1] >> 16 | append0
[2] << 16;
6674 w1
[3] = append0
[2] >> 16 | append0
[3] << 16;
6675 w2
[0] = append0
[3] >> 16 | append1
[0] << 16;
6676 w2
[1] = append1
[0] >> 16 | append1
[1] << 16;
6677 w2
[2] = append1
[1] >> 16 | append1
[2] << 16;
6678 w2
[3] = append1
[2] >> 16 | append1
[3] << 16;
6679 w3
[0] = append1
[3] >> 16 | append2
[0] << 16;
6680 w3
[1] = append2
[0] >> 16;
/* first target word w1[0], s = 24 (carry via >> 8); chain ends in w3[1] */
6684 w1
[0] = w1
[0] | append0
[0] << 24;
6685 w1
[1] = append0
[0] >> 8 | append0
[1] << 24;
6686 w1
[2] = append0
[1] >> 8 | append0
[2] << 24;
6687 w1
[3] = append0
[2] >> 8 | append0
[3] << 24;
6688 w2
[0] = append0
[3] >> 8 | append1
[0] << 24;
6689 w2
[1] = append1
[0] >> 8 | append1
[1] << 24;
6690 w2
[2] = append1
[1] >> 8 | append1
[2] << 24;
6691 w2
[3] = append1
[2] >> 8 | append1
[3] << 24;
6692 w3
[0] = append1
[3] >> 8 | append2
[0] << 24;
6693 w3
[1] = append2
[0] >> 8;
/* first target word w1[1], s = 8; chain stops at w3[1], append2[0] >> 24 is not stored */
6709 w1
[1] = w1
[1] | append0
[0] << 8;
6710 w1
[2] = append0
[0] >> 24 | append0
[1] << 8;
6711 w1
[3] = append0
[1] >> 24 | append0
[2] << 8;
6712 w2
[0] = append0
[2] >> 24 | append0
[3] << 8;
6713 w2
[1] = append0
[3] >> 24 | append1
[0] << 8;
6714 w2
[2] = append1
[0] >> 24 | append1
[1] << 8;
6715 w2
[3] = append1
[1] >> 24 | append1
[2] << 8;
6716 w3
[0] = append1
[2] >> 24 | append1
[3] << 8;
6717 w3
[1] = append1
[3] >> 24 | append2
[0] << 8;
/* first target word w1[1], s = 16; chain stops at w3[1], append2[0] >> 16 is not stored */
6721 w1
[1] = w1
[1] | append0
[0] << 16;
6722 w1
[2] = append0
[0] >> 16 | append0
[1] << 16;
6723 w1
[3] = append0
[1] >> 16 | append0
[2] << 16;
6724 w2
[0] = append0
[2] >> 16 | append0
[3] << 16;
6725 w2
[1] = append0
[3] >> 16 | append1
[0] << 16;
6726 w2
[2] = append1
[0] >> 16 | append1
[1] << 16;
6727 w2
[3] = append1
[1] >> 16 | append1
[2] << 16;
6728 w3
[0] = append1
[2] >> 16 | append1
[3] << 16;
6729 w3
[1] = append1
[3] >> 16 | append2
[0] << 16;
/* first target word w1[1], s = 24; chain stops at w3[1], append2[0] >> 8 is not stored */
6733 w1
[1] = w1
[1] | append0
[0] << 24;
6734 w1
[2] = append0
[0] >> 8 | append0
[1] << 24;
6735 w1
[3] = append0
[1] >> 8 | append0
[2] << 24;
6736 w2
[0] = append0
[2] >> 8 | append0
[3] << 24;
6737 w2
[1] = append0
[3] >> 8 | append1
[0] << 24;
6738 w2
[2] = append1
[0] >> 8 | append1
[1] << 24;
6739 w2
[3] = append1
[1] >> 8 | append1
[2] << 24;
6740 w3
[0] = append1
[2] >> 8 | append1
[3] << 24;
6741 w3
[1] = append1
[3] >> 8 | append2
[0] << 24;
/* first target word w1[2], s = 8; chain stops at w3[1], append2 is not read */
6756 w1
[2] = w1
[2] | append0
[0] << 8;
6757 w1
[3] = append0
[0] >> 24 | append0
[1] << 8;
6758 w2
[0] = append0
[1] >> 24 | append0
[2] << 8;
6759 w2
[1] = append0
[2] >> 24 | append0
[3] << 8;
6760 w2
[2] = append0
[3] >> 24 | append1
[0] << 8;
6761 w2
[3] = append1
[0] >> 24 | append1
[1] << 8;
6762 w3
[0] = append1
[1] >> 24 | append1
[2] << 8;
6763 w3
[1] = append1
[2] >> 24 | append1
[3] << 8;
/* first target word w1[2], s = 16; chain stops at w3[1], append2 is not read */
6767 w1
[2] = w1
[2] | append0
[0] << 16;
6768 w1
[3] = append0
[0] >> 16 | append0
[1] << 16;
6769 w2
[0] = append0
[1] >> 16 | append0
[2] << 16;
6770 w2
[1] = append0
[2] >> 16 | append0
[3] << 16;
6771 w2
[2] = append0
[3] >> 16 | append1
[0] << 16;
6772 w2
[3] = append1
[0] >> 16 | append1
[1] << 16;
6773 w3
[0] = append1
[1] >> 16 | append1
[2] << 16;
6774 w3
[1] = append1
[2] >> 16 | append1
[3] << 16;
/* first target word w1[2], s = 24; chain stops at w3[1], append2 is not read */
6778 w1
[2] = w1
[2] | append0
[0] << 24;
6779 w1
[3] = append0
[0] >> 8 | append0
[1] << 24;
6780 w2
[0] = append0
[1] >> 8 | append0
[2] << 24;
6781 w2
[1] = append0
[2] >> 8 | append0
[3] << 24;
6782 w2
[2] = append0
[3] >> 8 | append1
[0] << 24;
6783 w2
[3] = append1
[0] >> 8 | append1
[1] << 24;
6784 w3
[0] = append1
[1] >> 8 | append1
[2] << 24;
6785 w3
[1] = append1
[2] >> 8 | append1
[3] << 24;
/* first target word w1[3], s = 8; chain stops at w3[1], append1[3] and append2 are not read */
6799 w1
[3] = w1
[3] | append0
[0] << 8;
6800 w2
[0] = append0
[0] >> 24 | append0
[1] << 8;
6801 w2
[1] = append0
[1] >> 24 | append0
[2] << 8;
6802 w2
[2] = append0
[2] >> 24 | append0
[3] << 8;
6803 w2
[3] = append0
[3] >> 24 | append1
[0] << 8;
6804 w3
[0] = append1
[0] >> 24 | append1
[1] << 8;
6805 w3
[1] = append1
[1] >> 24 | append1
[2] << 8;
/* first target word w1[3], s = 16; chain stops at w3[1], append1[3] and append2 are not read */
6809 w1
[3] = w1
[3] | append0
[0] << 16;
6810 w2
[0] = append0
[0] >> 16 | append0
[1] << 16;
6811 w2
[1] = append0
[1] >> 16 | append0
[2] << 16;
6812 w2
[2] = append0
[2] >> 16 | append0
[3] << 16;
6813 w2
[3] = append0
[3] >> 16 | append1
[0] << 16;
6814 w3
[0] = append1
[0] >> 16 | append1
[1] << 16;
6815 w3
[1] = append1
[1] >> 16 | append1
[2] << 16;
/* first target word w1[3], s = 24; chain stops at w3[1], append1[3] and append2 are not read */
6819 w1
[3] = w1
[3] | append0
[0] << 24;
6820 w2
[0] = append0
[0] >> 8 | append0
[1] << 24;
6821 w2
[1] = append0
[1] >> 8 | append0
[2] << 24;
6822 w2
[2] = append0
[2] >> 8 | append0
[3] << 24;
6823 w2
[3] = append0
[3] >> 8 | append1
[0] << 24;
6824 w3
[0] = append1
[0] >> 8 | append1
[1] << 24;
6825 w3
[1] = append1
[1] >> 8 | append1
[2] << 24;
6839 static void switch_buffer_by_offset (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 offset
)
6841 const int offset_mod_4
= offset
& 3;
6843 const int offset_minus_4
= 4 - offset
;
6848 w3
[2] = amd_bytealign ( 0, w3
[1], offset_minus_4
);
6849 w3
[1] = amd_bytealign (w3
[1], w3
[0], offset_minus_4
);
6850 w3
[0] = amd_bytealign (w3
[0], w2
[3], offset_minus_4
);
6851 w2
[3] = amd_bytealign (w2
[3], w2
[2], offset_minus_4
);
6852 w2
[2] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
6853 w2
[1] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
6854 w2
[0] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
6855 w1
[3] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
6856 w1
[2] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
6857 w1
[1] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
6858 w1
[0] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
6859 w0
[3] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
6860 w0
[2] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
6861 w0
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
6862 w0
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
6864 if (offset_mod_4
== 0)
6886 w3
[2] = amd_bytealign ( 0, w3
[0], offset_minus_4
);
6887 w3
[1] = amd_bytealign (w3
[0], w2
[3], offset_minus_4
);
6888 w3
[0] = amd_bytealign (w2
[3], w2
[2], offset_minus_4
);
6889 w2
[3] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
6890 w2
[2] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
6891 w2
[1] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
6892 w2
[0] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
6893 w1
[3] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
6894 w1
[2] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
6895 w1
[1] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
6896 w1
[0] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
6897 w0
[3] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
6898 w0
[2] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
6899 w0
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
6902 if (offset_mod_4
== 0)
6923 w3
[2] = amd_bytealign ( 0, w2
[3], offset_minus_4
);
6924 w3
[1] = amd_bytealign (w2
[3], w2
[2], offset_minus_4
);
6925 w3
[0] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
6926 w2
[3] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
6927 w2
[2] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
6928 w2
[1] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
6929 w2
[0] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
6930 w1
[3] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
6931 w1
[2] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
6932 w1
[1] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
6933 w1
[0] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
6934 w0
[3] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
6935 w0
[2] = amd_bytealign (w0
[0], 0, offset_minus_4
);
6939 if (offset_mod_4
== 0)
6959 w3
[2] = amd_bytealign ( 0, w2
[2], offset_minus_4
);
6960 w3
[1] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
6961 w3
[0] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
6962 w2
[3] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
6963 w2
[2] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
6964 w2
[1] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
6965 w2
[0] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
6966 w1
[3] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
6967 w1
[2] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
6968 w1
[1] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
6969 w1
[0] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
6970 w0
[3] = amd_bytealign (w0
[0], 0, offset_minus_4
);
6975 if (offset_mod_4
== 0)
6994 w3
[2] = amd_bytealign ( 0, w2
[1], offset_minus_4
);
6995 w3
[1] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
6996 w3
[0] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
6997 w2
[3] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
6998 w2
[2] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
6999 w2
[1] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
7000 w2
[0] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
7001 w1
[3] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7002 w1
[2] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7003 w1
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7004 w1
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7010 if (offset_mod_4
== 0)
7028 w3
[2] = amd_bytealign ( 0, w2
[0], offset_minus_4
);
7029 w3
[1] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
7030 w3
[0] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
7031 w2
[3] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
7032 w2
[2] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
7033 w2
[1] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
7034 w2
[0] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7035 w1
[3] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7036 w1
[2] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7037 w1
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7044 if (offset_mod_4
== 0)
7061 w3
[2] = amd_bytealign ( 0, w1
[3], offset_minus_4
);
7062 w3
[1] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
7063 w3
[0] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
7064 w2
[3] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
7065 w2
[2] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
7066 w2
[1] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7067 w2
[0] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7068 w1
[3] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7069 w1
[2] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7077 if (offset_mod_4
== 0)
7093 w3
[2] = amd_bytealign ( 0, w1
[2], offset_minus_4
);
7094 w3
[1] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
7095 w3
[0] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
7096 w2
[3] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
7097 w2
[2] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7098 w2
[1] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7099 w2
[0] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7100 w1
[3] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7109 if (offset_mod_4
== 0)
7124 w3
[2] = amd_bytealign ( 0, w1
[1], offset_minus_4
);
7125 w3
[1] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
7126 w3
[0] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
7127 w2
[3] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7128 w2
[2] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7129 w2
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7130 w2
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7140 if (offset_mod_4
== 0)
7154 w3
[2] = amd_bytealign ( 0, w1
[0], offset_minus_4
);
7155 w3
[1] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
7156 w3
[0] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7157 w2
[3] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7158 w2
[2] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7159 w2
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7170 if (offset_mod_4
== 0)
7183 w3
[2] = amd_bytealign ( 0, w0
[3], offset_minus_4
);
7184 w3
[1] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
7185 w3
[0] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7186 w2
[3] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7187 w2
[2] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7199 if (offset_mod_4
== 0)
7211 w3
[2] = amd_bytealign ( 0, w0
[2], offset_minus_4
);
7212 w3
[1] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
7213 w3
[0] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7214 w2
[3] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7227 if (offset_mod_4
== 0)
7238 w3
[2] = amd_bytealign ( 0, w0
[1], offset_minus_4
);
7239 w3
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
7240 w3
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7254 if (offset_mod_4
== 0)
7264 w3
[2] = amd_bytealign ( 0, w0
[0], offset_minus_4
);
7265 w3
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
7280 if (offset_mod_4
== 0)
/*
 * switch_buffer_by_offset_be: rewrites the word buffer w0..w3 in place by
 * combining neighbouring words with amd_bytealign, passing `offset` as the
 * alignment argument.
 *
 * Fourteen groups of assignments are visible below. Every group starts by
 * writing w3[2] and works downwards; the last assignment of each group
 * combines a literal 0 with w0[0]. Each later group reads its inputs from
 * words one position lower than the previous group, so the destination of
 * w0[0] moves up by one word per group. No visible statement writes w3[3].
 *
 * Within a group the assignments run from the highest destination word to
 * the lowest, so every source word is read before it is overwritten.
 *
 * NOTE(review): the control flow that selects a group (presumably a switch
 * on offset / 4) and any clearing of the vacated low words are not visible
 * in this extract - confirm against the complete file. amd_bytealign is
 * declared elsewhere.
 */
7290 static void switch_buffer_by_offset_be (u32 w0
[4], u32 w1
[4], u32 w2
[4], u32 w3
[4], const u32 offset
)
7295 w3
[2] = amd_bytealign (w3
[1], 0, offset
);
7296 w3
[1] = amd_bytealign (w3
[0], w3
[1], offset
);
7297 w3
[0] = amd_bytealign (w2
[3], w3
[0], offset
);
7298 w2
[3] = amd_bytealign (w2
[2], w2
[3], offset
);
7299 w2
[2] = amd_bytealign (w2
[1], w2
[2], offset
);
7300 w2
[1] = amd_bytealign (w2
[0], w2
[1], offset
);
7301 w2
[0] = amd_bytealign (w1
[3], w2
[0], offset
);
7302 w1
[3] = amd_bytealign (w1
[2], w1
[3], offset
);
7303 w1
[2] = amd_bytealign (w1
[1], w1
[2], offset
);
7304 w1
[1] = amd_bytealign (w1
[0], w1
[1], offset
);
7305 w1
[0] = amd_bytealign (w0
[3], w1
[0], offset
);
7306 w0
[3] = amd_bytealign (w0
[2], w0
[3], offset
);
7307 w0
[2] = amd_bytealign (w0
[1], w0
[2], offset
);
7308 w0
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
7309 w0
[0] = amd_bytealign ( 0, w0
[0], offset
);
7313 w3
[2] = amd_bytealign (w3
[0], 0, offset
);
7314 w3
[1] = amd_bytealign (w2
[3], w3
[0], offset
);
7315 w3
[0] = amd_bytealign (w2
[2], w2
[3], offset
);
7316 w2
[3] = amd_bytealign (w2
[1], w2
[2], offset
);
7317 w2
[2] = amd_bytealign (w2
[0], w2
[1], offset
);
7318 w2
[1] = amd_bytealign (w1
[3], w2
[0], offset
);
7319 w2
[0] = amd_bytealign (w1
[2], w1
[3], offset
);
7320 w1
[3] = amd_bytealign (w1
[1], w1
[2], offset
);
7321 w1
[2] = amd_bytealign (w1
[0], w1
[1], offset
);
7322 w1
[1] = amd_bytealign (w0
[3], w1
[0], offset
);
7323 w1
[0] = amd_bytealign (w0
[2], w0
[3], offset
);
7324 w0
[3] = amd_bytealign (w0
[1], w0
[2], offset
);
7325 w0
[2] = amd_bytealign (w0
[0], w0
[1], offset
);
7326 w0
[1] = amd_bytealign ( 0, w0
[0], offset
);
7331 w3
[2] = amd_bytealign (w2
[3], 0, offset
);
7332 w3
[1] = amd_bytealign (w2
[2], w2
[3], offset
);
7333 w3
[0] = amd_bytealign (w2
[1], w2
[2], offset
);
7334 w2
[3] = amd_bytealign (w2
[0], w2
[1], offset
);
7335 w2
[2] = amd_bytealign (w1
[3], w2
[0], offset
);
7336 w2
[1] = amd_bytealign (w1
[2], w1
[3], offset
);
7337 w2
[0] = amd_bytealign (w1
[1], w1
[2], offset
);
7338 w1
[3] = amd_bytealign (w1
[0], w1
[1], offset
);
7339 w1
[2] = amd_bytealign (w0
[3], w1
[0], offset
);
7340 w1
[1] = amd_bytealign (w0
[2], w0
[3], offset
);
7341 w1
[0] = amd_bytealign (w0
[1], w0
[2], offset
);
7342 w0
[3] = amd_bytealign (w0
[0], w0
[1], offset
);
7343 w0
[2] = amd_bytealign ( 0, w0
[0], offset
);
7349 w3
[2] = amd_bytealign (w2
[2], 0, offset
);
7350 w3
[1] = amd_bytealign (w2
[1], w2
[2], offset
);
7351 w3
[0] = amd_bytealign (w2
[0], w2
[1], offset
);
7352 w2
[3] = amd_bytealign (w1
[3], w2
[0], offset
);
7353 w2
[2] = amd_bytealign (w1
[2], w1
[3], offset
);
7354 w2
[1] = amd_bytealign (w1
[1], w1
[2], offset
);
7355 w2
[0] = amd_bytealign (w1
[0], w1
[1], offset
);
7356 w1
[3] = amd_bytealign (w0
[3], w1
[0], offset
);
7357 w1
[2] = amd_bytealign (w0
[2], w0
[3], offset
);
7358 w1
[1] = amd_bytealign (w0
[1], w0
[2], offset
);
7359 w1
[0] = amd_bytealign (w0
[0], w0
[1], offset
);
7360 w0
[3] = amd_bytealign ( 0, w0
[0], offset
);
7367 w3
[2] = amd_bytealign (w2
[1], 0, offset
);
7368 w3
[1] = amd_bytealign (w2
[0], w2
[1], offset
);
7369 w3
[0] = amd_bytealign (w1
[3], w2
[0], offset
);
7370 w2
[3] = amd_bytealign (w1
[2], w1
[3], offset
);
7371 w2
[2] = amd_bytealign (w1
[1], w1
[2], offset
);
7372 w2
[1] = amd_bytealign (w1
[0], w1
[1], offset
);
7373 w2
[0] = amd_bytealign (w0
[3], w1
[0], offset
);
7374 w1
[3] = amd_bytealign (w0
[2], w0
[3], offset
);
7375 w1
[2] = amd_bytealign (w0
[1], w0
[2], offset
);
7376 w1
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
7377 w1
[0] = amd_bytealign ( 0, w0
[0], offset
);
7385 w3
[2] = amd_bytealign (w2
[0], 0, offset
);
7386 w3
[1] = amd_bytealign (w1
[3], w2
[0], offset
);
7387 w3
[0] = amd_bytealign (w1
[2], w1
[3], offset
);
7388 w2
[3] = amd_bytealign (w1
[1], w1
[2], offset
);
7389 w2
[2] = amd_bytealign (w1
[0], w1
[1], offset
);
7390 w2
[1] = amd_bytealign (w0
[3], w1
[0], offset
);
7391 w2
[0] = amd_bytealign (w0
[2], w0
[3], offset
);
7392 w1
[3] = amd_bytealign (w0
[1], w0
[2], offset
);
7393 w1
[2] = amd_bytealign (w0
[0], w0
[1], offset
);
7394 w1
[1] = amd_bytealign ( 0, w0
[0], offset
);
7403 w3
[2] = amd_bytealign (w1
[3], 0, offset
);
7404 w3
[1] = amd_bytealign (w1
[2], w1
[3], offset
);
7405 w3
[0] = amd_bytealign (w1
[1], w1
[2], offset
);
7406 w2
[3] = amd_bytealign (w1
[0], w1
[1], offset
);
7407 w2
[2] = amd_bytealign (w0
[3], w1
[0], offset
);
7408 w2
[1] = amd_bytealign (w0
[2], w0
[3], offset
);
7409 w2
[0] = amd_bytealign (w0
[1], w0
[2], offset
);
7410 w1
[3] = amd_bytealign (w0
[0], w0
[1], offset
);
7411 w1
[2] = amd_bytealign ( 0, w0
[0], offset
);
7421 w3
[2] = amd_bytealign (w1
[2], 0, offset
);
7422 w3
[1] = amd_bytealign (w1
[1], w1
[2], offset
);
7423 w3
[0] = amd_bytealign (w1
[0], w1
[1], offset
);
7424 w2
[3] = amd_bytealign (w0
[3], w1
[0], offset
);
7425 w2
[2] = amd_bytealign (w0
[2], w0
[3], offset
);
7426 w2
[1] = amd_bytealign (w0
[1], w0
[2], offset
);
7427 w2
[0] = amd_bytealign (w0
[0], w0
[1], offset
);
7428 w1
[3] = amd_bytealign ( 0, w0
[0], offset
);
7439 w3
[2] = amd_bytealign (w1
[1], 0, offset
);
7440 w3
[1] = amd_bytealign (w1
[0], w1
[1], offset
);
7441 w3
[0] = amd_bytealign (w0
[3], w1
[0], offset
);
7442 w2
[3] = amd_bytealign (w0
[2], w0
[3], offset
);
7443 w2
[2] = amd_bytealign (w0
[1], w0
[2], offset
);
7444 w2
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
7445 w2
[0] = amd_bytealign ( 0, w0
[0], offset
);
7457 w3
[2] = amd_bytealign (w1
[0], 0, offset
);
7458 w3
[1] = amd_bytealign (w0
[3], w1
[0], offset
);
7459 w3
[0] = amd_bytealign (w0
[2], w0
[3], offset
);
7460 w2
[3] = amd_bytealign (w0
[1], w0
[2], offset
);
7461 w2
[2] = amd_bytealign (w0
[0], w0
[1], offset
);
7462 w2
[1] = amd_bytealign ( 0, w0
[0], offset
);
7475 w3
[2] = amd_bytealign (w0
[3], 0, offset
);
7476 w3
[1] = amd_bytealign (w0
[2], w0
[3], offset
);
7477 w3
[0] = amd_bytealign (w0
[1], w0
[2], offset
);
7478 w2
[3] = amd_bytealign (w0
[0], w0
[1], offset
);
7479 w2
[2] = amd_bytealign ( 0, w0
[0], offset
);
7493 w3
[2] = amd_bytealign (w0
[2], 0, offset
);
7494 w3
[1] = amd_bytealign (w0
[1], w0
[2], offset
);
7495 w3
[0] = amd_bytealign (w0
[0], w0
[1], offset
);
7496 w2
[3] = amd_bytealign ( 0, w0
[0], offset
);
7511 w3
[2] = amd_bytealign (w0
[1], 0, offset
);
7512 w3
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
7513 w3
[0] = amd_bytealign ( 0, w0
[0], offset
);
7529 w3
[2] = amd_bytealign (w0
[0], 0, offset
);
7530 w3
[1] = amd_bytealign ( 0, w0
[0], offset
);
/*
 * swap_workaround (32-bit): splits v into its even byte lanes (mask
 * 0x00FF00FF) and odd byte lanes (mask 0xFF00FF00), passes them through
 * rotl32 with counts 24 and 8 respectively, and ORs the two results.
 *
 * NOTE(review): assuming rotl32 (defined elsewhere) is a 32-bit left
 * rotate, the combined effect is a reversal of the four bytes of v.
 */
static u32x swap_workaround (const u32x v)
{
  const u32x even_lanes = v & 0x00FF00FF;
  const u32x odd_lanes  = v & 0xFF00FF00;

  return rotl32 (even_lanes, 24u) | rotl32 (odd_lanes, 8u);
}
/*
 * swap_workaround (64-bit): returns v with its eight bytes in reverse
 * order. Each byte is isolated with a mask and shifted to the mirrored
 * position; the eight partial results are OR-ed together.
 */
static u64x swap_workaround (const u64x v)
{
  u64x swapped;

  /* upper four bytes move down */
  swapped  = (v & 0xff00000000000000) >> 56;
  swapped |= (v & 0x00ff000000000000) >> 40;
  swapped |= (v & 0x0000ff0000000000) >> 24;
  swapped |= (v & 0x000000ff00000000) >>  8;

  /* lower four bytes move up */
  swapped |= (v & 0x00000000ff000000) <<  8;
  swapped |= (v & 0x0000000000ff0000) << 24;
  swapped |= (v & 0x000000000000ff00) << 40;
  swapped |= (v & 0x00000000000000ff) << 56;

  return swapped;
}
/*
 * truncate_block: masks words of the four-word block w in place.
 *
 * The visible case labels cover the values 1-3, 5-7, 9-11 and 13-15. For
 * each, word w[label / 4] is AND-ed with a mask that keeps its low
 * (label % 4) bytes: 0x000000FF, 0x0000FFFF or 0x00FFFFFF.
 *
 * NOTE(review): the switch header (presumably `switch (len)`), the cases
 * for 0/4/8/12 and whatever else each case does (the gaps in the original
 * line numbering suggest more statements per case) are not visible in
 * this extract - confirm against the complete file.
 */
7571 static void truncate_block (u32x w
[4], const u32 len
)
7580 case 1: w
[0] &= 0x000000FF;
7585 case 2: w
[0] &= 0x0000FFFF;
7590 case 3: w
[0] &= 0x00FFFFFF;
7599 case 5: w
[1] &= 0x000000FF;
7603 case 6: w
[1] &= 0x0000FFFF;
7607 case 7: w
[1] &= 0x00FFFFFF;
7614 case 9: w
[2] &= 0x000000FF;
7617 case 10: w
[2] &= 0x0000FFFF;
7620 case 11: w
[2] &= 0x00FFFFFF;
7625 case 13: w
[3] &= 0x000000FF;
7627 case 14: w
[3] &= 0x0000FFFF;
7629 case 15: w
[3] &= 0x00FFFFFF;
/*
 * Widens bytes 0 and 1 of w: byte 0 stays in bits 0-7, byte 1 moves to
 * bits 16-23, all other bits are zero.
 */
static u32x make_unicode_lo_pair (const u32x w)
{
  return ((w << 8) & 0x00FF0000) | (w & 0x000000FF);
}

/*
 * Widens bytes 2 and 3 of w: byte 2 moves to bits 0-7, byte 3 moves to
 * bits 16-23, all other bits are zero.
 */
static u32x make_unicode_hi_pair (const u32x w)
{
  return ((w >> 8) & 0x00FF0000) | ((w >> 16) & 0x000000FF);
}

/*
 * make_unicode: expands the four words of `in` into eight words, so that
 * every input byte occupies the low byte of its own 16-bit half-word and
 * the high byte of that half-word is zero.
 *
 *   in[0], in[1] -> out1[0..3]
 *   in[2], in[3] -> out2[0..3]
 *
 * The stores run from out2[3] down to out1[0], the same order as before,
 * so the result is unchanged even if an output array overlaps `in`.
 */
static void make_unicode (const u32x in[4], u32x out1[4], u32x out2[4])
{
  out2[3] = make_unicode_hi_pair (in[3]);
  out2[2] = make_unicode_lo_pair (in[3]);
  out2[1] = make_unicode_hi_pair (in[2]);
  out2[0] = make_unicode_lo_pair (in[2]);

  out1[3] = make_unicode_hi_pair (in[1]);
  out1[2] = make_unicode_lo_pair (in[1]);
  out1[1] = make_unicode_hi_pair (in[0]);
  out1[0] = make_unicode_lo_pair (in[0]);
}
/*
 * append_0x01_1: ORs the byte value 0x01 into one word of w0.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0100, 0x010000, 0x01000000) of each word w0[0]..w0[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
7646 static void append_0x01_1 (u32x w0
[4], const u32 offset
)
7655 w0
[0] = w0
[0] | 0x0100;
7659 w0
[0] = w0
[0] | 0x010000;
7663 w0
[0] = w0
[0] | 0x01000000;
7671 w0
[1] = w0
[1] | 0x0100;
7675 w0
[1] = w0
[1] | 0x010000;
7679 w0
[1] = w0
[1] | 0x01000000;
7687 w0
[2] = w0
[2] | 0x0100;
7691 w0
[2] = w0
[2] | 0x010000;
7695 w0
[2] = w0
[2] | 0x01000000;
7703 w0
[3] = w0
[3] | 0x0100;
7707 w0
[3] = w0
[3] | 0x010000;
7711 w0
[3] = w0
[3] | 0x01000000;
/*
 * append_0x01_2: ORs the byte value 0x01 into one word of the eight-word
 * buffer w0, w1.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0100, 0x010000, 0x01000000) of each word w0[0]..w1[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
7716 static void append_0x01_2 (u32x w0
[4], u32x w1
[4], const u32 offset
)
7725 w0
[0] = w0
[0] | 0x0100;
7729 w0
[0] = w0
[0] | 0x010000;
7733 w0
[0] = w0
[0] | 0x01000000;
7741 w0
[1] = w0
[1] | 0x0100;
7745 w0
[1] = w0
[1] | 0x010000;
7749 w0
[1] = w0
[1] | 0x01000000;
7757 w0
[2] = w0
[2] | 0x0100;
7761 w0
[2] = w0
[2] | 0x010000;
7765 w0
[2] = w0
[2] | 0x01000000;
7773 w0
[3] = w0
[3] | 0x0100;
7777 w0
[3] = w0
[3] | 0x010000;
7781 w0
[3] = w0
[3] | 0x01000000;
7789 w1
[0] = w1
[0] | 0x0100;
7793 w1
[0] = w1
[0] | 0x010000;
7797 w1
[0] = w1
[0] | 0x01000000;
7805 w1
[1] = w1
[1] | 0x0100;
7809 w1
[1] = w1
[1] | 0x010000;
7813 w1
[1] = w1
[1] | 0x01000000;
7821 w1
[2] = w1
[2] | 0x0100;
7825 w1
[2] = w1
[2] | 0x010000;
7829 w1
[2] = w1
[2] | 0x01000000;
7837 w1
[3] = w1
[3] | 0x0100;
7841 w1
[3] = w1
[3] | 0x010000;
7845 w1
[3] = w1
[3] | 0x01000000;
/*
 * append_0x01_3: ORs the byte value 0x01 into one word of the twelve-word
 * buffer w0, w1, w2.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0100, 0x010000, 0x01000000) of each word w0[0]..w2[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
7850 static void append_0x01_3 (u32x w0
[4], u32x w1
[4], u32x w2
[4], const u32 offset
)
7859 w0
[0] = w0
[0] | 0x0100;
7863 w0
[0] = w0
[0] | 0x010000;
7867 w0
[0] = w0
[0] | 0x01000000;
7875 w0
[1] = w0
[1] | 0x0100;
7879 w0
[1] = w0
[1] | 0x010000;
7883 w0
[1] = w0
[1] | 0x01000000;
7891 w0
[2] = w0
[2] | 0x0100;
7895 w0
[2] = w0
[2] | 0x010000;
7899 w0
[2] = w0
[2] | 0x01000000;
7907 w0
[3] = w0
[3] | 0x0100;
7911 w0
[3] = w0
[3] | 0x010000;
7915 w0
[3] = w0
[3] | 0x01000000;
7923 w1
[0] = w1
[0] | 0x0100;
7927 w1
[0] = w1
[0] | 0x010000;
7931 w1
[0] = w1
[0] | 0x01000000;
7939 w1
[1] = w1
[1] | 0x0100;
7943 w1
[1] = w1
[1] | 0x010000;
7947 w1
[1] = w1
[1] | 0x01000000;
7955 w1
[2] = w1
[2] | 0x0100;
7959 w1
[2] = w1
[2] | 0x010000;
7963 w1
[2] = w1
[2] | 0x01000000;
7971 w1
[3] = w1
[3] | 0x0100;
7975 w1
[3] = w1
[3] | 0x010000;
7979 w1
[3] = w1
[3] | 0x01000000;
7987 w2
[0] = w2
[0] | 0x0100;
7991 w2
[0] = w2
[0] | 0x010000;
7995 w2
[0] = w2
[0] | 0x01000000;
8003 w2
[1] = w2
[1] | 0x0100;
8007 w2
[1] = w2
[1] | 0x010000;
8011 w2
[1] = w2
[1] | 0x01000000;
8019 w2
[2] = w2
[2] | 0x0100;
8023 w2
[2] = w2
[2] | 0x010000;
8027 w2
[2] = w2
[2] | 0x01000000;
8035 w2
[3] = w2
[3] | 0x0100;
8039 w2
[3] = w2
[3] | 0x010000;
8043 w2
[3] = w2
[3] | 0x01000000;
/*
 * append_0x01_4: ORs the byte value 0x01 into one word of the
 * sixteen-word buffer w0..w3.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0100, 0x010000, 0x01000000) of each word w0[0]..w3[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
8048 static void append_0x01_4 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 offset
)
8057 w0
[0] = w0
[0] | 0x0100;
8061 w0
[0] = w0
[0] | 0x010000;
8065 w0
[0] = w0
[0] | 0x01000000;
8073 w0
[1] = w0
[1] | 0x0100;
8077 w0
[1] = w0
[1] | 0x010000;
8081 w0
[1] = w0
[1] | 0x01000000;
8089 w0
[2] = w0
[2] | 0x0100;
8093 w0
[2] = w0
[2] | 0x010000;
8097 w0
[2] = w0
[2] | 0x01000000;
8105 w0
[3] = w0
[3] | 0x0100;
8109 w0
[3] = w0
[3] | 0x010000;
8113 w0
[3] = w0
[3] | 0x01000000;
8121 w1
[0] = w1
[0] | 0x0100;
8125 w1
[0] = w1
[0] | 0x010000;
8129 w1
[0] = w1
[0] | 0x01000000;
8137 w1
[1] = w1
[1] | 0x0100;
8141 w1
[1] = w1
[1] | 0x010000;
8145 w1
[1] = w1
[1] | 0x01000000;
8153 w1
[2] = w1
[2] | 0x0100;
8157 w1
[2] = w1
[2] | 0x010000;
8161 w1
[2] = w1
[2] | 0x01000000;
8169 w1
[3] = w1
[3] | 0x0100;
8173 w1
[3] = w1
[3] | 0x010000;
8177 w1
[3] = w1
[3] | 0x01000000;
8185 w2
[0] = w2
[0] | 0x0100;
8189 w2
[0] = w2
[0] | 0x010000;
8193 w2
[0] = w2
[0] | 0x01000000;
8201 w2
[1] = w2
[1] | 0x0100;
8205 w2
[1] = w2
[1] | 0x010000;
8209 w2
[1] = w2
[1] | 0x01000000;
8217 w2
[2] = w2
[2] | 0x0100;
8221 w2
[2] = w2
[2] | 0x010000;
8225 w2
[2] = w2
[2] | 0x01000000;
8233 w2
[3] = w2
[3] | 0x0100;
8237 w2
[3] = w2
[3] | 0x010000;
8241 w2
[3] = w2
[3] | 0x01000000;
8249 w3
[0] = w3
[0] | 0x0100;
8253 w3
[0] = w3
[0] | 0x010000;
8257 w3
[0] = w3
[0] | 0x01000000;
8265 w3
[1] = w3
[1] | 0x0100;
8269 w3
[1] = w3
[1] | 0x010000;
8273 w3
[1] = w3
[1] | 0x01000000;
8281 w3
[2] = w3
[2] | 0x0100;
8285 w3
[2] = w3
[2] | 0x010000;
8289 w3
[2] = w3
[2] | 0x01000000;
8297 w3
[3] = w3
[3] | 0x0100;
8301 w3
[3] = w3
[3] | 0x010000;
8305 w3
[3] = w3
[3] | 0x01000000;
/*
 * append_0x01_8: ORs the byte value 0x01 into one word of the
 * thirty-two-word buffer w0..w7.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0100, 0x010000, 0x01000000) of each word w0[0]..w7[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
8310 static void append_0x01_8 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], u32x w4
[4], u32x w5
[4], u32x w6
[4], u32x w7
[4], const u32 offset
)
8319 w0
[0] = w0
[0] | 0x0100;
8323 w0
[0] = w0
[0] | 0x010000;
8327 w0
[0] = w0
[0] | 0x01000000;
8335 w0
[1] = w0
[1] | 0x0100;
8339 w0
[1] = w0
[1] | 0x010000;
8343 w0
[1] = w0
[1] | 0x01000000;
8351 w0
[2] = w0
[2] | 0x0100;
8355 w0
[2] = w0
[2] | 0x010000;
8359 w0
[2] = w0
[2] | 0x01000000;
8367 w0
[3] = w0
[3] | 0x0100;
8371 w0
[3] = w0
[3] | 0x010000;
8375 w0
[3] = w0
[3] | 0x01000000;
8383 w1
[0] = w1
[0] | 0x0100;
8387 w1
[0] = w1
[0] | 0x010000;
8391 w1
[0] = w1
[0] | 0x01000000;
8399 w1
[1] = w1
[1] | 0x0100;
8403 w1
[1] = w1
[1] | 0x010000;
8407 w1
[1] = w1
[1] | 0x01000000;
8415 w1
[2] = w1
[2] | 0x0100;
8419 w1
[2] = w1
[2] | 0x010000;
8423 w1
[2] = w1
[2] | 0x01000000;
8431 w1
[3] = w1
[3] | 0x0100;
8435 w1
[3] = w1
[3] | 0x010000;
8439 w1
[3] = w1
[3] | 0x01000000;
8447 w2
[0] = w2
[0] | 0x0100;
8451 w2
[0] = w2
[0] | 0x010000;
8455 w2
[0] = w2
[0] | 0x01000000;
8463 w2
[1] = w2
[1] | 0x0100;
8467 w2
[1] = w2
[1] | 0x010000;
8471 w2
[1] = w2
[1] | 0x01000000;
8479 w2
[2] = w2
[2] | 0x0100;
8483 w2
[2] = w2
[2] | 0x010000;
8487 w2
[2] = w2
[2] | 0x01000000;
8495 w2
[3] = w2
[3] | 0x0100;
8499 w2
[3] = w2
[3] | 0x010000;
8503 w2
[3] = w2
[3] | 0x01000000;
8511 w3
[0] = w3
[0] | 0x0100;
8515 w3
[0] = w3
[0] | 0x010000;
8519 w3
[0] = w3
[0] | 0x01000000;
8527 w3
[1] = w3
[1] | 0x0100;
8531 w3
[1] = w3
[1] | 0x010000;
8535 w3
[1] = w3
[1] | 0x01000000;
8543 w3
[2] = w3
[2] | 0x0100;
8547 w3
[2] = w3
[2] | 0x010000;
8551 w3
[2] = w3
[2] | 0x01000000;
8559 w3
[3] = w3
[3] | 0x0100;
8563 w3
[3] = w3
[3] | 0x010000;
8567 w3
[3] = w3
[3] | 0x01000000;
8575 w4
[0] = w4
[0] | 0x0100;
8579 w4
[0] = w4
[0] | 0x010000;
8583 w4
[0] = w4
[0] | 0x01000000;
8591 w4
[1] = w4
[1] | 0x0100;
8595 w4
[1] = w4
[1] | 0x010000;
8599 w4
[1] = w4
[1] | 0x01000000;
8607 w4
[2] = w4
[2] | 0x0100;
8611 w4
[2] = w4
[2] | 0x010000;
8615 w4
[2] = w4
[2] | 0x01000000;
8623 w4
[3] = w4
[3] | 0x0100;
8627 w4
[3] = w4
[3] | 0x010000;
8631 w4
[3] = w4
[3] | 0x01000000;
8639 w5
[0] = w5
[0] | 0x0100;
8643 w5
[0] = w5
[0] | 0x010000;
8647 w5
[0] = w5
[0] | 0x01000000;
8655 w5
[1] = w5
[1] | 0x0100;
8659 w5
[1] = w5
[1] | 0x010000;
8663 w5
[1] = w5
[1] | 0x01000000;
8671 w5
[2] = w5
[2] | 0x0100;
8675 w5
[2] = w5
[2] | 0x010000;
8679 w5
[2] = w5
[2] | 0x01000000;
8687 w5
[3] = w5
[3] | 0x0100;
8691 w5
[3] = w5
[3] | 0x010000;
8695 w5
[3] = w5
[3] | 0x01000000;
8703 w6
[0] = w6
[0] | 0x0100;
8707 w6
[0] = w6
[0] | 0x010000;
8711 w6
[0] = w6
[0] | 0x01000000;
8719 w6
[1] = w6
[1] | 0x0100;
8723 w6
[1] = w6
[1] | 0x010000;
8727 w6
[1] = w6
[1] | 0x01000000;
8735 w6
[2] = w6
[2] | 0x0100;
8739 w6
[2] = w6
[2] | 0x010000;
8743 w6
[2] = w6
[2] | 0x01000000;
8751 w6
[3] = w6
[3] | 0x0100;
8755 w6
[3] = w6
[3] | 0x010000;
8759 w6
[3] = w6
[3] | 0x01000000;
8767 w7
[0] = w7
[0] | 0x0100;
8771 w7
[0] = w7
[0] | 0x010000;
8775 w7
[0] = w7
[0] | 0x01000000;
8783 w7
[1] = w7
[1] | 0x0100;
8787 w7
[1] = w7
[1] | 0x010000;
8791 w7
[1] = w7
[1] | 0x01000000;
8799 w7
[2] = w7
[2] | 0x0100;
8803 w7
[2] = w7
[2] | 0x010000;
8807 w7
[2] = w7
[2] | 0x01000000;
8815 w7
[3] = w7
[3] | 0x0100;
8819 w7
[3] = w7
[3] | 0x010000;
8823 w7
[3] = w7
[3] | 0x01000000;
/*
 * append_0x02_1: ORs the byte value 0x02 into one word of w0.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0200, 0x020000, 0x02000000) of each word w0[0]..w0[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
8828 static void append_0x02_1 (u32x w0
[4], const u32 offset
)
8837 w0
[0] = w0
[0] | 0x0200;
8841 w0
[0] = w0
[0] | 0x020000;
8845 w0
[0] = w0
[0] | 0x02000000;
8853 w0
[1] = w0
[1] | 0x0200;
8857 w0
[1] = w0
[1] | 0x020000;
8861 w0
[1] = w0
[1] | 0x02000000;
8869 w0
[2] = w0
[2] | 0x0200;
8873 w0
[2] = w0
[2] | 0x020000;
8877 w0
[2] = w0
[2] | 0x02000000;
8885 w0
[3] = w0
[3] | 0x0200;
8889 w0
[3] = w0
[3] | 0x020000;
8893 w0
[3] = w0
[3] | 0x02000000;
/*
 * append_0x02_2: ORs the byte value 0x02 into one word of the eight-word
 * buffer w0, w1.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0200, 0x020000, 0x02000000) of each word w0[0]..w1[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
8898 static void append_0x02_2 (u32x w0
[4], u32x w1
[4], const u32 offset
)
8907 w0
[0] = w0
[0] | 0x0200;
8911 w0
[0] = w0
[0] | 0x020000;
8915 w0
[0] = w0
[0] | 0x02000000;
8923 w0
[1] = w0
[1] | 0x0200;
8927 w0
[1] = w0
[1] | 0x020000;
8931 w0
[1] = w0
[1] | 0x02000000;
8939 w0
[2] = w0
[2] | 0x0200;
8943 w0
[2] = w0
[2] | 0x020000;
8947 w0
[2] = w0
[2] | 0x02000000;
8955 w0
[3] = w0
[3] | 0x0200;
8959 w0
[3] = w0
[3] | 0x020000;
8963 w0
[3] = w0
[3] | 0x02000000;
8971 w1
[0] = w1
[0] | 0x0200;
8975 w1
[0] = w1
[0] | 0x020000;
8979 w1
[0] = w1
[0] | 0x02000000;
8987 w1
[1] = w1
[1] | 0x0200;
8991 w1
[1] = w1
[1] | 0x020000;
8995 w1
[1] = w1
[1] | 0x02000000;
9003 w1
[2] = w1
[2] | 0x0200;
9007 w1
[2] = w1
[2] | 0x020000;
9011 w1
[2] = w1
[2] | 0x02000000;
9019 w1
[3] = w1
[3] | 0x0200;
9023 w1
[3] = w1
[3] | 0x020000;
9027 w1
[3] = w1
[3] | 0x02000000;
/*
 * append_0x02_3: ORs the byte value 0x02 into one word of the twelve-word
 * buffer w0, w1, w2.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0200, 0x020000, 0x02000000) of each word w0[0]..w2[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
9032 static void append_0x02_3 (u32x w0
[4], u32x w1
[4], u32x w2
[4], const u32 offset
)
9041 w0
[0] = w0
[0] | 0x0200;
9045 w0
[0] = w0
[0] | 0x020000;
9049 w0
[0] = w0
[0] | 0x02000000;
9057 w0
[1] = w0
[1] | 0x0200;
9061 w0
[1] = w0
[1] | 0x020000;
9065 w0
[1] = w0
[1] | 0x02000000;
9073 w0
[2] = w0
[2] | 0x0200;
9077 w0
[2] = w0
[2] | 0x020000;
9081 w0
[2] = w0
[2] | 0x02000000;
9089 w0
[3] = w0
[3] | 0x0200;
9093 w0
[3] = w0
[3] | 0x020000;
9097 w0
[3] = w0
[3] | 0x02000000;
9105 w1
[0] = w1
[0] | 0x0200;
9109 w1
[0] = w1
[0] | 0x020000;
9113 w1
[0] = w1
[0] | 0x02000000;
9121 w1
[1] = w1
[1] | 0x0200;
9125 w1
[1] = w1
[1] | 0x020000;
9129 w1
[1] = w1
[1] | 0x02000000;
9137 w1
[2] = w1
[2] | 0x0200;
9141 w1
[2] = w1
[2] | 0x020000;
9145 w1
[2] = w1
[2] | 0x02000000;
9153 w1
[3] = w1
[3] | 0x0200;
9157 w1
[3] = w1
[3] | 0x020000;
9161 w1
[3] = w1
[3] | 0x02000000;
9169 w2
[0] = w2
[0] | 0x0200;
9173 w2
[0] = w2
[0] | 0x020000;
9177 w2
[0] = w2
[0] | 0x02000000;
9185 w2
[1] = w2
[1] | 0x0200;
9189 w2
[1] = w2
[1] | 0x020000;
9193 w2
[1] = w2
[1] | 0x02000000;
9201 w2
[2] = w2
[2] | 0x0200;
9205 w2
[2] = w2
[2] | 0x020000;
9209 w2
[2] = w2
[2] | 0x02000000;
9217 w2
[3] = w2
[3] | 0x0200;
9221 w2
[3] = w2
[3] | 0x020000;
9225 w2
[3] = w2
[3] | 0x02000000;
/*
 * append_0x02_4: ORs the byte value 0x02 into one word of the
 * sixteen-word buffer w0..w3.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0200, 0x020000, 0x02000000) of each word w0[0]..w3[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
9230 static void append_0x02_4 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 offset
)
9239 w0
[0] = w0
[0] | 0x0200;
9243 w0
[0] = w0
[0] | 0x020000;
9247 w0
[0] = w0
[0] | 0x02000000;
9255 w0
[1] = w0
[1] | 0x0200;
9259 w0
[1] = w0
[1] | 0x020000;
9263 w0
[1] = w0
[1] | 0x02000000;
9271 w0
[2] = w0
[2] | 0x0200;
9275 w0
[2] = w0
[2] | 0x020000;
9279 w0
[2] = w0
[2] | 0x02000000;
9287 w0
[3] = w0
[3] | 0x0200;
9291 w0
[3] = w0
[3] | 0x020000;
9295 w0
[3] = w0
[3] | 0x02000000;
9303 w1
[0] = w1
[0] | 0x0200;
9307 w1
[0] = w1
[0] | 0x020000;
9311 w1
[0] = w1
[0] | 0x02000000;
9319 w1
[1] = w1
[1] | 0x0200;
9323 w1
[1] = w1
[1] | 0x020000;
9327 w1
[1] = w1
[1] | 0x02000000;
9335 w1
[2] = w1
[2] | 0x0200;
9339 w1
[2] = w1
[2] | 0x020000;
9343 w1
[2] = w1
[2] | 0x02000000;
9351 w1
[3] = w1
[3] | 0x0200;
9355 w1
[3] = w1
[3] | 0x020000;
9359 w1
[3] = w1
[3] | 0x02000000;
9367 w2
[0] = w2
[0] | 0x0200;
9371 w2
[0] = w2
[0] | 0x020000;
9375 w2
[0] = w2
[0] | 0x02000000;
9383 w2
[1] = w2
[1] | 0x0200;
9387 w2
[1] = w2
[1] | 0x020000;
9391 w2
[1] = w2
[1] | 0x02000000;
9399 w2
[2] = w2
[2] | 0x0200;
9403 w2
[2] = w2
[2] | 0x020000;
9407 w2
[2] = w2
[2] | 0x02000000;
9415 w2
[3] = w2
[3] | 0x0200;
9419 w2
[3] = w2
[3] | 0x020000;
9423 w2
[3] = w2
[3] | 0x02000000;
9431 w3
[0] = w3
[0] | 0x0200;
9435 w3
[0] = w3
[0] | 0x020000;
9439 w3
[0] = w3
[0] | 0x02000000;
9447 w3
[1] = w3
[1] | 0x0200;
9451 w3
[1] = w3
[1] | 0x020000;
9455 w3
[1] = w3
[1] | 0x02000000;
9463 w3
[2] = w3
[2] | 0x0200;
9467 w3
[2] = w3
[2] | 0x020000;
9471 w3
[2] = w3
[2] | 0x02000000;
9479 w3
[3] = w3
[3] | 0x0200;
9483 w3
[3] = w3
[3] | 0x020000;
9487 w3
[3] = w3
[3] | 0x02000000;
/*
 * append_0x02_8: ORs the byte value 0x02 into one word of the
 * thirty-two-word buffer w0..w7.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x0200, 0x020000, 0x02000000) of each word w0[0]..w7[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
9492 static void append_0x02_8 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], u32x w4
[4], u32x w5
[4], u32x w6
[4], u32x w7
[4], const u32 offset
)
9501 w0
[0] = w0
[0] | 0x0200;
9505 w0
[0] = w0
[0] | 0x020000;
9509 w0
[0] = w0
[0] | 0x02000000;
9517 w0
[1] = w0
[1] | 0x0200;
9521 w0
[1] = w0
[1] | 0x020000;
9525 w0
[1] = w0
[1] | 0x02000000;
9533 w0
[2] = w0
[2] | 0x0200;
9537 w0
[2] = w0
[2] | 0x020000;
9541 w0
[2] = w0
[2] | 0x02000000;
9549 w0
[3] = w0
[3] | 0x0200;
9553 w0
[3] = w0
[3] | 0x020000;
9557 w0
[3] = w0
[3] | 0x02000000;
9565 w1
[0] = w1
[0] | 0x0200;
9569 w1
[0] = w1
[0] | 0x020000;
9573 w1
[0] = w1
[0] | 0x02000000;
9581 w1
[1] = w1
[1] | 0x0200;
9585 w1
[1] = w1
[1] | 0x020000;
9589 w1
[1] = w1
[1] | 0x02000000;
9597 w1
[2] = w1
[2] | 0x0200;
9601 w1
[2] = w1
[2] | 0x020000;
9605 w1
[2] = w1
[2] | 0x02000000;
9613 w1
[3] = w1
[3] | 0x0200;
9617 w1
[3] = w1
[3] | 0x020000;
9621 w1
[3] = w1
[3] | 0x02000000;
9629 w2
[0] = w2
[0] | 0x0200;
9633 w2
[0] = w2
[0] | 0x020000;
9637 w2
[0] = w2
[0] | 0x02000000;
9645 w2
[1] = w2
[1] | 0x0200;
9649 w2
[1] = w2
[1] | 0x020000;
9653 w2
[1] = w2
[1] | 0x02000000;
9661 w2
[2] = w2
[2] | 0x0200;
9665 w2
[2] = w2
[2] | 0x020000;
9669 w2
[2] = w2
[2] | 0x02000000;
9677 w2
[3] = w2
[3] | 0x0200;
9681 w2
[3] = w2
[3] | 0x020000;
9685 w2
[3] = w2
[3] | 0x02000000;
9693 w3
[0] = w3
[0] | 0x0200;
9697 w3
[0] = w3
[0] | 0x020000;
9701 w3
[0] = w3
[0] | 0x02000000;
9709 w3
[1] = w3
[1] | 0x0200;
9713 w3
[1] = w3
[1] | 0x020000;
9717 w3
[1] = w3
[1] | 0x02000000;
9725 w3
[2] = w3
[2] | 0x0200;
9729 w3
[2] = w3
[2] | 0x020000;
9733 w3
[2] = w3
[2] | 0x02000000;
9741 w3
[3] = w3
[3] | 0x0200;
9745 w3
[3] = w3
[3] | 0x020000;
9749 w3
[3] = w3
[3] | 0x02000000;
9757 w4
[0] = w4
[0] | 0x0200;
9761 w4
[0] = w4
[0] | 0x020000;
9765 w4
[0] = w4
[0] | 0x02000000;
9773 w4
[1] = w4
[1] | 0x0200;
9777 w4
[1] = w4
[1] | 0x020000;
9781 w4
[1] = w4
[1] | 0x02000000;
9789 w4
[2] = w4
[2] | 0x0200;
9793 w4
[2] = w4
[2] | 0x020000;
9797 w4
[2] = w4
[2] | 0x02000000;
9805 w4
[3] = w4
[3] | 0x0200;
9809 w4
[3] = w4
[3] | 0x020000;
9813 w4
[3] = w4
[3] | 0x02000000;
9821 w5
[0] = w5
[0] | 0x0200;
9825 w5
[0] = w5
[0] | 0x020000;
9829 w5
[0] = w5
[0] | 0x02000000;
9837 w5
[1] = w5
[1] | 0x0200;
9841 w5
[1] = w5
[1] | 0x020000;
9845 w5
[1] = w5
[1] | 0x02000000;
9853 w5
[2] = w5
[2] | 0x0200;
9857 w5
[2] = w5
[2] | 0x020000;
9861 w5
[2] = w5
[2] | 0x02000000;
9869 w5
[3] = w5
[3] | 0x0200;
9873 w5
[3] = w5
[3] | 0x020000;
9877 w5
[3] = w5
[3] | 0x02000000;
9885 w6
[0] = w6
[0] | 0x0200;
9889 w6
[0] = w6
[0] | 0x020000;
9893 w6
[0] = w6
[0] | 0x02000000;
9901 w6
[1] = w6
[1] | 0x0200;
9905 w6
[1] = w6
[1] | 0x020000;
9909 w6
[1] = w6
[1] | 0x02000000;
9917 w6
[2] = w6
[2] | 0x0200;
9921 w6
[2] = w6
[2] | 0x020000;
9925 w6
[2] = w6
[2] | 0x02000000;
9933 w6
[3] = w6
[3] | 0x0200;
9937 w6
[3] = w6
[3] | 0x020000;
9941 w6
[3] = w6
[3] | 0x02000000;
9949 w7
[0] = w7
[0] | 0x0200;
9953 w7
[0] = w7
[0] | 0x020000;
9957 w7
[0] = w7
[0] | 0x02000000;
9965 w7
[1] = w7
[1] | 0x0200;
9969 w7
[1] = w7
[1] | 0x020000;
9973 w7
[1] = w7
[1] | 0x02000000;
9981 w7
[2] = w7
[2] | 0x0200;
9985 w7
[2] = w7
[2] | 0x020000;
9989 w7
[2] = w7
[2] | 0x02000000;
9997 w7
[3] = w7
[3] | 0x0200;
10001 w7
[3] = w7
[3] | 0x020000;
10005 w7
[3] = w7
[3] | 0x02000000;
/*
 * append_0x80_1: ORs the byte value 0x80 into one word of w0.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x8000, 0x800000, 0x80000000) of each word w0[0]..w0[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
10010 static void append_0x80_1 (u32x w0
[4], const u32 offset
)
10019 w0
[0] = w0
[0] | 0x8000;
10023 w0
[0] = w0
[0] | 0x800000;
10027 w0
[0] = w0
[0] | 0x80000000;
10035 w0
[1] = w0
[1] | 0x8000;
10039 w0
[1] = w0
[1] | 0x800000;
10043 w0
[1] = w0
[1] | 0x80000000;
10051 w0
[2] = w0
[2] | 0x8000;
10055 w0
[2] = w0
[2] | 0x800000;
10059 w0
[2] = w0
[2] | 0x80000000;
10067 w0
[3] = w0
[3] | 0x8000;
10071 w0
[3] = w0
[3] | 0x800000;
10075 w0
[3] = w0
[3] | 0x80000000;
/*
 * append_0x80_2: ORs the byte value 0x80 into one word of the eight-word
 * buffer w0, w1.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x8000, 0x800000, 0x80000000) of each word w0[0]..w1[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
10080 static void append_0x80_2 (u32x w0
[4], u32x w1
[4], const u32 offset
)
10089 w0
[0] = w0
[0] | 0x8000;
10093 w0
[0] = w0
[0] | 0x800000;
10097 w0
[0] = w0
[0] | 0x80000000;
10105 w0
[1] = w0
[1] | 0x8000;
10109 w0
[1] = w0
[1] | 0x800000;
10113 w0
[1] = w0
[1] | 0x80000000;
10121 w0
[2] = w0
[2] | 0x8000;
10125 w0
[2] = w0
[2] | 0x800000;
10129 w0
[2] = w0
[2] | 0x80000000;
10137 w0
[3] = w0
[3] | 0x8000;
10141 w0
[3] = w0
[3] | 0x800000;
10145 w0
[3] = w0
[3] | 0x80000000;
10153 w1
[0] = w1
[0] | 0x8000;
10157 w1
[0] = w1
[0] | 0x800000;
10161 w1
[0] = w1
[0] | 0x80000000;
10169 w1
[1] = w1
[1] | 0x8000;
10173 w1
[1] = w1
[1] | 0x800000;
10177 w1
[1] = w1
[1] | 0x80000000;
10185 w1
[2] = w1
[2] | 0x8000;
10189 w1
[2] = w1
[2] | 0x800000;
10193 w1
[2] = w1
[2] | 0x80000000;
10201 w1
[3] = w1
[3] | 0x8000;
10205 w1
[3] = w1
[3] | 0x800000;
10209 w1
[3] = w1
[3] | 0x80000000;
/*
 * append_0x80_2_be: variant of the 0x80 append for the eight-word buffer
 * w0, w1.
 *
 * Only one statement per word is visible here: each of w0[0]..w1[3] is
 * OR-ed with 0x80000000, i.e. 0x80 in the most significant byte.
 *
 * NOTE(review): the original line numbers are 16 apart, so further
 * statements per word (presumably the other three byte positions) and
 * the dispatch on `offset` are not visible in this extract - confirm
 * against the complete file.
 */
10214 static void append_0x80_2_be (u32x w0
[4], u32x w1
[4], const u32 offset
)
10219 w0
[0] |= 0x80000000;
10235 w0
[1] |= 0x80000000;
10251 w0
[2] |= 0x80000000;
10267 w0
[3] |= 0x80000000;
10283 w1
[0] |= 0x80000000;
10299 w1
[1] |= 0x80000000;
10315 w1
[2] |= 0x80000000;
10331 w1
[3] |= 0x80000000;
/*
 * append_0x80_3: ORs the byte value 0x80 into one word of the twelve-word
 * buffer w0, w1, w2.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x8000, 0x800000, 0x80000000) of each word w0[0]..w2[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
10348 static void append_0x80_3 (u32x w0
[4], u32x w1
[4], u32x w2
[4], const u32 offset
)
10357 w0
[0] = w0
[0] | 0x8000;
10361 w0
[0] = w0
[0] | 0x800000;
10365 w0
[0] = w0
[0] | 0x80000000;
10373 w0
[1] = w0
[1] | 0x8000;
10377 w0
[1] = w0
[1] | 0x800000;
10381 w0
[1] = w0
[1] | 0x80000000;
10389 w0
[2] = w0
[2] | 0x8000;
10393 w0
[2] = w0
[2] | 0x800000;
10397 w0
[2] = w0
[2] | 0x80000000;
10405 w0
[3] = w0
[3] | 0x8000;
10409 w0
[3] = w0
[3] | 0x800000;
10413 w0
[3] = w0
[3] | 0x80000000;
10421 w1
[0] = w1
[0] | 0x8000;
10425 w1
[0] = w1
[0] | 0x800000;
10429 w1
[0] = w1
[0] | 0x80000000;
10437 w1
[1] = w1
[1] | 0x8000;
10441 w1
[1] = w1
[1] | 0x800000;
10445 w1
[1] = w1
[1] | 0x80000000;
10453 w1
[2] = w1
[2] | 0x8000;
10457 w1
[2] = w1
[2] | 0x800000;
10461 w1
[2] = w1
[2] | 0x80000000;
10469 w1
[3] = w1
[3] | 0x8000;
10473 w1
[3] = w1
[3] | 0x800000;
10477 w1
[3] = w1
[3] | 0x80000000;
10485 w2
[0] = w2
[0] | 0x8000;
10489 w2
[0] = w2
[0] | 0x800000;
10493 w2
[0] = w2
[0] | 0x80000000;
10501 w2
[1] = w2
[1] | 0x8000;
10505 w2
[1] = w2
[1] | 0x800000;
10509 w2
[1] = w2
[1] | 0x80000000;
10517 w2
[2] = w2
[2] | 0x8000;
10521 w2
[2] = w2
[2] | 0x800000;
10525 w2
[2] = w2
[2] | 0x80000000;
10533 w2
[3] = w2
[3] | 0x8000;
10537 w2
[3] = w2
[3] | 0x800000;
10541 w2
[3] = w2
[3] | 0x80000000;
/*
 * append_0x80_4: ORs the byte value 0x80 into one word of the
 * sixteen-word buffer w0..w3.
 *
 * The visible statements cover byte positions 1, 2 and 3 (constants
 * 0x8000, 0x800000, 0x80000000) of each word w0[0]..w3[3].
 *
 * NOTE(review): the dispatch on `offset` (presumably one case per byte
 * offset) and the byte-position-0 statements are not visible in this
 * extract - confirm against the complete file.
 */
10546 static void append_0x80_4 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 offset
)
10555 w0
[0] = w0
[0] | 0x8000;
10559 w0
[0] = w0
[0] | 0x800000;
10563 w0
[0] = w0
[0] | 0x80000000;
10571 w0
[1] = w0
[1] | 0x8000;
10575 w0
[1] = w0
[1] | 0x800000;
10579 w0
[1] = w0
[1] | 0x80000000;
10587 w0
[2] = w0
[2] | 0x8000;
10591 w0
[2] = w0
[2] | 0x800000;
10595 w0
[2] = w0
[2] | 0x80000000;
10603 w0
[3] = w0
[3] | 0x8000;
10607 w0
[3] = w0
[3] | 0x800000;
10611 w0
[3] = w0
[3] | 0x80000000;
10619 w1
[0] = w1
[0] | 0x8000;
10623 w1
[0] = w1
[0] | 0x800000;
10627 w1
[0] = w1
[0] | 0x80000000;
10635 w1
[1] = w1
[1] | 0x8000;
10639 w1
[1] = w1
[1] | 0x800000;
10643 w1
[1] = w1
[1] | 0x80000000;
10651 w1
[2] = w1
[2] | 0x8000;
10655 w1
[2] = w1
[2] | 0x800000;
10659 w1
[2] = w1
[2] | 0x80000000;
10667 w1
[3] = w1
[3] | 0x8000;
10671 w1
[3] = w1
[3] | 0x800000;
10675 w1
[3] = w1
[3] | 0x80000000;
10683 w2
[0] = w2
[0] | 0x8000;
10687 w2
[0] = w2
[0] | 0x800000;
10691 w2
[0] = w2
[0] | 0x80000000;
10699 w2
[1] = w2
[1] | 0x8000;
10703 w2
[1] = w2
[1] | 0x800000;
10707 w2
[1] = w2
[1] | 0x80000000;
10715 w2
[2] = w2
[2] | 0x8000;
10719 w2
[2] = w2
[2] | 0x800000;
10723 w2
[2] = w2
[2] | 0x80000000;
10731 w2
[3] = w2
[3] | 0x8000;
10735 w2
[3] = w2
[3] | 0x800000;
10739 w2
[3] = w2
[3] | 0x80000000;
10747 w3
[0] = w3
[0] | 0x8000;
10751 w3
[0] = w3
[0] | 0x800000;
10755 w3
[0] = w3
[0] | 0x80000000;
10763 w3
[1] = w3
[1] | 0x8000;
10767 w3
[1] = w3
[1] | 0x800000;
10771 w3
[1] = w3
[1] | 0x80000000;
10779 w3
[2] = w3
[2] | 0x8000;
10783 w3
[2] = w3
[2] | 0x800000;
10787 w3
[2] = w3
[2] | 0x80000000;
10795 w3
[3] = w3
[3] | 0x8000;
10799 w3
[3] = w3
[3] | 0x800000;
10803 w3
[3] = w3
[3] | 0x80000000;
/*
 * append_0x80_8: ORs the byte value 0x80 into words of the eight 4-element
 * arrays w0 .. w7.
 *
 * Every statement visible below rewrites exactly one word as
 * `word | 0x8000`, `word | 0x800000` or `word | 0x80000000`, i.e. it sets
 * 0x80 in byte lane 1, 2 or 3 of that word and keeps all other bits.
 *
 * NOTE(review): the logic that selects a statement from `offset` and the
 * byte-lane-0 statements are not visible in this extract; presumably a
 * switch with one case per byte position -- confirm against the full file.
 */
10808 static void append_0x80_8 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], u32x w4
[4], u32x w5
[4], u32x w6
[4], u32x w7
[4], const u32 offset
)
/* words of w0 */
10817 w0
[0] = w0
[0] | 0x8000;
10821 w0
[0] = w0
[0] | 0x800000;
10825 w0
[0] = w0
[0] | 0x80000000;
10833 w0
[1] = w0
[1] | 0x8000;
10837 w0
[1] = w0
[1] | 0x800000;
10841 w0
[1] = w0
[1] | 0x80000000;
10849 w0
[2] = w0
[2] | 0x8000;
10853 w0
[2] = w0
[2] | 0x800000;
10857 w0
[2] = w0
[2] | 0x80000000;
10865 w0
[3] = w0
[3] | 0x8000;
10869 w0
[3] = w0
[3] | 0x800000;
10873 w0
[3] = w0
[3] | 0x80000000;
/* words of w1 */
10881 w1
[0] = w1
[0] | 0x8000;
10885 w1
[0] = w1
[0] | 0x800000;
10889 w1
[0] = w1
[0] | 0x80000000;
10897 w1
[1] = w1
[1] | 0x8000;
10901 w1
[1] = w1
[1] | 0x800000;
10905 w1
[1] = w1
[1] | 0x80000000;
10913 w1
[2] = w1
[2] | 0x8000;
10917 w1
[2] = w1
[2] | 0x800000;
10921 w1
[2] = w1
[2] | 0x80000000;
10929 w1
[3] = w1
[3] | 0x8000;
10933 w1
[3] = w1
[3] | 0x800000;
10937 w1
[3] = w1
[3] | 0x80000000;
/* words of w2 */
10945 w2
[0] = w2
[0] | 0x8000;
10949 w2
[0] = w2
[0] | 0x800000;
10953 w2
[0] = w2
[0] | 0x80000000;
10961 w2
[1] = w2
[1] | 0x8000;
10965 w2
[1] = w2
[1] | 0x800000;
10969 w2
[1] = w2
[1] | 0x80000000;
10977 w2
[2] = w2
[2] | 0x8000;
10981 w2
[2] = w2
[2] | 0x800000;
10985 w2
[2] = w2
[2] | 0x80000000;
10993 w2
[3] = w2
[3] | 0x8000;
10997 w2
[3] = w2
[3] | 0x800000;
11001 w2
[3] = w2
[3] | 0x80000000;
/* words of w3 */
11009 w3
[0] = w3
[0] | 0x8000;
11013 w3
[0] = w3
[0] | 0x800000;
11017 w3
[0] = w3
[0] | 0x80000000;
11025 w3
[1] = w3
[1] | 0x8000;
11029 w3
[1] = w3
[1] | 0x800000;
11033 w3
[1] = w3
[1] | 0x80000000;
11041 w3
[2] = w3
[2] | 0x8000;
11045 w3
[2] = w3
[2] | 0x800000;
11049 w3
[2] = w3
[2] | 0x80000000;
11057 w3
[3] = w3
[3] | 0x8000;
11061 w3
[3] = w3
[3] | 0x800000;
11065 w3
[3] = w3
[3] | 0x80000000;
/* words of w4 */
11073 w4
[0] = w4
[0] | 0x8000;
11077 w4
[0] = w4
[0] | 0x800000;
11081 w4
[0] = w4
[0] | 0x80000000;
11089 w4
[1] = w4
[1] | 0x8000;
11093 w4
[1] = w4
[1] | 0x800000;
11097 w4
[1] = w4
[1] | 0x80000000;
11105 w4
[2] = w4
[2] | 0x8000;
11109 w4
[2] = w4
[2] | 0x800000;
11113 w4
[2] = w4
[2] | 0x80000000;
11121 w4
[3] = w4
[3] | 0x8000;
11125 w4
[3] = w4
[3] | 0x800000;
11129 w4
[3] = w4
[3] | 0x80000000;
/* words of w5 */
11137 w5
[0] = w5
[0] | 0x8000;
11141 w5
[0] = w5
[0] | 0x800000;
11145 w5
[0] = w5
[0] | 0x80000000;
11153 w5
[1] = w5
[1] | 0x8000;
11157 w5
[1] = w5
[1] | 0x800000;
11161 w5
[1] = w5
[1] | 0x80000000;
11169 w5
[2] = w5
[2] | 0x8000;
11173 w5
[2] = w5
[2] | 0x800000;
11177 w5
[2] = w5
[2] | 0x80000000;
11185 w5
[3] = w5
[3] | 0x8000;
11189 w5
[3] = w5
[3] | 0x800000;
11193 w5
[3] = w5
[3] | 0x80000000;
/* words of w6 */
11201 w6
[0] = w6
[0] | 0x8000;
11205 w6
[0] = w6
[0] | 0x800000;
11209 w6
[0] = w6
[0] | 0x80000000;
11217 w6
[1] = w6
[1] | 0x8000;
11221 w6
[1] = w6
[1] | 0x800000;
11225 w6
[1] = w6
[1] | 0x80000000;
11233 w6
[2] = w6
[2] | 0x8000;
11237 w6
[2] = w6
[2] | 0x800000;
11241 w6
[2] = w6
[2] | 0x80000000;
11249 w6
[3] = w6
[3] | 0x8000;
11253 w6
[3] = w6
[3] | 0x800000;
11257 w6
[3] = w6
[3] | 0x80000000;
/* words of w7 */
11265 w7
[0] = w7
[0] | 0x8000;
11269 w7
[0] = w7
[0] | 0x800000;
11273 w7
[0] = w7
[0] | 0x80000000;
11281 w7
[1] = w7
[1] | 0x8000;
11285 w7
[1] = w7
[1] | 0x800000;
11289 w7
[1] = w7
[1] | 0x80000000;
11297 w7
[2] = w7
[2] | 0x8000;
11301 w7
[2] = w7
[2] | 0x800000;
11305 w7
[2] = w7
[2] | 0x80000000;
11313 w7
[3] = w7
[3] | 0x8000;
11317 w7
[3] = w7
[3] | 0x800000;
11321 w7
[3] = w7
[3] | 0x80000000;
/*
 * append_0x80_4 (flat-array form): ORs the byte value 0x80 into words of the
 * single 16-element array w.
 *
 * Every statement visible below rewrites exactly one word w[i] as
 * `w[i] | 0x8000`, `w[i] | 0x800000` or `w[i] | 0x80000000`, i.e. it sets
 * 0x80 in byte lane 1, 2 or 3 of that word and keeps all other bits.
 *
 * NOTE(review): the logic that selects a statement from `offset` and the
 * byte-lane-0 statements are not visible in this extract; presumably a
 * switch with one case per byte position -- confirm against the full file.
 * NOTE(review): this definition shares its name with the four-array
 * append_0x80_4 defined earlier in the file; confirm how the two are kept
 * from colliding (e.g. preprocessor guards that are not visible here).
 */
11326 static void append_0x80_4 (u32x w
[16], const u32 offset
)
11335 w
[ 0] = w
[ 0] | 0x8000;
11339 w
[ 0] = w
[ 0] | 0x800000;
11343 w
[ 0] = w
[ 0] | 0x80000000;
11351 w
[ 1] = w
[ 1] | 0x8000;
11355 w
[ 1] = w
[ 1] | 0x800000;
11359 w
[ 1] = w
[ 1] | 0x80000000;
11367 w
[ 2] = w
[ 2] | 0x8000;
11371 w
[ 2] = w
[ 2] | 0x800000;
11375 w
[ 2] = w
[ 2] | 0x80000000;
11383 w
[ 3] = w
[ 3] | 0x8000;
11387 w
[ 3] = w
[ 3] | 0x800000;
11391 w
[ 3] = w
[ 3] | 0x80000000;
11399 w
[ 4] = w
[ 4] | 0x8000;
11403 w
[ 4] = w
[ 4] | 0x800000;
11407 w
[ 4] = w
[ 4] | 0x80000000;
11415 w
[ 5] = w
[ 5] | 0x8000;
11419 w
[ 5] = w
[ 5] | 0x800000;
11423 w
[ 5] = w
[ 5] | 0x80000000;
11431 w
[ 6] = w
[ 6] | 0x8000;
11435 w
[ 6] = w
[ 6] | 0x800000;
11439 w
[ 6] = w
[ 6] | 0x80000000;
11447 w
[ 7] = w
[ 7] | 0x8000;
11451 w
[ 7] = w
[ 7] | 0x800000;
11455 w
[ 7] = w
[ 7] | 0x80000000;
11463 w
[ 8] = w
[ 8] | 0x8000;
11467 w
[ 8] = w
[ 8] | 0x800000;
11471 w
[ 8] = w
[ 8] | 0x80000000;
11479 w
[ 9] = w
[ 9] | 0x8000;
11483 w
[ 9] = w
[ 9] | 0x800000;
11487 w
[ 9] = w
[ 9] | 0x80000000;
11495 w
[10] = w
[10] | 0x8000;
11499 w
[10] = w
[10] | 0x800000;
11503 w
[10] = w
[10] | 0x80000000;
11511 w
[11] = w
[11] | 0x8000;
11515 w
[11] = w
[11] | 0x800000;
11519 w
[11] = w
[11] | 0x80000000;
11527 w
[12] = w
[12] | 0x8000;
11531 w
[12] = w
[12] | 0x800000;
11535 w
[12] = w
[12] | 0x80000000;
11543 w
[13] = w
[13] | 0x8000;
11547 w
[13] = w
[13] | 0x800000;
11551 w
[13] = w
[13] | 0x80000000;
11559 w
[14] = w
[14] | 0x8000;
11563 w
[14] = w
[14] | 0x800000;
11567 w
[14] = w
[14] | 0x80000000;
11575 w
[15] = w
[15] | 0x8000;
11579 w
[15] = w
[15] | 0x800000;
11583 w
[15] = w
[15] | 0x80000000;
/*
 * device_memcat2L (src_r0 declared as plain u32): fills words of dst0[0..1]
 * from the left-hand words src_l0 and the right-hand words src_r0.
 *
 * The visible statements come in groups, one per shift amount:
 *   - the first word written in a group is src_l0[i] OR'ed with src_r0[0]
 *     shifted left by 8, 16 or 24 bits, or a plain copy of src_r0[0];
 *   - a following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group.
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
11588 static void device_memcat2L (const u32 offset
, u32x dst0
[2], u32x src_l0
[2], u32 src_r0
[2])
/* merges into dst0[0], remainder carried into dst0[1] */
11593 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
11594 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11598 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
11599 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11603 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
11604 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
/* unshifted copy: only dst0[1] is written */
11608 dst0
[1] = src_r0
[0];
/* merges into dst0[1]; nothing is carried further */
11612 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
11616 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
11620 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
/*
 * device_memcat2L (src_r0 declared as u32x): fills words of dst0[0..1] from
 * the left-hand words src_l0 and the right-hand words src_r0.
 *
 * The visible statements come in groups, one per shift amount:
 *   - the first word written in a group is src_l0[i] OR'ed with src_r0[0]
 *     shifted left by 8, 16 or 24 bits, or a plain copy of src_r0[0];
 *   - a following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group.
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
11625 static void device_memcat2L (const u32 offset
, u32x dst0
[2], u32x src_l0
[2], u32x src_r0
[2])
/* merges into dst0[0], remainder carried into dst0[1] */
11630 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
11631 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11635 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
11636 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11640 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
11641 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
/* unshifted copy: only dst0[1] is written */
11645 dst0
[1] = src_r0
[0];
/* merges into dst0[1]; nothing is carried further */
11649 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
11653 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
11657 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
/*
 * device_memcat4L (src_r0 declared as plain u32): fills words of dst0[0..3]
 * from the left-hand words src_l0 and the right-hand words src_r0.
 *
 * The visible statements come in groups, one per starting word and shift:
 *   - the first word written in a group is src_l0[i] OR'ed with src_r0[0]
 *     shifted left by 8, 16 or 24 bits, or a plain copy of src_r0[0];
 *   - each following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group.
 * Groups that start at a later dst0 word use fewer src_r0 words; no
 * statement here writes past dst0[3].
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
11662 static void device_memcat4L (const u32 offset
, u32x dst0
[4], u32x src_l0
[4], u32 src_r0
[4])
/* groups whose first written word is dst0[0] */
11667 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
11668 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11669 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11670 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11674 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
11675 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11676 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11677 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11681 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
11682 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11683 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11684 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
/* groups whose first written word is dst0[1] */
11688 dst0
[1] = src_r0
[0];
11689 dst0
[2] = src_r0
[1];
11690 dst0
[3] = src_r0
[2];
11694 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
11695 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11696 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11700 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
11701 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11702 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11706 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
11707 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11708 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
/* groups whose first written word is dst0[2] */
11712 dst0
[2] = src_r0
[0];
11713 dst0
[3] = src_r0
[1];
11717 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
11718 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11722 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
11723 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11727 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
11728 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
/* groups that write only dst0[3] */
11732 dst0
[3] = src_r0
[0];
11736 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
11740 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
11744 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
/*
 * device_memcat4L (src_r0 declared as u32x): fills words of dst0[0..3] from
 * the left-hand words src_l0 and the right-hand words src_r0.
 *
 * The visible statements come in groups, one per starting word and shift:
 *   - the first word written in a group is src_l0[i] OR'ed with src_r0[0]
 *     shifted left by 8, 16 or 24 bits, or a plain copy of src_r0[0];
 *   - each following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group.
 * Groups that start at a later dst0 word use fewer src_r0 words; no
 * statement here writes past dst0[3].
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
11749 static void device_memcat4L (const u32 offset
, u32x dst0
[4], u32x src_l0
[4], u32x src_r0
[4])
/* groups whose first written word is dst0[0] */
11754 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
11755 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11756 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11757 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11761 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
11762 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11763 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11764 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11768 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
11769 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11770 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11771 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
/* groups whose first written word is dst0[1] */
11775 dst0
[1] = src_r0
[0];
11776 dst0
[2] = src_r0
[1];
11777 dst0
[3] = src_r0
[2];
11781 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
11782 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11783 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11787 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
11788 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11789 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11793 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
11794 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11795 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
/* groups whose first written word is dst0[2] */
11799 dst0
[2] = src_r0
[0];
11800 dst0
[3] = src_r0
[1];
11804 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
11805 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11809 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
11810 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11814 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
11815 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
/* groups that write only dst0[3] */
11819 dst0
[3] = src_r0
[0];
11823 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
11827 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
11831 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
/*
 * device_memcat8L (src_r0 declared as plain u32): fills words of the two
 * destination arrays dst0[0..3] and dst1[0..3] from the left-hand words
 * src_l0 / src_l1 and the four right-hand words src_r0[0..3].
 *
 * The visible statements come in groups, one per starting word and shift:
 *   - the first word written in a group is a src_l0[i] or src_l1[i] word
 *     OR'ed with src_r0[0] shifted left by 8, 16 or 24 bits, or a plain copy
 *     of src_r0[0];
 *   - each following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group;
 *   - in groups that start inside dst0, the last statement stores the bits
 *     shifted out of src_r0[3] (or src_r0[3] itself) into a dst1 word.
 * Groups that start inside dst1 never write past dst1[3], so the later ones
 * use fewer src_r0 words.
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
11836 static void device_memcat8L (const u32 offset
, u32x dst0
[4], u32x dst1
[4], u32x src_l0
[4], u32x src_l1
[4], u32 src_r0
[4])
/* groups whose first written word is dst0[0] */
11841 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
11842 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11843 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11844 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11845 dst1
[0] = src_r0
[3] >> 24;
11849 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
11850 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11851 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11852 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11853 dst1
[0] = src_r0
[3] >> 16;
11857 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
11858 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11859 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11860 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
11861 dst1
[0] = src_r0
[3] >> 8;
/* groups whose first written word is dst0[1] */
11865 dst0
[1] = src_r0
[0];
11866 dst0
[2] = src_r0
[1];
11867 dst0
[3] = src_r0
[2];
11868 dst1
[0] = src_r0
[3];
11872 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
11873 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11874 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11875 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11876 dst1
[1] = src_r0
[3] >> 24;
11880 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
11881 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11882 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11883 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11884 dst1
[1] = src_r0
[3] >> 16;
11888 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
11889 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11890 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11891 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
11892 dst1
[1] = src_r0
[3] >> 8;
/* groups whose first written word is dst0[2] */
11896 dst0
[2] = src_r0
[0];
11897 dst0
[3] = src_r0
[1];
11898 dst1
[0] = src_r0
[2];
11899 dst1
[1] = src_r0
[3];
11903 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
11904 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11905 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11906 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11907 dst1
[2] = src_r0
[3] >> 24;
11911 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
11912 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11913 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11914 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11915 dst1
[2] = src_r0
[3] >> 16;
11919 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
11920 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11921 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11922 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
11923 dst1
[2] = src_r0
[3] >> 8;
/* groups whose first written word is dst0[3] */
11927 dst0
[3] = src_r0
[0];
11928 dst1
[0] = src_r0
[1];
11929 dst1
[1] = src_r0
[2];
11930 dst1
[2] = src_r0
[3];
11934 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
11935 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11936 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11937 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11938 dst1
[3] = src_r0
[3] >> 24;
11942 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
11943 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11944 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11945 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11946 dst1
[3] = src_r0
[3] >> 16;
11950 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
11951 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11952 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11953 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
11954 dst1
[3] = src_r0
[3] >> 8;
/* from here on only dst1 is written; merges use src_l1 */
11958 dst1
[0] = src_r0
[0];
11959 dst1
[1] = src_r0
[1];
11960 dst1
[2] = src_r0
[2];
11961 dst1
[3] = src_r0
[3];
11965 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
11966 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11967 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11968 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
11972 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
11973 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
11974 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
11975 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
11979 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
11980 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
11981 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
11982 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
11986 dst1
[1] = src_r0
[0];
11987 dst1
[2] = src_r0
[1];
11988 dst1
[3] = src_r0
[2];
11992 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
11993 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
11994 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
11998 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
11999 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12000 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12004 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
12005 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12006 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12010 dst1
[2] = src_r0
[0];
12011 dst1
[3] = src_r0
[1];
12015 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
12016 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12020 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
12021 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12025 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
12026 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12030 dst1
[3] = src_r0
[0];
12034 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
12038 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
12042 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
/*
 * device_memcat8L (src_r0 declared as u32x): fills words of the two
 * destination arrays dst0[0..3] and dst1[0..3] from the left-hand words
 * src_l0 / src_l1 and the four right-hand words src_r0[0..3].
 *
 * The visible statements come in groups, one per starting word and shift:
 *   - the first word written in a group is a src_l0[i] or src_l1[i] word
 *     OR'ed with src_r0[0] shifted left by 8, 16 or 24 bits, or a plain copy
 *     of src_r0[0];
 *   - each following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group;
 *   - in groups that start inside dst0, the last statement stores the bits
 *     shifted out of src_r0[3] (or src_r0[3] itself) into a dst1 word.
 * Groups that start inside dst1 never write past dst1[3], so the later ones
 * use fewer src_r0 words.
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
12047 static void device_memcat8L (const u32 offset
, u32x dst0
[4], u32x dst1
[4], u32x src_l0
[4], u32x src_l1
[4], u32x src_r0
[4])
/* groups whose first written word is dst0[0] */
12052 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
12053 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12054 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12055 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12056 dst1
[0] = src_r0
[3] >> 24;
12060 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
12061 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12062 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12063 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12064 dst1
[0] = src_r0
[3] >> 16;
12068 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
12069 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12070 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12071 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12072 dst1
[0] = src_r0
[3] >> 8;
/* groups whose first written word is dst0[1] */
12076 dst0
[1] = src_r0
[0];
12077 dst0
[2] = src_r0
[1];
12078 dst0
[3] = src_r0
[2];
12079 dst1
[0] = src_r0
[3];
12083 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
12084 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12085 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12086 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12087 dst1
[1] = src_r0
[3] >> 24;
12091 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
12092 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12093 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12094 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12095 dst1
[1] = src_r0
[3] >> 16;
12099 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
12100 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12101 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12102 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12103 dst1
[1] = src_r0
[3] >> 8;
/* groups whose first written word is dst0[2] */
12107 dst0
[2] = src_r0
[0];
12108 dst0
[3] = src_r0
[1];
12109 dst1
[0] = src_r0
[2];
12110 dst1
[1] = src_r0
[3];
12114 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
12115 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12116 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12117 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12118 dst1
[2] = src_r0
[3] >> 24;
12122 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
12123 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12124 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12125 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12126 dst1
[2] = src_r0
[3] >> 16;
12130 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
12131 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12132 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12133 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12134 dst1
[2] = src_r0
[3] >> 8;
/* groups whose first written word is dst0[3] */
12138 dst0
[3] = src_r0
[0];
12139 dst1
[0] = src_r0
[1];
12140 dst1
[1] = src_r0
[2];
12141 dst1
[2] = src_r0
[3];
12145 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
12146 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12147 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12148 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12149 dst1
[3] = src_r0
[3] >> 24;
12153 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
12154 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12155 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12156 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12157 dst1
[3] = src_r0
[3] >> 16;
12161 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
12162 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12163 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12164 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12165 dst1
[3] = src_r0
[3] >> 8;
/* from here on only dst1 is written; merges use src_l1 */
12169 dst1
[0] = src_r0
[0];
12170 dst1
[1] = src_r0
[1];
12171 dst1
[2] = src_r0
[2];
12172 dst1
[3] = src_r0
[3];
12176 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
12177 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12178 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12179 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12183 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
12184 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12185 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12186 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12190 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
12191 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12192 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12193 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12197 dst1
[1] = src_r0
[0];
12198 dst1
[2] = src_r0
[1];
12199 dst1
[3] = src_r0
[2];
12203 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
12204 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12205 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12209 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
12210 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12211 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12215 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
12216 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12217 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12221 dst1
[2] = src_r0
[0];
12222 dst1
[3] = src_r0
[1];
12226 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
12227 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12231 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
12232 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12236 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
12237 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12241 dst1
[3] = src_r0
[0];
12245 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
12249 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
12253 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
/*
 * device_memcat12L (src_r0 declared as plain u32): fills words of the three
 * destination arrays dst0[0..3], dst1[0..3] and dst2[0..3] from the
 * left-hand words src_l0 / src_l1 / src_l2 and the four right-hand words
 * src_r0[0..3].
 *
 * The visible statements come in groups, one per starting word and shift:
 *   - the first word written in a group is a src_l0[i], src_l1[i] or
 *     src_l2[i] word OR'ed with src_r0[0] shifted left by 8, 16 or 24 bits,
 *     or a plain copy of src_r0[0];
 *   - each following word joins the bits shifted out of one src_r0 word
 *     (>> 24, >> 16, >> 8) with the next src_r0 word shifted left by the
 *     same amount as the first word of the group;
 *   - groups that start inside dst0 or dst1 spill their last words into the
 *     next destination array (dst1 or dst2 respectively).
 * Groups that start inside dst2 never write past dst2[3], so the later ones
 * use fewer src_r0 words.
 * Shifts bind tighter than `|`, so `a >> 24 | b << 8` is `(a >> 24) | (b << 8)`.
 *
 * NOTE(review): how `offset` selects a group is not visible in this extract;
 * presumably a switch with one case per byte offset -- confirm against the
 * full file.
 */
12258 static void device_memcat12L (const u32 offset
, u32x dst0
[4], u32x dst1
[4], u32x dst2
[4], u32x src_l0
[4], u32x src_l1
[4], u32x src_l2
[4], u32 src_r0
[4])
/* groups whose first written word lies in dst0 (merges use src_l0) */
12263 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
12264 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12265 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12266 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12267 dst1
[0] = src_r0
[3] >> 24;
12271 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
12272 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12273 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12274 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12275 dst1
[0] = src_r0
[3] >> 16;
12279 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
12280 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12281 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12282 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12283 dst1
[0] = src_r0
[3] >> 8;
12287 dst0
[1] = src_r0
[0];
12288 dst0
[2] = src_r0
[1];
12289 dst0
[3] = src_r0
[2];
12290 dst1
[0] = src_r0
[3];
12294 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
12295 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12296 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12297 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12298 dst1
[1] = src_r0
[3] >> 24;
12302 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
12303 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12304 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12305 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12306 dst1
[1] = src_r0
[3] >> 16;
12310 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
12311 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12312 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12313 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12314 dst1
[1] = src_r0
[3] >> 8;
12318 dst0
[2] = src_r0
[0];
12319 dst0
[3] = src_r0
[1];
12320 dst1
[0] = src_r0
[2];
12321 dst1
[1] = src_r0
[3];
12325 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
12326 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12327 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12328 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12329 dst1
[2] = src_r0
[3] >> 24;
12333 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
12334 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12335 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12336 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12337 dst1
[2] = src_r0
[3] >> 16;
12341 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
12342 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12343 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12344 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12345 dst1
[2] = src_r0
[3] >> 8;
12349 dst0
[3] = src_r0
[0];
12350 dst1
[0] = src_r0
[1];
12351 dst1
[1] = src_r0
[2];
12352 dst1
[2] = src_r0
[3];
12356 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
12357 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12358 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12359 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12360 dst1
[3] = src_r0
[3] >> 24;
12364 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
12365 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12366 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12367 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12368 dst1
[3] = src_r0
[3] >> 16;
12372 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
12373 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12374 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12375 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12376 dst1
[3] = src_r0
[3] >> 8;
/* groups whose first written word lies in dst1 (merges use src_l1) */
12380 dst1
[0] = src_r0
[0];
12381 dst1
[1] = src_r0
[1];
12382 dst1
[2] = src_r0
[2];
12383 dst1
[3] = src_r0
[3];
12387 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
12388 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12389 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12390 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12391 dst2
[0] = src_r0
[3] >> 24;
12395 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
12396 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12397 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12398 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12399 dst2
[0] = src_r0
[3] >> 16;
12403 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
12404 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12405 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12406 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12407 dst2
[0] = src_r0
[3] >> 8;
12411 dst1
[1] = src_r0
[0];
12412 dst1
[2] = src_r0
[1];
12413 dst1
[3] = src_r0
[2];
12414 dst2
[0] = src_r0
[3];
12418 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
12419 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12420 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12421 dst2
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12422 dst2
[1] = src_r0
[3] >> 24;
12426 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
12427 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12428 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12429 dst2
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12430 dst2
[1] = src_r0
[3] >> 16;
12434 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
12435 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12436 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12437 dst2
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12438 dst2
[1] = src_r0
[3] >> 8;
12442 dst1
[2] = src_r0
[0];
12443 dst1
[3] = src_r0
[1];
12444 dst2
[0] = src_r0
[2];
12445 dst2
[1] = src_r0
[3];
12449 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
12450 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12451 dst2
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12452 dst2
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12453 dst2
[2] = src_r0
[3] >> 24;
12457 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
12458 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12459 dst2
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12460 dst2
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12461 dst2
[2] = src_r0
[3] >> 16;
12465 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
12466 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12467 dst2
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12468 dst2
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12469 dst2
[2] = src_r0
[3] >> 8;
12473 dst1
[3] = src_r0
[0];
12474 dst2
[0] = src_r0
[1];
12475 dst2
[1] = src_r0
[2];
12476 dst2
[2] = src_r0
[3];
12480 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
12481 dst2
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12482 dst2
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12483 dst2
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12484 dst2
[3] = src_r0
[3] >> 24;
12488 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
12489 dst2
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12490 dst2
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12491 dst2
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12492 dst2
[3] = src_r0
[3] >> 16;
12496 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
12497 dst2
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12498 dst2
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12499 dst2
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12500 dst2
[3] = src_r0
[3] >> 8;
/* groups that write only dst2 (merges use src_l2) */
12504 dst2
[0] = src_r0
[0];
12505 dst2
[1] = src_r0
[1];
12506 dst2
[2] = src_r0
[2];
12507 dst2
[3] = src_r0
[3];
12511 dst2
[0] = src_l2
[0] | src_r0
[0] << 8;
12512 dst2
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12513 dst2
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12514 dst2
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12518 dst2
[0] = src_l2
[0] | src_r0
[0] << 16;
12519 dst2
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12520 dst2
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12521 dst2
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12525 dst2
[0] = src_l2
[0] | src_r0
[0] << 24;
12526 dst2
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12527 dst2
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12528 dst2
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12532 dst2
[1] = src_r0
[0];
12533 dst2
[2] = src_r0
[1];
12534 dst2
[3] = src_r0
[2];
12538 dst2
[1] = src_l2
[1] | src_r0
[0] << 8;
12539 dst2
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12540 dst2
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12544 dst2
[1] = src_l2
[1] | src_r0
[0] << 16;
12545 dst2
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12546 dst2
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12550 dst2
[1] = src_l2
[1] | src_r0
[0] << 24;
12551 dst2
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12552 dst2
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12556 dst2
[2] = src_r0
[0];
12557 dst2
[3] = src_r0
[1];
12561 dst2
[2] = src_l2
[2] | src_r0
[0] << 8;
12562 dst2
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12566 dst2
[2] = src_l2
[2] | src_r0
[0] << 16;
12567 dst2
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12571 dst2
[2] = src_l2
[2] | src_r0
[0] << 24;
12572 dst2
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12576 dst2
[3] = src_r0
[0];
12580 dst2
[3] = src_l2
[3] | src_r0
[0] << 8;
12584 dst2
[3] = src_l2
[3] | src_r0
[0] << 16;
12588 dst2
[3] = src_l2
[3] | src_r0
[0] << 24;
12593 static void device_memcat12L (const u32 offset
, u32x dst0
[4], u32x dst1
[4], u32x dst2
[4], u32x src_l0
[4], u32x src_l1
[4], u32x src_l2
[4], u32x src_r0
[4])
12598 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
12599 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12600 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12601 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12602 dst1
[0] = src_r0
[3] >> 24;
12606 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
12607 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12608 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12609 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12610 dst1
[0] = src_r0
[3] >> 16;
12614 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
12615 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12616 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12617 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12618 dst1
[0] = src_r0
[3] >> 8;
12622 dst0
[1] = src_r0
[0];
12623 dst0
[2] = src_r0
[1];
12624 dst0
[3] = src_r0
[2];
12625 dst1
[0] = src_r0
[3];
12629 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
12630 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12631 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12632 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12633 dst1
[1] = src_r0
[3] >> 24;
12637 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
12638 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12639 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12640 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12641 dst1
[1] = src_r0
[3] >> 16;
12645 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
12646 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12647 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12648 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12649 dst1
[1] = src_r0
[3] >> 8;
12653 dst0
[2] = src_r0
[0];
12654 dst0
[3] = src_r0
[1];
12655 dst1
[0] = src_r0
[2];
12656 dst1
[1] = src_r0
[3];
12660 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
12661 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12662 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12663 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12664 dst1
[2] = src_r0
[3] >> 24;
12668 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
12669 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12670 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12671 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12672 dst1
[2] = src_r0
[3] >> 16;
12676 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
12677 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12678 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12679 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12680 dst1
[2] = src_r0
[3] >> 8;
12684 dst0
[3] = src_r0
[0];
12685 dst1
[0] = src_r0
[1];
12686 dst1
[1] = src_r0
[2];
12687 dst1
[2] = src_r0
[3];
12691 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
12692 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12693 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12694 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12695 dst1
[3] = src_r0
[3] >> 24;
12699 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
12700 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12701 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12702 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12703 dst1
[3] = src_r0
[3] >> 16;
12707 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
12708 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12709 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12710 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12711 dst1
[3] = src_r0
[3] >> 8;
12715 dst1
[0] = src_r0
[0];
12716 dst1
[1] = src_r0
[1];
12717 dst1
[2] = src_r0
[2];
12718 dst1
[3] = src_r0
[3];
12722 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
12723 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12724 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12725 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12726 dst2
[0] = src_r0
[3] >> 24;
12730 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
12731 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12732 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12733 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12734 dst2
[0] = src_r0
[3] >> 16;
12738 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
12739 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12740 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12741 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12742 dst2
[0] = src_r0
[3] >> 8;
12746 dst1
[1] = src_r0
[0];
12747 dst1
[2] = src_r0
[1];
12748 dst1
[3] = src_r0
[2];
12749 dst2
[0] = src_r0
[3];
12753 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
12754 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12755 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12756 dst2
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12757 dst2
[1] = src_r0
[3] >> 24;
12761 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
12762 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12763 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12764 dst2
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12765 dst2
[1] = src_r0
[3] >> 16;
12769 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
12770 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12771 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12772 dst2
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12773 dst2
[1] = src_r0
[3] >> 8;
12777 dst1
[2] = src_r0
[0];
12778 dst1
[3] = src_r0
[1];
12779 dst2
[0] = src_r0
[2];
12780 dst2
[1] = src_r0
[3];
12784 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
12785 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12786 dst2
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12787 dst2
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12788 dst2
[2] = src_r0
[3] >> 24;
12792 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
12793 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12794 dst2
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12795 dst2
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12796 dst2
[2] = src_r0
[3] >> 16;
12800 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
12801 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12802 dst2
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12803 dst2
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12804 dst2
[2] = src_r0
[3] >> 8;
12808 dst1
[3] = src_r0
[0];
12809 dst2
[0] = src_r0
[1];
12810 dst2
[1] = src_r0
[2];
12811 dst2
[2] = src_r0
[3];
12815 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
12816 dst2
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12817 dst2
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12818 dst2
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12819 dst2
[3] = src_r0
[3] >> 24;
12823 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
12824 dst2
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12825 dst2
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12826 dst2
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12827 dst2
[3] = src_r0
[3] >> 16;
12831 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
12832 dst2
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12833 dst2
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12834 dst2
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12835 dst2
[3] = src_r0
[3] >> 8;
12839 dst2
[0] = src_r0
[0];
12840 dst2
[1] = src_r0
[1];
12841 dst2
[2] = src_r0
[2];
12842 dst2
[3] = src_r0
[3];
12846 dst2
[0] = src_l2
[0] | src_r0
[0] << 8;
12847 dst2
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12848 dst2
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12849 dst2
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12853 dst2
[0] = src_l2
[0] | src_r0
[0] << 16;
12854 dst2
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12855 dst2
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12856 dst2
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12860 dst2
[0] = src_l2
[0] | src_r0
[0] << 24;
12861 dst2
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12862 dst2
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12863 dst2
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12867 dst2
[1] = src_r0
[0];
12868 dst2
[2] = src_r0
[1];
12869 dst2
[3] = src_r0
[2];
12873 dst2
[1] = src_l2
[1] | src_r0
[0] << 8;
12874 dst2
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12875 dst2
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12879 dst2
[1] = src_l2
[1] | src_r0
[0] << 16;
12880 dst2
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12881 dst2
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12885 dst2
[1] = src_l2
[1] | src_r0
[0] << 24;
12886 dst2
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12887 dst2
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12891 dst2
[2] = src_r0
[0];
12892 dst2
[3] = src_r0
[1];
12896 dst2
[2] = src_l2
[2] | src_r0
[0] << 8;
12897 dst2
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12901 dst2
[2] = src_l2
[2] | src_r0
[0] << 16;
12902 dst2
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12906 dst2
[2] = src_l2
[2] | src_r0
[0] << 24;
12907 dst2
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12911 dst2
[3] = src_r0
[0];
12915 dst2
[3] = src_l2
[3] | src_r0
[0] << 8;
12919 dst2
[3] = src_l2
[3] | src_r0
[0] << 16;
12923 dst2
[3] = src_l2
[3] | src_r0
[0] << 24;
12928 static void device_memcat12L (const u32 offset
, u32x dst0
[4], u32x dst1
[4], u32x dst2
[4], u32x src_l0
[4], u32x src_l1
[4], u32x src_l2
[4], u32x src_r0
[4], u32x src_r1
[4])
12933 dst0
[0] = src_r0
[0];
12934 dst0
[1] = src_r0
[1];
12935 dst0
[2] = src_r0
[2];
12936 dst0
[3] = src_r0
[3];
12937 dst1
[0] = src_r1
[0];
12938 dst1
[1] = src_r1
[1];
12939 dst1
[2] = src_r1
[2];
12940 dst1
[3] = src_r1
[3];
12944 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
12945 dst0
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12946 dst0
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12947 dst0
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12948 dst1
[0] = src_r0
[3] >> 24 | src_r1
[0] << 8;
12949 dst1
[1] = src_r1
[0] >> 24 | src_r1
[1] << 8;
12950 dst1
[2] = src_r1
[1] >> 24 | src_r1
[2] << 8;
12951 dst1
[3] = src_r1
[2] >> 24 | src_r1
[3] << 8;
12952 dst2
[0] = src_r1
[3] >> 24;
12956 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
12957 dst0
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
12958 dst0
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
12959 dst0
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
12960 dst1
[0] = src_r0
[3] >> 16 | src_r1
[0] << 16;
12961 dst1
[1] = src_r1
[0] >> 16 | src_r1
[1] << 16;
12962 dst1
[2] = src_r1
[1] >> 16 | src_r1
[2] << 16;
12963 dst1
[3] = src_r1
[2] >> 16 | src_r1
[3] << 16;
12964 dst2
[0] = src_r1
[3] >> 16;
12968 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
12969 dst0
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
12970 dst0
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
12971 dst0
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
12972 dst1
[0] = src_r0
[3] >> 8 | src_r1
[0] << 24;
12973 dst1
[1] = src_r1
[0] >> 8 | src_r1
[1] << 24;
12974 dst1
[2] = src_r1
[1] >> 8 | src_r1
[2] << 24;
12975 dst1
[3] = src_r1
[2] >> 8 | src_r1
[3] << 24;
12976 dst2
[0] = src_r1
[3] >> 8;
12980 dst0
[1] = src_r0
[0];
12981 dst0
[2] = src_r0
[1];
12982 dst0
[3] = src_r0
[2];
12983 dst1
[0] = src_r0
[3];
12984 dst1
[1] = src_r1
[0];
12985 dst1
[2] = src_r1
[1];
12986 dst1
[3] = src_r1
[2];
12987 dst2
[0] = src_r1
[3];
12991 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
12992 dst0
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
12993 dst0
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
12994 dst1
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
12995 dst1
[1] = src_r0
[3] >> 24 | src_r1
[0] << 8;
12996 dst1
[2] = src_r1
[0] >> 24 | src_r1
[1] << 8;
12997 dst1
[3] = src_r1
[1] >> 24 | src_r1
[2] << 8;
12998 dst2
[0] = src_r1
[2] >> 24 | src_r1
[3] << 8;
12999 dst2
[1] = src_r1
[3] >> 24;
13003 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
13004 dst0
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13005 dst0
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13006 dst1
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13007 dst1
[1] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13008 dst1
[2] = src_r1
[0] >> 16 | src_r1
[1] << 16;
13009 dst1
[3] = src_r1
[1] >> 16 | src_r1
[2] << 16;
13010 dst2
[0] = src_r1
[2] >> 16 | src_r1
[3] << 16;
13011 dst2
[1] = src_r1
[3] >> 16;
13015 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
13016 dst0
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13017 dst0
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13018 dst1
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13019 dst1
[1] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13020 dst1
[2] = src_r1
[0] >> 8 | src_r1
[1] << 24;
13021 dst1
[3] = src_r1
[1] >> 8 | src_r1
[2] << 24;
13022 dst2
[0] = src_r1
[2] >> 8 | src_r1
[3] << 24;
13023 dst2
[1] = src_r1
[3] >> 8;
13027 dst0
[2] = src_r0
[0];
13028 dst0
[3] = src_r0
[1];
13029 dst1
[0] = src_r0
[2];
13030 dst1
[1] = src_r0
[3];
13031 dst1
[2] = src_r1
[0];
13032 dst1
[3] = src_r1
[1];
13033 dst2
[0] = src_r1
[2];
13034 dst2
[1] = src_r1
[3];
13038 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
13039 dst0
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13040 dst1
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13041 dst1
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13042 dst1
[2] = src_r0
[3] >> 24 | src_r1
[0] << 8;
13043 dst1
[3] = src_r1
[0] >> 24 | src_r1
[1] << 8;
13044 dst2
[0] = src_r1
[1] >> 24 | src_r1
[2] << 8;
13045 dst2
[1] = src_r1
[2] >> 24 | src_r1
[3] << 8;
13046 dst2
[2] = src_r1
[3] >> 24;
13050 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
13051 dst0
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13052 dst1
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13053 dst1
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13054 dst1
[2] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13055 dst1
[3] = src_r1
[0] >> 16 | src_r1
[1] << 16;
13056 dst2
[0] = src_r1
[1] >> 16 | src_r1
[2] << 16;
13057 dst2
[1] = src_r1
[2] >> 16 | src_r1
[3] << 16;
13058 dst2
[2] = src_r1
[3] >> 16;
13062 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
13063 dst0
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13064 dst1
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13065 dst1
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13066 dst1
[2] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13067 dst1
[3] = src_r1
[0] >> 8 | src_r1
[1] << 24;
13068 dst2
[0] = src_r1
[1] >> 8 | src_r1
[2] << 24;
13069 dst2
[1] = src_r1
[2] >> 8 | src_r1
[3] << 24;
13070 dst2
[2] = src_r1
[3] >> 8;
13074 dst0
[3] = src_r0
[0];
13075 dst1
[0] = src_r0
[1];
13076 dst1
[1] = src_r0
[2];
13077 dst1
[2] = src_r0
[3];
13078 dst1
[3] = src_r1
[0];
13079 dst2
[0] = src_r1
[1];
13080 dst2
[1] = src_r1
[2];
13081 dst2
[2] = src_r1
[3];
13085 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
13086 dst1
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13087 dst1
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13088 dst1
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13089 dst1
[3] = src_r0
[3] >> 24 | src_r1
[0] << 8;
13090 dst2
[0] = src_r1
[0] >> 24 | src_r1
[1] << 8;
13091 dst2
[1] = src_r1
[1] >> 24 | src_r1
[2] << 8;
13092 dst2
[2] = src_r1
[2] >> 24 | src_r1
[3] << 8;
13093 dst2
[3] = src_r1
[3] >> 24;
13097 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
13098 dst1
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13099 dst1
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13100 dst1
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13101 dst1
[3] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13102 dst2
[0] = src_r1
[0] >> 16 | src_r1
[1] << 16;
13103 dst2
[1] = src_r1
[1] >> 16 | src_r1
[2] << 16;
13104 dst2
[2] = src_r1
[2] >> 16 | src_r1
[3] << 16;
13105 dst2
[3] = src_r1
[3] >> 16;
13109 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
13110 dst1
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13111 dst1
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13112 dst1
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13113 dst1
[3] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13114 dst2
[0] = src_r1
[0] >> 8 | src_r1
[1] << 24;
13115 dst2
[1] = src_r1
[1] >> 8 | src_r1
[2] << 24;
13116 dst2
[2] = src_r1
[2] >> 8 | src_r1
[3] << 24;
13117 dst2
[3] = src_r1
[3] >> 8;
13121 dst1
[0] = src_r0
[0];
13122 dst1
[1] = src_r0
[1];
13123 dst1
[2] = src_r0
[2];
13124 dst1
[3] = src_r0
[3];
13125 dst2
[0] = src_r1
[0];
13126 dst2
[1] = src_r1
[1];
13127 dst2
[2] = src_r1
[2];
13128 dst2
[3] = src_r1
[3];
13132 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
13133 dst1
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13134 dst1
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13135 dst1
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13136 dst2
[0] = src_r0
[3] >> 24 | src_r1
[0] << 8;
13137 dst2
[1] = src_r1
[0] >> 24 | src_r1
[1] << 8;
13138 dst2
[2] = src_r1
[1] >> 24 | src_r1
[2] << 8;
13139 dst2
[3] = src_r1
[2] >> 24 | src_r1
[3] << 8;
13143 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
13144 dst1
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13145 dst1
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13146 dst1
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13147 dst2
[0] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13148 dst2
[1] = src_r1
[0] >> 16 | src_r1
[1] << 16;
13149 dst2
[2] = src_r1
[1] >> 16 | src_r1
[2] << 16;
13150 dst2
[3] = src_r1
[2] >> 16 | src_r1
[3] << 16;
13154 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
13155 dst1
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13156 dst1
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13157 dst1
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13158 dst2
[0] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13159 dst2
[1] = src_r1
[0] >> 8 | src_r1
[1] << 24;
13160 dst2
[2] = src_r1
[1] >> 8 | src_r1
[2] << 24;
13161 dst2
[3] = src_r1
[2] >> 8 | src_r1
[3] << 24;
13165 dst1
[1] = src_r1
[0];
13166 dst1
[2] = src_r0
[1];
13167 dst1
[3] = src_r0
[2];
13168 dst2
[0] = src_r0
[3];
13169 dst2
[1] = src_r1
[0];
13170 dst2
[2] = src_r1
[1];
13171 dst2
[3] = src_r1
[2];
13175 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
13176 dst1
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13177 dst1
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13178 dst2
[0] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13179 dst2
[1] = src_r0
[3] >> 24 | src_r1
[0] << 8;
13180 dst2
[2] = src_r1
[0] >> 24 | src_r1
[1] << 8;
13181 dst2
[3] = src_r1
[1] >> 24 | src_r1
[2] << 8;
13185 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
13186 dst1
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13187 dst1
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13188 dst2
[0] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13189 dst2
[1] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13190 dst2
[2] = src_r1
[0] >> 16 | src_r1
[1] << 16;
13191 dst2
[3] = src_r1
[1] >> 16 | src_r1
[2] << 16;
13195 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
13196 dst1
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13197 dst1
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13198 dst2
[0] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13199 dst2
[1] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13200 dst2
[2] = src_r1
[0] >> 8 | src_r1
[1] << 24;
13201 dst2
[3] = src_r1
[1] >> 8 | src_r1
[2] << 24;
13205 dst1
[2] = src_r1
[0];
13206 dst1
[3] = src_r0
[1];
13207 dst2
[0] = src_r0
[2];
13208 dst2
[1] = src_r0
[3];
13209 dst2
[2] = src_r1
[0];
13210 dst2
[3] = src_r1
[1];
13214 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
13215 dst1
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13216 dst2
[0] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13217 dst2
[1] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13218 dst2
[2] = src_r0
[3] >> 24 | src_r1
[0] << 8;
13219 dst2
[3] = src_r1
[0] >> 24 | src_r1
[1] << 8;
13223 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
13224 dst1
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13225 dst2
[0] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13226 dst2
[1] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13227 dst2
[2] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13228 dst2
[3] = src_r1
[0] >> 16 | src_r1
[1] << 16;
13232 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
13233 dst1
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13234 dst2
[0] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13235 dst2
[1] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13236 dst2
[2] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13237 dst2
[3] = src_r1
[0] >> 8 | src_r1
[1] << 24;
13241 dst1
[3] = src_r1
[0];
13242 dst2
[0] = src_r0
[1];
13243 dst2
[1] = src_r0
[2];
13244 dst2
[2] = src_r0
[3];
13245 dst2
[3] = src_r1
[0];
13249 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
13250 dst2
[0] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13251 dst2
[1] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13252 dst2
[2] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13253 dst2
[3] = src_r0
[3] >> 24 | src_r1
[0] << 8;
13257 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
13258 dst2
[0] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13259 dst2
[1] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13260 dst2
[2] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13261 dst2
[3] = src_r0
[3] >> 16 | src_r1
[0] << 16;
13265 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
13266 dst2
[0] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13267 dst2
[1] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13268 dst2
[2] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13269 dst2
[3] = src_r0
[3] >> 8 | src_r1
[0] << 24;
13273 dst2
[0] = src_r0
[0];
13274 dst2
[1] = src_r0
[1];
13275 dst2
[2] = src_r0
[2];
13276 dst2
[3] = src_r0
[3];
13280 dst2
[0] = src_l2
[0] | src_r0
[0] << 8;
13281 dst2
[1] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13282 dst2
[2] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13283 dst2
[3] = src_r0
[2] >> 24 | src_r0
[3] << 8;
13287 dst2
[0] = src_l2
[0] | src_r0
[0] << 16;
13288 dst2
[1] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13289 dst2
[2] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13290 dst2
[3] = src_r0
[2] >> 16 | src_r0
[3] << 16;
13294 dst2
[0] = src_l2
[0] | src_r0
[0] << 24;
13295 dst2
[1] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13296 dst2
[2] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13297 dst2
[3] = src_r0
[2] >> 8 | src_r0
[3] << 24;
13301 dst2
[1] = src_r0
[0];
13302 dst2
[2] = src_r0
[1];
13303 dst2
[3] = src_r0
[2];
13307 dst2
[1] = src_l2
[1] | src_r0
[0] << 8;
13308 dst2
[2] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13309 dst2
[3] = src_r0
[1] >> 24 | src_r0
[2] << 8;
13313 dst2
[1] = src_l2
[1] | src_r0
[0] << 16;
13314 dst2
[2] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13315 dst2
[3] = src_r0
[1] >> 16 | src_r0
[2] << 16;
13319 dst2
[1] = src_l2
[1] | src_r0
[0] << 24;
13320 dst2
[2] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13321 dst2
[3] = src_r0
[1] >> 8 | src_r0
[2] << 24;
13325 dst2
[2] = src_r0
[0];
13326 dst2
[3] = src_r0
[1];
13330 dst2
[2] = src_l2
[2] | src_r0
[0] << 8;
13331 dst2
[3] = src_r0
[0] >> 24 | src_r0
[1] << 8;
13335 dst2
[2] = src_l2
[2] | src_r0
[0] << 16;
13336 dst2
[3] = src_r0
[0] >> 16 | src_r0
[1] << 16;
13340 dst2
[2] = src_l2
[2] | src_r0
[0] << 24;
13341 dst2
[3] = src_r0
[0] >> 8 | src_r0
[1] << 24;
13345 dst2
[3] = src_r0
[0];
13349 dst2
[3] = src_l2
[3] | src_r0
[0] << 8;
13353 dst2
[3] = src_l2
[3] | src_r0
[0] << 16;
13357 dst2
[3] = src_l2
[3] | src_r0
[0] << 24;
13362 static void memcat16_9 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 append0
[4], const u32 append1
[4], const u32 append2
[4], const u32 offset
)
13367 w0
[0] = append0
[0];
13368 w0
[1] = append0
[1];
13369 w0
[2] = append0
[2];
13370 w0
[3] = append0
[3];
13371 w1
[0] = append1
[0];
13372 w1
[1] = append1
[1];
13373 w1
[2] = append1
[2];
13374 w1
[3] = append1
[3];
13375 w2
[0] = append2
[0];
13379 w0
[0] = w0
[0] | append0
[0] << 8;
13380 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
13381 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
13382 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
13383 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
13384 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
13385 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
13386 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
13387 w2
[0] = append1
[3] >> 24 | append2
[0] << 8;
13388 w2
[1] = append2
[0] >> 24;
13392 w0
[0] = w0
[0] | append0
[0] << 16;
13393 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
13394 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
13395 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
13396 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
13397 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
13398 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
13399 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
13400 w2
[0] = append1
[3] >> 16 | append2
[0] << 16;
13401 w2
[1] = append2
[0] >> 16;
13405 w0
[0] = w0
[0] | append0
[0] << 24;
13406 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
13407 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
13408 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
13409 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
13410 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
13411 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
13412 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
13413 w2
[0] = append1
[3] >> 8 | append2
[0] << 24;
13414 w2
[1] = append2
[0] >> 8;
13418 w0
[1] = append0
[0];
13419 w0
[2] = append0
[1];
13420 w0
[3] = append0
[2];
13421 w1
[0] = append0
[3];
13422 w1
[1] = append1
[0];
13423 w1
[2] = append1
[1];
13424 w1
[3] = append1
[2];
13425 w2
[0] = append1
[3];
13426 w2
[1] = append2
[0];
13430 w0
[1] = w0
[1] | append0
[0] << 8;
13431 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
13432 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
13433 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
13434 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
13435 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
13436 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
13437 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
13438 w2
[1] = append1
[3] >> 24 | append2
[0] << 8;
13439 w2
[2] = append2
[0] >> 24;
13443 w0
[1] = w0
[1] | append0
[0] << 16;
13444 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
13445 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
13446 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
13447 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
13448 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
13449 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
13450 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
13451 w2
[1] = append1
[3] >> 16 | append2
[0] << 16;
13452 w2
[2] = append2
[0] >> 16;
13456 w0
[1] = w0
[1] | append0
[0] << 24;
13457 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
13458 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
13459 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
13460 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
13461 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
13462 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
13463 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
13464 w2
[1] = append1
[3] >> 8 | append2
[0] << 24;
13465 w2
[2] = append2
[0] >> 8;
13469 w0
[2] = append0
[0];
13470 w0
[3] = append0
[1];
13471 w1
[0] = append0
[2];
13472 w1
[1] = append0
[3];
13473 w1
[2] = append1
[0];
13474 w1
[3] = append1
[1];
13475 w2
[0] = append1
[2];
13476 w2
[1] = append1
[3];
13477 w2
[2] = append2
[0];
13481 w0
[2] = w0
[2] | append0
[0] << 8;
13482 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
13483 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
13484 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
13485 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
13486 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
13487 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
13488 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
13489 w2
[2] = append1
[3] >> 24 | append2
[0] << 8;
13490 w2
[3] = append2
[0] >> 24;
13494 w0
[2] = w0
[2] | append0
[0] << 16;
13495 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
13496 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
13497 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
13498 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
13499 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
13500 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
13501 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
13502 w2
[2] = append1
[3] >> 16 | append2
[0] << 16;
13503 w2
[3] = append2
[0] >> 16;
13507 w0
[2] = w0
[2] | append0
[0] << 24;
13508 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
13509 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
13510 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
13511 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
13512 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
13513 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
13514 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
13515 w2
[2] = append1
[3] >> 8 | append2
[0] << 24;
13516 w2
[3] = append2
[0] >> 8;
13520 w0
[3] = append0
[0];
13521 w1
[0] = append0
[1];
13522 w1
[1] = append0
[2];
13523 w1
[2] = append0
[3];
13524 w1
[3] = append1
[0];
13525 w2
[0] = append1
[1];
13526 w2
[1] = append1
[2];
13527 w2
[2] = append1
[3];
13528 w2
[3] = append2
[0];
13532 w0
[3] = w0
[3] | append0
[0] << 8;
13533 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
13534 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
13535 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
13536 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
13537 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
13538 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
13539 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
13540 w2
[3] = append1
[3] >> 24 | append2
[0] << 8;
13541 w3
[0] = append2
[0] >> 24;
13545 w0
[3] = w0
[3] | append0
[0] << 16;
13546 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
13547 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
13548 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
13549 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
13550 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
13551 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
13552 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
13553 w2
[3] = append1
[3] >> 16 | append2
[0] << 16;
13554 w3
[0] = append2
[0] >> 16;
13558 w0
[3] = w0
[3] | append0
[0] << 24;
13559 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
13560 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
13561 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
13562 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
13563 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
13564 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
13565 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
13566 w2
[3] = append1
[3] >> 8 | append2
[0] << 24;
13567 w3
[0] = append2
[0] >> 8;
13572 static void memcat16_9 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32x append0
[4], const u32x append1
[4], const u32x append2
[4], const u32 offset
)
13577 w0
[0] = append0
[0];
13578 w0
[1] = append0
[1];
13579 w0
[2] = append0
[2];
13580 w0
[3] = append0
[3];
13581 w1
[0] = append1
[0];
13582 w1
[1] = append1
[1];
13583 w1
[2] = append1
[2];
13584 w1
[3] = append1
[3];
13585 w2
[0] = append2
[0];
13589 w0
[0] = w0
[0] | append0
[0] << 8;
13590 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
13591 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
13592 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
13593 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
13594 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
13595 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
13596 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
13597 w2
[0] = append1
[3] >> 24 | append2
[0] << 8;
13598 w2
[1] = append2
[0] >> 24;
13602 w0
[0] = w0
[0] | append0
[0] << 16;
13603 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
13604 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
13605 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
13606 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
13607 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
13608 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
13609 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
13610 w2
[0] = append1
[3] >> 16 | append2
[0] << 16;
13611 w2
[1] = append2
[0] >> 16;
13615 w0
[0] = w0
[0] | append0
[0] << 24;
13616 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
13617 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
13618 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
13619 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
13620 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
13621 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
13622 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
13623 w2
[0] = append1
[3] >> 8 | append2
[0] << 24;
13624 w2
[1] = append2
[0] >> 8;
13628 w0
[1] = append0
[0];
13629 w0
[2] = append0
[1];
13630 w0
[3] = append0
[2];
13631 w1
[0] = append0
[3];
13632 w1
[1] = append1
[0];
13633 w1
[2] = append1
[1];
13634 w1
[3] = append1
[2];
13635 w2
[0] = append1
[3];
13636 w2
[1] = append2
[0];
13640 w0
[1] = w0
[1] | append0
[0] << 8;
13641 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
13642 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
13643 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
13644 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
13645 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
13646 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
13647 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
13648 w2
[1] = append1
[3] >> 24 | append2
[0] << 8;
13649 w2
[2] = append2
[0] >> 24;
13653 w0
[1] = w0
[1] | append0
[0] << 16;
13654 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
13655 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
13656 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
13657 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
13658 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
13659 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
13660 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
13661 w2
[1] = append1
[3] >> 16 | append2
[0] << 16;
13662 w2
[2] = append2
[0] >> 16;
13666 w0
[1] = w0
[1] | append0
[0] << 24;
13667 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
13668 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
13669 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
13670 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
13671 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
13672 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
13673 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
13674 w2
[1] = append1
[3] >> 8 | append2
[0] << 24;
13675 w2
[2] = append2
[0] >> 8;
13679 w0
[2] = append0
[0];
13680 w0
[3] = append0
[1];
13681 w1
[0] = append0
[2];
13682 w1
[1] = append0
[3];
13683 w1
[2] = append1
[0];
13684 w1
[3] = append1
[1];
13685 w2
[0] = append1
[2];
13686 w2
[1] = append1
[3];
13687 w2
[2] = append2
[0];
13691 w0
[2] = w0
[2] | append0
[0] << 8;
13692 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
13693 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
13694 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
13695 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
13696 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
13697 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
13698 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
13699 w2
[2] = append1
[3] >> 24 | append2
[0] << 8;
13700 w2
[3] = append2
[0] >> 24;
13704 w0
[2] = w0
[2] | append0
[0] << 16;
13705 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
13706 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
13707 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
13708 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
13709 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
13710 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
13711 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
13712 w2
[2] = append1
[3] >> 16 | append2
[0] << 16;
13713 w2
[3] = append2
[0] >> 16;
13717 w0
[2] = w0
[2] | append0
[0] << 24;
13718 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
13719 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
13720 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
13721 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
13722 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
13723 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
13724 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
13725 w2
[2] = append1
[3] >> 8 | append2
[0] << 24;
13726 w2
[3] = append2
[0] >> 8;
13730 w0
[3] = append0
[0];
13731 w1
[0] = append0
[1];
13732 w1
[1] = append0
[2];
13733 w1
[2] = append0
[3];
13734 w1
[3] = append1
[0];
13735 w2
[0] = append1
[1];
13736 w2
[1] = append1
[2];
13737 w2
[2] = append1
[3];
13738 w2
[3] = append2
[0];
13742 w0
[3] = w0
[3] | append0
[0] << 8;
13743 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
13744 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
13745 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
13746 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
13747 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
13748 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
13749 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
13750 w2
[3] = append1
[3] >> 24 | append2
[0] << 8;
13751 w3
[0] = append2
[0] >> 24;
13755 w0
[3] = w0
[3] | append0
[0] << 16;
13756 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
13757 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
13758 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
13759 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
13760 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
13761 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
13762 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
13763 w2
[3] = append1
[3] >> 16 | append2
[0] << 16;
13764 w3
[0] = append2
[0] >> 16;
13768 w0
[3] = w0
[3] | append0
[0] << 24;
13769 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
13770 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
13771 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
13772 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
13773 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
13774 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
13775 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
13776 w2
[3] = append1
[3] >> 8 | append2
[0] << 24;
13777 w3
[0] = append2
[0] >> 8;
13782 static void memcat32_8 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 append0
[4], const u32 append1
[4], const u32 offset
)
13787 w0
[0] = append0
[0];
13788 w0
[1] = append0
[1];
13789 w0
[2] = append0
[2];
13790 w0
[3] = append0
[3];
13791 w1
[0] = append1
[0];
13792 w1
[1] = append1
[1];
13793 w1
[2] = append1
[2];
13794 w1
[3] = append1
[3];
13798 w0
[0] = w0
[0] | append0
[0] << 8;
13799 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
13800 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
13801 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
13802 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
13803 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
13804 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
13805 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
13806 w2
[0] = append1
[3] >> 24;
13810 w0
[0] = w0
[0] | append0
[0] << 16;
13811 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
13812 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
13813 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
13814 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
13815 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
13816 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
13817 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
13818 w2
[0] = append1
[3] >> 16;
13822 w0
[0] = w0
[0] | append0
[0] << 24;
13823 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
13824 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
13825 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
13826 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
13827 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
13828 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
13829 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
13830 w2
[0] = append1
[3] >> 8;
13834 w0
[1] = append0
[0];
13835 w0
[2] = append0
[1];
13836 w0
[3] = append0
[2];
13837 w1
[0] = append0
[3];
13838 w1
[1] = append1
[0];
13839 w1
[2] = append1
[1];
13840 w1
[3] = append1
[2];
13841 w2
[0] = append1
[3];
13845 w0
[1] = w0
[1] | append0
[0] << 8;
13846 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
13847 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
13848 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
13849 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
13850 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
13851 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
13852 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
13853 w2
[1] = append1
[3] >> 24;
13857 w0
[1] = w0
[1] | append0
[0] << 16;
13858 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
13859 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
13860 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
13861 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
13862 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
13863 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
13864 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
13865 w2
[1] = append1
[3] >> 16;
13869 w0
[1] = w0
[1] | append0
[0] << 24;
13870 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
13871 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
13872 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
13873 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
13874 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
13875 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
13876 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
13877 w2
[1] = append1
[3] >> 8;
13881 w0
[2] = append0
[0];
13882 w0
[3] = append0
[1];
13883 w1
[0] = append0
[2];
13884 w1
[1] = append0
[3];
13885 w1
[2] = append1
[0];
13886 w1
[3] = append1
[1];
13887 w2
[0] = append1
[2];
13888 w2
[1] = append1
[3];
13892 w0
[2] = w0
[2] | append0
[0] << 8;
13893 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
13894 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
13895 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
13896 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
13897 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
13898 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
13899 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
13900 w2
[2] = append1
[3] >> 24;
13904 w0
[2] = w0
[2] | append0
[0] << 16;
13905 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
13906 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
13907 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
13908 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
13909 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
13910 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
13911 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
13912 w2
[2] = append1
[3] >> 16;
13916 w0
[2] = w0
[2] | append0
[0] << 24;
13917 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
13918 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
13919 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
13920 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
13921 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
13922 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
13923 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
13924 w2
[2] = append1
[3] >> 8;
13928 w0
[3] = append0
[0];
13929 w1
[0] = append0
[1];
13930 w1
[1] = append0
[2];
13931 w1
[2] = append0
[3];
13932 w1
[3] = append1
[0];
13933 w2
[0] = append1
[1];
13934 w2
[1] = append1
[2];
13935 w2
[2] = append1
[3];
13939 w0
[3] = w0
[3] | append0
[0] << 8;
13940 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
13941 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
13942 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
13943 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
13944 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
13945 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
13946 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
13947 w2
[3] = append1
[3] >> 24;
13951 w0
[3] = w0
[3] | append0
[0] << 16;
13952 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
13953 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
13954 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
13955 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
13956 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
13957 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
13958 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
13959 w2
[3] = append1
[3] >> 16;
13963 w0
[3] = w0
[3] | append0
[0] << 24;
13964 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
13965 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
13966 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
13967 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
13968 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
13969 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
13970 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
13971 w2
[3] = append1
[3] >> 8;
13975 w1
[0] = append0
[0];
13976 w1
[1] = append0
[1];
13977 w1
[2] = append0
[2];
13978 w1
[3] = append0
[3];
13979 w2
[0] = append1
[0];
13980 w2
[1] = append1
[1];
13981 w2
[2] = append1
[2];
13982 w2
[3] = append1
[3];
13986 w1
[0] = w1
[0] | append0
[0] << 8;
13987 w1
[1] = append0
[0] >> 24 | append0
[1] << 8;
13988 w1
[2] = append0
[1] >> 24 | append0
[2] << 8;
13989 w1
[3] = append0
[2] >> 24 | append0
[3] << 8;
13990 w2
[0] = append0
[3] >> 24 | append1
[0] << 8;
13991 w2
[1] = append1
[0] >> 24 | append1
[1] << 8;
13992 w2
[2] = append1
[1] >> 24 | append1
[2] << 8;
13993 w2
[3] = append1
[2] >> 24 | append1
[3] << 8;
13994 w3
[0] = append1
[3] >> 24;
13998 w1
[0] = w1
[0] | append0
[0] << 16;
13999 w1
[1] = append0
[0] >> 16 | append0
[1] << 16;
14000 w1
[2] = append0
[1] >> 16 | append0
[2] << 16;
14001 w1
[3] = append0
[2] >> 16 | append0
[3] << 16;
14002 w2
[0] = append0
[3] >> 16 | append1
[0] << 16;
14003 w2
[1] = append1
[0] >> 16 | append1
[1] << 16;
14004 w2
[2] = append1
[1] >> 16 | append1
[2] << 16;
14005 w2
[3] = append1
[2] >> 16 | append1
[3] << 16;
14006 w3
[0] = append1
[3] >> 16;
14010 w1
[0] = w1
[0] | append0
[0] << 24;
14011 w1
[1] = append0
[0] >> 8 | append0
[1] << 24;
14012 w1
[2] = append0
[1] >> 8 | append0
[2] << 24;
14013 w1
[3] = append0
[2] >> 8 | append0
[3] << 24;
14014 w2
[0] = append0
[3] >> 8 | append1
[0] << 24;
14015 w2
[1] = append1
[0] >> 8 | append1
[1] << 24;
14016 w2
[2] = append1
[1] >> 8 | append1
[2] << 24;
14017 w2
[3] = append1
[2] >> 8 | append1
[3] << 24;
14018 w3
[0] = append1
[3] >> 8;
14022 w1
[1] = append0
[0];
14023 w1
[2] = append0
[1];
14024 w1
[3] = append0
[2];
14025 w2
[0] = append0
[3];
14026 w2
[1] = append1
[0];
14027 w2
[2] = append1
[1];
14028 w2
[3] = append1
[2];
14029 w3
[0] = append1
[3];
14033 w1
[1] = w1
[1] | append0
[0] << 8;
14034 w1
[2] = append0
[0] >> 24 | append0
[1] << 8;
14035 w1
[3] = append0
[1] >> 24 | append0
[2] << 8;
14036 w2
[0] = append0
[2] >> 24 | append0
[3] << 8;
14037 w2
[1] = append0
[3] >> 24 | append1
[0] << 8;
14038 w2
[2] = append1
[0] >> 24 | append1
[1] << 8;
14039 w2
[3] = append1
[1] >> 24 | append1
[2] << 8;
14040 w3
[0] = append1
[2] >> 24 | append1
[3] << 8;
14041 w3
[1] = append1
[3] >> 24;
14045 w1
[1] = w1
[1] | append0
[0] << 16;
14046 w1
[2] = append0
[0] >> 16 | append0
[1] << 16;
14047 w1
[3] = append0
[1] >> 16 | append0
[2] << 16;
14048 w2
[0] = append0
[2] >> 16 | append0
[3] << 16;
14049 w2
[1] = append0
[3] >> 16 | append1
[0] << 16;
14050 w2
[2] = append1
[0] >> 16 | append1
[1] << 16;
14051 w2
[3] = append1
[1] >> 16 | append1
[2] << 16;
14052 w3
[0] = append1
[2] >> 16 | append1
[3] << 16;
14053 w3
[1] = append1
[3] >> 16;
14057 w1
[1] = w1
[1] | append0
[0] << 24;
14058 w1
[2] = append0
[0] >> 8 | append0
[1] << 24;
14059 w1
[3] = append0
[1] >> 8 | append0
[2] << 24;
14060 w2
[0] = append0
[2] >> 8 | append0
[3] << 24;
14061 w2
[1] = append0
[3] >> 8 | append1
[0] << 24;
14062 w2
[2] = append1
[0] >> 8 | append1
[1] << 24;
14063 w2
[3] = append1
[1] >> 8 | append1
[2] << 24;
14064 w3
[0] = append1
[2] >> 8 | append1
[3] << 24;
14065 w3
[1] = append1
[3] >> 8;
14069 w1
[2] = append0
[0];
14070 w1
[3] = append0
[1];
14071 w2
[0] = append0
[2];
14072 w2
[1] = append0
[3];
14073 w2
[2] = append1
[0];
14074 w2
[3] = append1
[1];
14075 w3
[0] = append1
[2];
14076 w3
[1] = append1
[3];
14080 w1
[2] = w1
[2] | append0
[0] << 8;
14081 w1
[3] = append0
[0] >> 24 | append0
[1] << 8;
14082 w2
[0] = append0
[1] >> 24 | append0
[2] << 8;
14083 w2
[1] = append0
[2] >> 24 | append0
[3] << 8;
14084 w2
[2] = append0
[3] >> 24 | append1
[0] << 8;
14085 w2
[3] = append1
[0] >> 24 | append1
[1] << 8;
14086 w3
[0] = append1
[1] >> 24 | append1
[2] << 8;
14087 w3
[1] = append1
[2] >> 24 | append1
[3] << 8;
14091 w1
[2] = w1
[2] | append0
[0] << 16;
14092 w1
[3] = append0
[0] >> 16 | append0
[1] << 16;
14093 w2
[0] = append0
[1] >> 16 | append0
[2] << 16;
14094 w2
[1] = append0
[2] >> 16 | append0
[3] << 16;
14095 w2
[2] = append0
[3] >> 16 | append1
[0] << 16;
14096 w2
[3] = append1
[0] >> 16 | append1
[1] << 16;
14097 w3
[0] = append1
[1] >> 16 | append1
[2] << 16;
14098 w3
[1] = append1
[2] >> 16 | append1
[3] << 16;
14102 w1
[2] = w1
[2] | append0
[0] << 24;
14103 w1
[3] = append0
[0] >> 8 | append0
[1] << 24;
14104 w2
[0] = append0
[1] >> 8 | append0
[2] << 24;
14105 w2
[1] = append0
[2] >> 8 | append0
[3] << 24;
14106 w2
[2] = append0
[3] >> 8 | append1
[0] << 24;
14107 w2
[3] = append1
[0] >> 8 | append1
[1] << 24;
14108 w3
[0] = append1
[1] >> 8 | append1
[2] << 24;
14109 w3
[1] = append1
[2] >> 8 | append1
[3] << 24;
14113 w1
[3] = append0
[0];
14114 w2
[0] = append0
[1];
14115 w2
[1] = append0
[2];
14116 w2
[2] = append0
[3];
14117 w2
[3] = append1
[0];
14118 w3
[0] = append1
[1];
14119 w3
[1] = append1
[2];
14123 w1
[3] = w1
[3] | append0
[0] << 8;
14124 w2
[0] = append0
[0] >> 24 | append0
[1] << 8;
14125 w2
[1] = append0
[1] >> 24 | append0
[2] << 8;
14126 w2
[2] = append0
[2] >> 24 | append0
[3] << 8;
14127 w2
[3] = append0
[3] >> 24 | append1
[0] << 8;
14128 w3
[0] = append1
[0] >> 24 | append1
[1] << 8;
14129 w3
[1] = append1
[1] >> 24 | append1
[2] << 8;
14133 w1
[3] = w1
[3] | append0
[0] << 16;
14134 w2
[0] = append0
[0] >> 16 | append0
[1] << 16;
14135 w2
[1] = append0
[1] >> 16 | append0
[2] << 16;
14136 w2
[2] = append0
[2] >> 16 | append0
[3] << 16;
14137 w2
[3] = append0
[3] >> 16 | append1
[0] << 16;
14138 w3
[0] = append1
[0] >> 16 | append1
[1] << 16;
14139 w3
[1] = append1
[1] >> 16 | append1
[2] << 16;
14143 w1
[3] = w1
[3] | append0
[0] << 24;
14144 w2
[0] = append0
[0] >> 8 | append0
[1] << 24;
14145 w2
[1] = append0
[1] >> 8 | append0
[2] << 24;
14146 w2
[2] = append0
[2] >> 8 | append0
[3] << 24;
14147 w2
[3] = append0
[3] >> 8 | append1
[0] << 24;
14148 w3
[0] = append1
[0] >> 8 | append1
[1] << 24;
14149 w3
[1] = append1
[1] >> 8 | append1
[2] << 24;
14153 w2
[0] = append0
[0];
14154 w2
[1] = append0
[1];
14155 w2
[2] = append0
[2];
14156 w2
[3] = append0
[3];
14157 w3
[0] = append1
[0];
14158 w3
[1] = append1
[1];
14163 static void memcat32_9 (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 append0
[4], const u32 append1
[4], const u32 append2
[4], const u32 offset
)
14168 w0
[0] = append0
[0];
14169 w0
[1] = append0
[1];
14170 w0
[2] = append0
[2];
14171 w0
[3] = append0
[3];
14172 w1
[0] = append1
[0];
14173 w1
[1] = append1
[1];
14174 w1
[2] = append1
[2];
14175 w1
[3] = append1
[3];
14176 w2
[0] = append2
[0];
14180 w0
[0] = w0
[0] | append0
[0] << 8;
14181 w0
[1] = append0
[0] >> 24 | append0
[1] << 8;
14182 w0
[2] = append0
[1] >> 24 | append0
[2] << 8;
14183 w0
[3] = append0
[2] >> 24 | append0
[3] << 8;
14184 w1
[0] = append0
[3] >> 24 | append1
[0] << 8;
14185 w1
[1] = append1
[0] >> 24 | append1
[1] << 8;
14186 w1
[2] = append1
[1] >> 24 | append1
[2] << 8;
14187 w1
[3] = append1
[2] >> 24 | append1
[3] << 8;
14188 w2
[0] = append1
[3] >> 24 | append2
[0] << 8;
14189 w2
[1] = append2
[0] >> 24;
14193 w0
[0] = w0
[0] | append0
[0] << 16;
14194 w0
[1] = append0
[0] >> 16 | append0
[1] << 16;
14195 w0
[2] = append0
[1] >> 16 | append0
[2] << 16;
14196 w0
[3] = append0
[2] >> 16 | append0
[3] << 16;
14197 w1
[0] = append0
[3] >> 16 | append1
[0] << 16;
14198 w1
[1] = append1
[0] >> 16 | append1
[1] << 16;
14199 w1
[2] = append1
[1] >> 16 | append1
[2] << 16;
14200 w1
[3] = append1
[2] >> 16 | append1
[3] << 16;
14201 w2
[0] = append1
[3] >> 16 | append2
[0] << 16;
14202 w2
[1] = append2
[0] >> 16;
14206 w0
[0] = w0
[0] | append0
[0] << 24;
14207 w0
[1] = append0
[0] >> 8 | append0
[1] << 24;
14208 w0
[2] = append0
[1] >> 8 | append0
[2] << 24;
14209 w0
[3] = append0
[2] >> 8 | append0
[3] << 24;
14210 w1
[0] = append0
[3] >> 8 | append1
[0] << 24;
14211 w1
[1] = append1
[0] >> 8 | append1
[1] << 24;
14212 w1
[2] = append1
[1] >> 8 | append1
[2] << 24;
14213 w1
[3] = append1
[2] >> 8 | append1
[3] << 24;
14214 w2
[0] = append1
[3] >> 8 | append2
[0] << 24;
14215 w2
[1] = append2
[0] >> 8;
14219 w0
[1] = append0
[0];
14220 w0
[2] = append0
[1];
14221 w0
[3] = append0
[2];
14222 w1
[0] = append0
[3];
14223 w1
[1] = append1
[0];
14224 w1
[2] = append1
[1];
14225 w1
[3] = append1
[2];
14226 w2
[0] = append1
[3];
14227 w2
[1] = append2
[0];
14231 w0
[1] = w0
[1] | append0
[0] << 8;
14232 w0
[2] = append0
[0] >> 24 | append0
[1] << 8;
14233 w0
[3] = append0
[1] >> 24 | append0
[2] << 8;
14234 w1
[0] = append0
[2] >> 24 | append0
[3] << 8;
14235 w1
[1] = append0
[3] >> 24 | append1
[0] << 8;
14236 w1
[2] = append1
[0] >> 24 | append1
[1] << 8;
14237 w1
[3] = append1
[1] >> 24 | append1
[2] << 8;
14238 w2
[0] = append1
[2] >> 24 | append1
[3] << 8;
14239 w2
[1] = append1
[3] >> 24 | append2
[0] << 8;
14240 w2
[2] = append2
[0] >> 24;
14244 w0
[1] = w0
[1] | append0
[0] << 16;
14245 w0
[2] = append0
[0] >> 16 | append0
[1] << 16;
14246 w0
[3] = append0
[1] >> 16 | append0
[2] << 16;
14247 w1
[0] = append0
[2] >> 16 | append0
[3] << 16;
14248 w1
[1] = append0
[3] >> 16 | append1
[0] << 16;
14249 w1
[2] = append1
[0] >> 16 | append1
[1] << 16;
14250 w1
[3] = append1
[1] >> 16 | append1
[2] << 16;
14251 w2
[0] = append1
[2] >> 16 | append1
[3] << 16;
14252 w2
[1] = append1
[3] >> 16 | append2
[0] << 16;
14253 w2
[2] = append2
[0] >> 16;
14257 w0
[1] = w0
[1] | append0
[0] << 24;
14258 w0
[2] = append0
[0] >> 8 | append0
[1] << 24;
14259 w0
[3] = append0
[1] >> 8 | append0
[2] << 24;
14260 w1
[0] = append0
[2] >> 8 | append0
[3] << 24;
14261 w1
[1] = append0
[3] >> 8 | append1
[0] << 24;
14262 w1
[2] = append1
[0] >> 8 | append1
[1] << 24;
14263 w1
[3] = append1
[1] >> 8 | append1
[2] << 24;
14264 w2
[0] = append1
[2] >> 8 | append1
[3] << 24;
14265 w2
[1] = append1
[3] >> 8 | append2
[0] << 24;
14266 w2
[2] = append2
[0] >> 8;
14270 w0
[2] = append0
[0];
14271 w0
[3] = append0
[1];
14272 w1
[0] = append0
[2];
14273 w1
[1] = append0
[3];
14274 w1
[2] = append1
[0];
14275 w1
[3] = append1
[1];
14276 w2
[0] = append1
[2];
14277 w2
[1] = append1
[3];
14278 w2
[2] = append2
[0];
14282 w0
[2] = w0
[2] | append0
[0] << 8;
14283 w0
[3] = append0
[0] >> 24 | append0
[1] << 8;
14284 w1
[0] = append0
[1] >> 24 | append0
[2] << 8;
14285 w1
[1] = append0
[2] >> 24 | append0
[3] << 8;
14286 w1
[2] = append0
[3] >> 24 | append1
[0] << 8;
14287 w1
[3] = append1
[0] >> 24 | append1
[1] << 8;
14288 w2
[0] = append1
[1] >> 24 | append1
[2] << 8;
14289 w2
[1] = append1
[2] >> 24 | append1
[3] << 8;
14290 w2
[2] = append1
[3] >> 24 | append2
[0] << 8;
14291 w2
[3] = append2
[0] >> 24;
14295 w0
[2] = w0
[2] | append0
[0] << 16;
14296 w0
[3] = append0
[0] >> 16 | append0
[1] << 16;
14297 w1
[0] = append0
[1] >> 16 | append0
[2] << 16;
14298 w1
[1] = append0
[2] >> 16 | append0
[3] << 16;
14299 w1
[2] = append0
[3] >> 16 | append1
[0] << 16;
14300 w1
[3] = append1
[0] >> 16 | append1
[1] << 16;
14301 w2
[0] = append1
[1] >> 16 | append1
[2] << 16;
14302 w2
[1] = append1
[2] >> 16 | append1
[3] << 16;
14303 w2
[2] = append1
[3] >> 16 | append2
[0] << 16;
14304 w2
[3] = append2
[0] >> 16;
14308 w0
[2] = w0
[2] | append0
[0] << 24;
14309 w0
[3] = append0
[0] >> 8 | append0
[1] << 24;
14310 w1
[0] = append0
[1] >> 8 | append0
[2] << 24;
14311 w1
[1] = append0
[2] >> 8 | append0
[3] << 24;
14312 w1
[2] = append0
[3] >> 8 | append1
[0] << 24;
14313 w1
[3] = append1
[0] >> 8 | append1
[1] << 24;
14314 w2
[0] = append1
[1] >> 8 | append1
[2] << 24;
14315 w2
[1] = append1
[2] >> 8 | append1
[3] << 24;
14316 w2
[2] = append1
[3] >> 8 | append2
[0] << 24;
14317 w2
[3] = append2
[0] >> 8;
14321 w0
[3] = append0
[0];
14322 w1
[0] = append0
[1];
14323 w1
[1] = append0
[2];
14324 w1
[2] = append0
[3];
14325 w1
[3] = append1
[0];
14326 w2
[0] = append1
[1];
14327 w2
[1] = append1
[2];
14328 w2
[2] = append1
[3];
14329 w2
[3] = append2
[0];
14333 w0
[3] = w0
[3] | append0
[0] << 8;
14334 w1
[0] = append0
[0] >> 24 | append0
[1] << 8;
14335 w1
[1] = append0
[1] >> 24 | append0
[2] << 8;
14336 w1
[2] = append0
[2] >> 24 | append0
[3] << 8;
14337 w1
[3] = append0
[3] >> 24 | append1
[0] << 8;
14338 w2
[0] = append1
[0] >> 24 | append1
[1] << 8;
14339 w2
[1] = append1
[1] >> 24 | append1
[2] << 8;
14340 w2
[2] = append1
[2] >> 24 | append1
[3] << 8;
14341 w2
[3] = append1
[3] >> 24 | append2
[0] << 8;
14342 w3
[0] = append2
[0] >> 24;
14346 w0
[3] = w0
[3] | append0
[0] << 16;
14347 w1
[0] = append0
[0] >> 16 | append0
[1] << 16;
14348 w1
[1] = append0
[1] >> 16 | append0
[2] << 16;
14349 w1
[2] = append0
[2] >> 16 | append0
[3] << 16;
14350 w1
[3] = append0
[3] >> 16 | append1
[0] << 16;
14351 w2
[0] = append1
[0] >> 16 | append1
[1] << 16;
14352 w2
[1] = append1
[1] >> 16 | append1
[2] << 16;
14353 w2
[2] = append1
[2] >> 16 | append1
[3] << 16;
14354 w2
[3] = append1
[3] >> 16 | append2
[0] << 16;
14355 w3
[0] = append2
[0] >> 16;
14359 w0
[3] = w0
[3] | append0
[0] << 24;
14360 w1
[0] = append0
[0] >> 8 | append0
[1] << 24;
14361 w1
[1] = append0
[1] >> 8 | append0
[2] << 24;
14362 w1
[2] = append0
[2] >> 8 | append0
[3] << 24;
14363 w1
[3] = append0
[3] >> 8 | append1
[0] << 24;
14364 w2
[0] = append1
[0] >> 8 | append1
[1] << 24;
14365 w2
[1] = append1
[1] >> 8 | append1
[2] << 24;
14366 w2
[2] = append1
[2] >> 8 | append1
[3] << 24;
14367 w2
[3] = append1
[3] >> 8 | append2
[0] << 24;
14368 w3
[0] = append2
[0] >> 8;
14372 w1
[0] = append0
[0];
14373 w1
[1] = append0
[1];
14374 w1
[2] = append0
[2];
14375 w1
[3] = append0
[3];
14376 w2
[0] = append1
[0];
14377 w2
[1] = append1
[1];
14378 w2
[2] = append1
[2];
14379 w2
[3] = append1
[3];
14380 w3
[0] = append2
[0];
14384 w1
[0] = w1
[0] | append0
[0] << 8;
14385 w1
[1] = append0
[0] >> 24 | append0
[1] << 8;
14386 w1
[2] = append0
[1] >> 24 | append0
[2] << 8;
14387 w1
[3] = append0
[2] >> 24 | append0
[3] << 8;
14388 w2
[0] = append0
[3] >> 24 | append1
[0] << 8;
14389 w2
[1] = append1
[0] >> 24 | append1
[1] << 8;
14390 w2
[2] = append1
[1] >> 24 | append1
[2] << 8;
14391 w2
[3] = append1
[2] >> 24 | append1
[3] << 8;
14392 w3
[0] = append1
[3] >> 24 | append2
[0] << 8;
14393 w3
[1] = append2
[0] >> 24;
14397 w1
[0] = w1
[0] | append0
[0] << 16;
14398 w1
[1] = append0
[0] >> 16 | append0
[1] << 16;
14399 w1
[2] = append0
[1] >> 16 | append0
[2] << 16;
14400 w1
[3] = append0
[2] >> 16 | append0
[3] << 16;
14401 w2
[0] = append0
[3] >> 16 | append1
[0] << 16;
14402 w2
[1] = append1
[0] >> 16 | append1
[1] << 16;
14403 w2
[2] = append1
[1] >> 16 | append1
[2] << 16;
14404 w2
[3] = append1
[2] >> 16 | append1
[3] << 16;
14405 w3
[0] = append1
[3] >> 16 | append2
[0] << 16;
14406 w3
[1] = append2
[0] >> 16;
14410 w1
[0] = w1
[0] | append0
[0] << 24;
14411 w1
[1] = append0
[0] >> 8 | append0
[1] << 24;
14412 w1
[2] = append0
[1] >> 8 | append0
[2] << 24;
14413 w1
[3] = append0
[2] >> 8 | append0
[3] << 24;
14414 w2
[0] = append0
[3] >> 8 | append1
[0] << 24;
14415 w2
[1] = append1
[0] >> 8 | append1
[1] << 24;
14416 w2
[2] = append1
[1] >> 8 | append1
[2] << 24;
14417 w2
[3] = append1
[2] >> 8 | append1
[3] << 24;
14418 w3
[0] = append1
[3] >> 8 | append2
[0] << 24;
14419 w3
[1] = append2
[0] >> 8;
14423 w1
[1] = append0
[0];
14424 w1
[2] = append0
[1];
14425 w1
[3] = append0
[2];
14426 w2
[0] = append0
[3];
14427 w2
[1] = append1
[0];
14428 w2
[2] = append1
[1];
14429 w2
[3] = append1
[2];
14430 w3
[0] = append1
[3];
14431 w3
[1] = append2
[0];
14435 w1
[1] = w1
[1] | append0
[0] << 8;
14436 w1
[2] = append0
[0] >> 24 | append0
[1] << 8;
14437 w1
[3] = append0
[1] >> 24 | append0
[2] << 8;
14438 w2
[0] = append0
[2] >> 24 | append0
[3] << 8;
14439 w2
[1] = append0
[3] >> 24 | append1
[0] << 8;
14440 w2
[2] = append1
[0] >> 24 | append1
[1] << 8;
14441 w2
[3] = append1
[1] >> 24 | append1
[2] << 8;
14442 w3
[0] = append1
[2] >> 24 | append1
[3] << 8;
14443 w3
[1] = append1
[3] >> 24 | append2
[0] << 8;
14447 w1
[1] = w1
[1] | append0
[0] << 16;
14448 w1
[2] = append0
[0] >> 16 | append0
[1] << 16;
14449 w1
[3] = append0
[1] >> 16 | append0
[2] << 16;
14450 w2
[0] = append0
[2] >> 16 | append0
[3] << 16;
14451 w2
[1] = append0
[3] >> 16 | append1
[0] << 16;
14452 w2
[2] = append1
[0] >> 16 | append1
[1] << 16;
14453 w2
[3] = append1
[1] >> 16 | append1
[2] << 16;
14454 w3
[0] = append1
[2] >> 16 | append1
[3] << 16;
14455 w3
[1] = append1
[3] >> 16 | append2
[0] << 16;
14459 w1
[1] = w1
[1] | append0
[0] << 24;
14460 w1
[2] = append0
[0] >> 8 | append0
[1] << 24;
14461 w1
[3] = append0
[1] >> 8 | append0
[2] << 24;
14462 w2
[0] = append0
[2] >> 8 | append0
[3] << 24;
14463 w2
[1] = append0
[3] >> 8 | append1
[0] << 24;
14464 w2
[2] = append1
[0] >> 8 | append1
[1] << 24;
14465 w2
[3] = append1
[1] >> 8 | append1
[2] << 24;
14466 w3
[0] = append1
[2] >> 8 | append1
[3] << 24;
14467 w3
[1] = append1
[3] >> 8 | append2
[0] << 24;
14471 w1
[2] = append0
[0];
14472 w1
[3] = append0
[1];
14473 w2
[0] = append0
[2];
14474 w2
[1] = append0
[3];
14475 w2
[2] = append1
[0];
14476 w2
[3] = append1
[1];
14477 w3
[0] = append1
[2];
14478 w3
[1] = append1
[3];
14482 w1
[2] = w1
[2] | append0
[0] << 8;
14483 w1
[3] = append0
[0] >> 24 | append0
[1] << 8;
14484 w2
[0] = append0
[1] >> 24 | append0
[2] << 8;
14485 w2
[1] = append0
[2] >> 24 | append0
[3] << 8;
14486 w2
[2] = append0
[3] >> 24 | append1
[0] << 8;
14487 w2
[3] = append1
[0] >> 24 | append1
[1] << 8;
14488 w3
[0] = append1
[1] >> 24 | append1
[2] << 8;
14489 w3
[1] = append1
[2] >> 24 | append1
[3] << 8;
14493 w1
[2] = w1
[2] | append0
[0] << 16;
14494 w1
[3] = append0
[0] >> 16 | append0
[1] << 16;
14495 w2
[0] = append0
[1] >> 16 | append0
[2] << 16;
14496 w2
[1] = append0
[2] >> 16 | append0
[3] << 16;
14497 w2
[2] = append0
[3] >> 16 | append1
[0] << 16;
14498 w2
[3] = append1
[0] >> 16 | append1
[1] << 16;
14499 w3
[0] = append1
[1] >> 16 | append1
[2] << 16;
14500 w3
[1] = append1
[2] >> 16 | append1
[3] << 16;
14504 w1
[2] = w1
[2] | append0
[0] << 24;
14505 w1
[3] = append0
[0] >> 8 | append0
[1] << 24;
14506 w2
[0] = append0
[1] >> 8 | append0
[2] << 24;
14507 w2
[1] = append0
[2] >> 8 | append0
[3] << 24;
14508 w2
[2] = append0
[3] >> 8 | append1
[0] << 24;
14509 w2
[3] = append1
[0] >> 8 | append1
[1] << 24;
14510 w3
[0] = append1
[1] >> 8 | append1
[2] << 24;
14511 w3
[1] = append1
[2] >> 8 | append1
[3] << 24;
14515 w1
[3] = append0
[0];
14516 w2
[0] = append0
[1];
14517 w2
[1] = append0
[2];
14518 w2
[2] = append0
[3];
14519 w2
[3] = append1
[0];
14520 w3
[0] = append1
[1];
14521 w3
[1] = append1
[2];
14525 w1
[3] = w1
[3] | append0
[0] << 8;
14526 w2
[0] = append0
[0] >> 24 | append0
[1] << 8;
14527 w2
[1] = append0
[1] >> 24 | append0
[2] << 8;
14528 w2
[2] = append0
[2] >> 24 | append0
[3] << 8;
14529 w2
[3] = append0
[3] >> 24 | append1
[0] << 8;
14530 w3
[0] = append1
[0] >> 24 | append1
[1] << 8;
14531 w3
[1] = append1
[1] >> 24 | append1
[2] << 8;
14535 w1
[3] = w1
[3] | append0
[0] << 16;
14536 w2
[0] = append0
[0] >> 16 | append0
[1] << 16;
14537 w2
[1] = append0
[1] >> 16 | append0
[2] << 16;
14538 w2
[2] = append0
[2] >> 16 | append0
[3] << 16;
14539 w2
[3] = append0
[3] >> 16 | append1
[0] << 16;
14540 w3
[0] = append1
[0] >> 16 | append1
[1] << 16;
14541 w3
[1] = append1
[1] >> 16 | append1
[2] << 16;
14545 w1
[3] = w1
[3] | append0
[0] << 24;
14546 w2
[0] = append0
[0] >> 8 | append0
[1] << 24;
14547 w2
[1] = append0
[1] >> 8 | append0
[2] << 24;
14548 w2
[2] = append0
[2] >> 8 | append0
[3] << 24;
14549 w2
[3] = append0
[3] >> 8 | append1
[0] << 24;
14550 w3
[0] = append1
[0] >> 8 | append1
[1] << 24;
14551 w3
[1] = append1
[1] >> 8 | append1
[2] << 24;
14555 w2
[0] = append0
[0];
14556 w2
[1] = append0
[1];
14557 w2
[2] = append0
[2];
14558 w2
[3] = append0
[3];
14559 w3
[0] = append1
[0];
14560 w3
[1] = append1
[1];
14565 static void switch_buffer_by_offset (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 offset
)
14567 const int offset_mod_4
= offset
& 3;
14569 const int offset_minus_4
= 4 - offset
;
14571 switch (offset
/ 4)
14574 w3
[2] = amd_bytealign ( 0, w3
[1], offset_minus_4
);
14575 w3
[1] = amd_bytealign (w3
[1], w3
[0], offset_minus_4
);
14576 w3
[0] = amd_bytealign (w3
[0], w2
[3], offset_minus_4
);
14577 w2
[3] = amd_bytealign (w2
[3], w2
[2], offset_minus_4
);
14578 w2
[2] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
14579 w2
[1] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
14580 w2
[0] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
14581 w1
[3] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14582 w1
[2] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14583 w1
[1] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14584 w1
[0] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14585 w0
[3] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14586 w0
[2] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14587 w0
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14588 w0
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14590 if (offset_mod_4
== 0)
14612 w3
[2] = amd_bytealign ( 0, w3
[0], offset_minus_4
);
14613 w3
[1] = amd_bytealign (w3
[0], w2
[3], offset_minus_4
);
14614 w3
[0] = amd_bytealign (w2
[3], w2
[2], offset_minus_4
);
14615 w2
[3] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
14616 w2
[2] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
14617 w2
[1] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
14618 w2
[0] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14619 w1
[3] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14620 w1
[2] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14621 w1
[1] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14622 w1
[0] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14623 w0
[3] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14624 w0
[2] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14625 w0
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14628 if (offset_mod_4
== 0)
14649 w3
[2] = amd_bytealign ( 0, w2
[3], offset_minus_4
);
14650 w3
[1] = amd_bytealign (w2
[3], w2
[2], offset_minus_4
);
14651 w3
[0] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
14652 w2
[3] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
14653 w2
[2] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
14654 w2
[1] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14655 w2
[0] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14656 w1
[3] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14657 w1
[2] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14658 w1
[1] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14659 w1
[0] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14660 w0
[3] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14661 w0
[2] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14665 if (offset_mod_4
== 0)
14685 w3
[2] = amd_bytealign ( 0, w2
[2], offset_minus_4
);
14686 w3
[1] = amd_bytealign (w2
[2], w2
[1], offset_minus_4
);
14687 w3
[0] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
14688 w2
[3] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
14689 w2
[2] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14690 w2
[1] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14691 w2
[0] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14692 w1
[3] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14693 w1
[2] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14694 w1
[1] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14695 w1
[0] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14696 w0
[3] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14701 if (offset_mod_4
== 0)
14720 w3
[2] = amd_bytealign ( 0, w2
[1], offset_minus_4
);
14721 w3
[1] = amd_bytealign (w2
[1], w2
[0], offset_minus_4
);
14722 w3
[0] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
14723 w2
[3] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14724 w2
[2] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14725 w2
[1] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14726 w2
[0] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14727 w1
[3] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14728 w1
[2] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14729 w1
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14730 w1
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14736 if (offset_mod_4
== 0)
14754 w3
[2] = amd_bytealign ( 0, w2
[0], offset_minus_4
);
14755 w3
[1] = amd_bytealign (w2
[0], w1
[3], offset_minus_4
);
14756 w3
[0] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14757 w2
[3] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14758 w2
[2] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14759 w2
[1] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14760 w2
[0] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14761 w1
[3] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14762 w1
[2] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14763 w1
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14770 if (offset_mod_4
== 0)
14787 w3
[2] = amd_bytealign ( 0, w1
[3], offset_minus_4
);
14788 w3
[1] = amd_bytealign (w1
[3], w1
[2], offset_minus_4
);
14789 w3
[0] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14790 w2
[3] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14791 w2
[2] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14792 w2
[1] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14793 w2
[0] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14794 w1
[3] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14795 w1
[2] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14803 if (offset_mod_4
== 0)
14819 w3
[2] = amd_bytealign ( 0, w1
[2], offset_minus_4
);
14820 w3
[1] = amd_bytealign (w1
[2], w1
[1], offset_minus_4
);
14821 w3
[0] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14822 w2
[3] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14823 w2
[2] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14824 w2
[1] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14825 w2
[0] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14826 w1
[3] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14835 if (offset_mod_4
== 0)
14850 w3
[2] = amd_bytealign ( 0, w1
[1], offset_minus_4
);
14851 w3
[1] = amd_bytealign (w1
[1], w1
[0], offset_minus_4
);
14852 w3
[0] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14853 w2
[3] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14854 w2
[2] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14855 w2
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14856 w2
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14866 if (offset_mod_4
== 0)
14880 w3
[2] = amd_bytealign ( 0, w1
[0], offset_minus_4
);
14881 w3
[1] = amd_bytealign (w1
[0], w0
[3], offset_minus_4
);
14882 w3
[0] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14883 w2
[3] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14884 w2
[2] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14885 w2
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14896 if (offset_mod_4
== 0)
14909 w3
[2] = amd_bytealign ( 0, w0
[3], offset_minus_4
);
14910 w3
[1] = amd_bytealign (w0
[3], w0
[2], offset_minus_4
);
14911 w3
[0] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14912 w2
[3] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14913 w2
[2] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14925 if (offset_mod_4
== 0)
14937 w3
[2] = amd_bytealign ( 0, w0
[2], offset_minus_4
);
14938 w3
[1] = amd_bytealign (w0
[2], w0
[1], offset_minus_4
);
14939 w3
[0] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14940 w2
[3] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14953 if (offset_mod_4
== 0)
14964 w3
[2] = amd_bytealign ( 0, w0
[1], offset_minus_4
);
14965 w3
[1] = amd_bytealign (w0
[1], w0
[0], offset_minus_4
);
14966 w3
[0] = amd_bytealign (w0
[0], 0, offset_minus_4
);
14980 if (offset_mod_4
== 0)
14990 w3
[2] = amd_bytealign ( 0, w0
[0], offset_minus_4
);
14991 w3
[1] = amd_bytealign (w0
[0], 0, offset_minus_4
);
15006 if (offset_mod_4
== 0)
/**
 * switch_buffer_by_offset_be
 *
 * Rewrites the words of the four 4-word buffers w0, w1, w2 and w3 in place.
 * Which statements run is selected by `switch (offset / 4)`.
 *
 * Parameters:
 *   w0, w1, w2, w3 - u32x[4] buffers, modified in place
 *   offset         - used for the switch selector (offset / 4) and passed
 *                    unchanged as the third argument of every amd_bytealign
 *
 * What the visible statements show:
 *   - Every statement has the form  wX[i] = amd_bytealign (a, b, offset).
 *   - There are 14 statement groups. Each starts by assigning w3[2] with a
 *     literal 0 as the second argument, and ends with a statement whose
 *     first argument is a literal 0 and whose second argument is w0[0].
 *   - Each successive group has one assignment fewer than the previous one
 *     (15 down to 2); words below the lowest one written in a group are not
 *     assigned by any statement visible here.
 *   - w3[3] is never read or written.
 *   - Inside a group the destinations run from w3[2] downwards and every
 *     statement reads only words at or below its own destination, so each
 *     read sees the value the word had on entry. Do not reorder them.
 *
 * NOTE(review): the case labels are not visible in this view; the groups are
 * presumably the bodies of consecutive cases of the switch - confirm against
 * the full file.
 * NOTE(review): amd_bytealign is defined outside this view; the "_be" suffix
 * suggests big-endian word layout - confirm before relying on it.
 */
15016 static void switch_buffer_by_offset_be (u32x w0
[4], u32x w1
[4], u32x w2
[4], u32x w3
[4], const u32 offset
)
15018 switch (offset
/ 4)
// Group 1 of 14: 15 assignments, w3[2] down to w0[0].
15021 w3
[2] = amd_bytealign (w3
[1], 0, offset
);
15022 w3
[1] = amd_bytealign (w3
[0], w3
[1], offset
);
15023 w3
[0] = amd_bytealign (w2
[3], w3
[0], offset
);
15024 w2
[3] = amd_bytealign (w2
[2], w2
[3], offset
);
15025 w2
[2] = amd_bytealign (w2
[1], w2
[2], offset
);
15026 w2
[1] = amd_bytealign (w2
[0], w2
[1], offset
);
15027 w2
[0] = amd_bytealign (w1
[3], w2
[0], offset
);
15028 w1
[3] = amd_bytealign (w1
[2], w1
[3], offset
);
15029 w1
[2] = amd_bytealign (w1
[1], w1
[2], offset
);
15030 w1
[1] = amd_bytealign (w1
[0], w1
[1], offset
);
15031 w1
[0] = amd_bytealign (w0
[3], w1
[0], offset
);
15032 w0
[3] = amd_bytealign (w0
[2], w0
[3], offset
);
15033 w0
[2] = amd_bytealign (w0
[1], w0
[2], offset
);
15034 w0
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
15035 w0
[0] = amd_bytealign ( 0, w0
[0], offset
);
// Group 2 of 14: 14 assignments, w3[2] down to w0[1].
15039 w3
[2] = amd_bytealign (w3
[0], 0, offset
);
15040 w3
[1] = amd_bytealign (w2
[3], w3
[0], offset
);
15041 w3
[0] = amd_bytealign (w2
[2], w2
[3], offset
);
15042 w2
[3] = amd_bytealign (w2
[1], w2
[2], offset
);
15043 w2
[2] = amd_bytealign (w2
[0], w2
[1], offset
);
15044 w2
[1] = amd_bytealign (w1
[3], w2
[0], offset
);
15045 w2
[0] = amd_bytealign (w1
[2], w1
[3], offset
);
15046 w1
[3] = amd_bytealign (w1
[1], w1
[2], offset
);
15047 w1
[2] = amd_bytealign (w1
[0], w1
[1], offset
);
15048 w1
[1] = amd_bytealign (w0
[3], w1
[0], offset
);
15049 w1
[0] = amd_bytealign (w0
[2], w0
[3], offset
);
15050 w0
[3] = amd_bytealign (w0
[1], w0
[2], offset
);
15051 w0
[2] = amd_bytealign (w0
[0], w0
[1], offset
);
15052 w0
[1] = amd_bytealign ( 0, w0
[0], offset
);
// Group 3 of 14: 13 assignments, w3[2] down to w0[2].
15057 w3
[2] = amd_bytealign (w2
[3], 0, offset
);
15058 w3
[1] = amd_bytealign (w2
[2], w2
[3], offset
);
15059 w3
[0] = amd_bytealign (w2
[1], w2
[2], offset
);
15060 w2
[3] = amd_bytealign (w2
[0], w2
[1], offset
);
15061 w2
[2] = amd_bytealign (w1
[3], w2
[0], offset
);
15062 w2
[1] = amd_bytealign (w1
[2], w1
[3], offset
);
15063 w2
[0] = amd_bytealign (w1
[1], w1
[2], offset
);
15064 w1
[3] = amd_bytealign (w1
[0], w1
[1], offset
);
15065 w1
[2] = amd_bytealign (w0
[3], w1
[0], offset
);
15066 w1
[1] = amd_bytealign (w0
[2], w0
[3], offset
);
15067 w1
[0] = amd_bytealign (w0
[1], w0
[2], offset
);
15068 w0
[3] = amd_bytealign (w0
[0], w0
[1], offset
);
15069 w0
[2] = amd_bytealign ( 0, w0
[0], offset
);
// Group 4 of 14: 12 assignments, w3[2] down to w0[3].
15075 w3
[2] = amd_bytealign (w2
[2], 0, offset
);
15076 w3
[1] = amd_bytealign (w2
[1], w2
[2], offset
);
15077 w3
[0] = amd_bytealign (w2
[0], w2
[1], offset
);
15078 w2
[3] = amd_bytealign (w1
[3], w2
[0], offset
);
15079 w2
[2] = amd_bytealign (w1
[2], w1
[3], offset
);
15080 w2
[1] = amd_bytealign (w1
[1], w1
[2], offset
);
15081 w2
[0] = amd_bytealign (w1
[0], w1
[1], offset
);
15082 w1
[3] = amd_bytealign (w0
[3], w1
[0], offset
);
15083 w1
[2] = amd_bytealign (w0
[2], w0
[3], offset
);
15084 w1
[1] = amd_bytealign (w0
[1], w0
[2], offset
);
15085 w1
[0] = amd_bytealign (w0
[0], w0
[1], offset
);
15086 w0
[3] = amd_bytealign ( 0, w0
[0], offset
);
// Group 5 of 14: 11 assignments, w3[2] down to w1[0].
15093 w3
[2] = amd_bytealign (w2
[1], 0, offset
);
15094 w3
[1] = amd_bytealign (w2
[0], w2
[1], offset
);
15095 w3
[0] = amd_bytealign (w1
[3], w2
[0], offset
);
15096 w2
[3] = amd_bytealign (w1
[2], w1
[3], offset
);
15097 w2
[2] = amd_bytealign (w1
[1], w1
[2], offset
);
15098 w2
[1] = amd_bytealign (w1
[0], w1
[1], offset
);
15099 w2
[0] = amd_bytealign (w0
[3], w1
[0], offset
);
15100 w1
[3] = amd_bytealign (w0
[2], w0
[3], offset
);
15101 w1
[2] = amd_bytealign (w0
[1], w0
[2], offset
);
15102 w1
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
15103 w1
[0] = amd_bytealign ( 0, w0
[0], offset
);
// Group 6 of 14: 10 assignments, w3[2] down to w1[1].
15111 w3
[2] = amd_bytealign (w2
[0], 0, offset
);
15112 w3
[1] = amd_bytealign (w1
[3], w2
[0], offset
);
15113 w3
[0] = amd_bytealign (w1
[2], w1
[3], offset
);
15114 w2
[3] = amd_bytealign (w1
[1], w1
[2], offset
);
15115 w2
[2] = amd_bytealign (w1
[0], w1
[1], offset
);
15116 w2
[1] = amd_bytealign (w0
[3], w1
[0], offset
);
15117 w2
[0] = amd_bytealign (w0
[2], w0
[3], offset
);
15118 w1
[3] = amd_bytealign (w0
[1], w0
[2], offset
);
15119 w1
[2] = amd_bytealign (w0
[0], w0
[1], offset
);
15120 w1
[1] = amd_bytealign ( 0, w0
[0], offset
);
// Group 7 of 14: 9 assignments, w3[2] down to w1[2].
15129 w3
[2] = amd_bytealign (w1
[3], 0, offset
);
15130 w3
[1] = amd_bytealign (w1
[2], w1
[3], offset
);
15131 w3
[0] = amd_bytealign (w1
[1], w1
[2], offset
);
15132 w2
[3] = amd_bytealign (w1
[0], w1
[1], offset
);
15133 w2
[2] = amd_bytealign (w0
[3], w1
[0], offset
);
15134 w2
[1] = amd_bytealign (w0
[2], w0
[3], offset
);
15135 w2
[0] = amd_bytealign (w0
[1], w0
[2], offset
);
15136 w1
[3] = amd_bytealign (w0
[0], w0
[1], offset
);
15137 w1
[2] = amd_bytealign ( 0, w0
[0], offset
);
// Group 8 of 14: 8 assignments, w3[2] down to w1[3].
15147 w3
[2] = amd_bytealign (w1
[2], 0, offset
);
15148 w3
[1] = amd_bytealign (w1
[1], w1
[2], offset
);
15149 w3
[0] = amd_bytealign (w1
[0], w1
[1], offset
);
15150 w2
[3] = amd_bytealign (w0
[3], w1
[0], offset
);
15151 w2
[2] = amd_bytealign (w0
[2], w0
[3], offset
);
15152 w2
[1] = amd_bytealign (w0
[1], w0
[2], offset
);
15153 w2
[0] = amd_bytealign (w0
[0], w0
[1], offset
);
15154 w1
[3] = amd_bytealign ( 0, w0
[0], offset
);
// Group 9 of 14: 7 assignments, w3[2] down to w2[0].
15165 w3
[2] = amd_bytealign (w1
[1], 0, offset
);
15166 w3
[1] = amd_bytealign (w1
[0], w1
[1], offset
);
15167 w3
[0] = amd_bytealign (w0
[3], w1
[0], offset
);
15168 w2
[3] = amd_bytealign (w0
[2], w0
[3], offset
);
15169 w2
[2] = amd_bytealign (w0
[1], w0
[2], offset
);
15170 w2
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
15171 w2
[0] = amd_bytealign ( 0, w0
[0], offset
);
// Group 10 of 14: 6 assignments, w3[2] down to w2[1].
15183 w3
[2] = amd_bytealign (w1
[0], 0, offset
);
15184 w3
[1] = amd_bytealign (w0
[3], w1
[0], offset
);
15185 w3
[0] = amd_bytealign (w0
[2], w0
[3], offset
);
15186 w2
[3] = amd_bytealign (w0
[1], w0
[2], offset
);
15187 w2
[2] = amd_bytealign (w0
[0], w0
[1], offset
);
15188 w2
[1] = amd_bytealign ( 0, w0
[0], offset
);
// Group 11 of 14: 5 assignments, w3[2] down to w2[2].
15201 w3
[2] = amd_bytealign (w0
[3], 0, offset
);
15202 w3
[1] = amd_bytealign (w0
[2], w0
[3], offset
);
15203 w3
[0] = amd_bytealign (w0
[1], w0
[2], offset
);
15204 w2
[3] = amd_bytealign (w0
[0], w0
[1], offset
);
15205 w2
[2] = amd_bytealign ( 0, w0
[0], offset
);
// Group 12 of 14: 4 assignments, w3[2] down to w2[3].
15219 w3
[2] = amd_bytealign (w0
[2], 0, offset
);
15220 w3
[1] = amd_bytealign (w0
[1], w0
[2], offset
);
15221 w3
[0] = amd_bytealign (w0
[0], w0
[1], offset
);
15222 w2
[3] = amd_bytealign ( 0, w0
[0], offset
);
// Group 13 of 14: 3 assignments, w3[2] down to w3[0].
15237 w3
[2] = amd_bytealign (w0
[1], 0, offset
);
15238 w3
[1] = amd_bytealign (w0
[0], w0
[1], offset
);
15239 w3
[0] = amd_bytealign ( 0, w0
[0], offset
);
// Group 14 of 14: 2 assignments, w3[2] and w3[1].
15255 w3
[2] = amd_bytealign (w0
[0], 0, offset
);
15256 w3
[1] = amd_bytealign ( 0, w0
[0], offset
);
15275 static u32
check_vector_accessible (const u32 il_pos
, const u32 bf_loops
, const u32 bfs_cnt
, const u32 element
)
15279 // nothing to do here
15283 if ((il_pos
+ 1) == bf_loops
)
15286 u32 bfs_over
= bfs_cnt
% 2;
15288 if (bfs_over
== 0) bfs_over
= 2;
15292 u32 bfs_over
= bfs_cnt
% 4;
15294 if (bfs_over
== 0) bfs_over
= 4;
15297 if (element
>= bfs_over
) return 0;