2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
44 __constant u32 te0[256] =
46 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
47 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
48 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
49 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
50 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
51 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
52 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
53 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
54 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
55 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
56 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
57 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
58 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
59 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
60 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
61 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
62 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
63 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
64 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
65 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
66 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
67 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
68 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
69 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
70 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
71 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
72 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
73 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
74 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
75 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
76 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
77 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
78 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
79 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
80 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
81 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
82 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
83 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
84 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
85 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
86 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
87 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
88 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
89 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
90 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
91 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
92 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
93 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
94 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
95 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
96 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
97 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
98 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
99 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
100 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
101 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
102 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
103 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
104 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
105 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
106 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
107 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
108 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
109 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
112 __constant u32 te1[256] =
114 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
115 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
116 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
117 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
118 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
119 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
120 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
121 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
122 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
123 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
124 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
125 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
126 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
127 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
128 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
129 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
130 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
131 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
132 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
133 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
134 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
135 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
136 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
137 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
138 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
139 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
140 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
141 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
142 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
143 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
144 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
145 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
146 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
147 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
148 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
149 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
150 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
151 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
152 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
153 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
154 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
155 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
156 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
157 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
158 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
159 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
160 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
161 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
162 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
163 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
164 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
165 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
166 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
167 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
168 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
169 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
170 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
171 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
172 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
173 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
174 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
175 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
176 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
177 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
180 __constant u32 te2[256] =
182 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
183 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
184 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
185 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
186 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
187 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
188 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
189 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
190 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
191 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
192 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
193 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
194 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
195 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
196 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
197 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
198 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
199 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
200 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
201 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
202 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
203 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
204 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
205 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
206 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
207 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
208 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
209 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
210 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
211 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
212 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
213 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
214 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
215 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
216 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
217 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
218 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
219 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
220 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
221 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
222 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
223 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
224 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
225 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
226 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
227 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
228 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
229 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
230 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
231 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
232 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
233 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
234 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
235 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
236 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
237 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
238 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
239 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
240 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
241 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
242 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
243 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
244 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
245 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
248 __constant u32 te3[256] =
250 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
251 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
252 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
253 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
254 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
255 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
256 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
257 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
258 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
259 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
260 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
261 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
262 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
263 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
264 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
265 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
266 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
267 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
268 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
269 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
270 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
271 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
272 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
273 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
274 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
275 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
276 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
277 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
278 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
279 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
280 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
281 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
282 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
283 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
284 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
285 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
286 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
287 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
288 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
289 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
290 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
291 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
292 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
293 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
294 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
295 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
296 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
297 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
298 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
299 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
300 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
301 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
302 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
303 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
304 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
305 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
306 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
307 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
308 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
309 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
310 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
311 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
312 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
313 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
316 __constant u32 te4[256] =
318 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
319 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
320 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
321 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
322 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
323 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
324 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
325 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
326 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
327 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
328 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
329 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
330 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
331 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
332 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
333 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
334 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
335 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
336 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
337 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
338 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
339 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
340 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
341 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
342 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
343 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
344 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
345 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
346 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
347 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
348 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
349 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
350 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
351 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
352 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
353 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
354 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
355 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
356 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
357 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
358 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
359 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
360 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
361 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
362 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
363 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
364 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
365 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
366 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
367 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
368 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
369 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
370 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
371 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
372 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
373 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
374 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
375 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
376 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
377 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
378 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
379 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
380 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
381 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
384 __constant u32 td0[256] =
386 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
387 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
388 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
389 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
390 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
391 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
392 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
393 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
394 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
395 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
396 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
397 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
398 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
399 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
400 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
401 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
402 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
403 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
404 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
405 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
406 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
407 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
408 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
409 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
410 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
411 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
412 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
413 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
414 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
415 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
416 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
417 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
418 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
419 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
420 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
421 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
422 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
423 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
424 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
425 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
426 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
427 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
428 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
429 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
430 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
431 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
432 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
433 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
434 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
435 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
436 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
437 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
438 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
439 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
440 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
441 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
442 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
443 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
444 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
445 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
446 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
447 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
448 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
449 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
452 __constant u32 td1[256] =
454 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
455 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
456 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
457 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
458 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
459 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
460 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
461 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
462 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
463 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
464 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
465 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
466 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
467 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
468 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
469 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
470 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
471 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
472 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
473 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
474 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
475 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
476 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
477 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
478 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
479 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
480 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
481 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
482 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
483 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
484 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
485 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
486 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
487 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
488 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
489 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
490 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
491 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
492 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
493 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
494 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
495 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
496 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
497 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
498 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
499 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
500 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
501 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
502 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
503 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
504 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
505 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
506 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
507 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
508 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
509 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
510 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
511 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
512 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
513 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
514 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
515 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
516 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
517 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
520 __constant u32 td2[256] =
522 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
523 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
524 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
525 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
526 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
527 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
528 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
529 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
530 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
531 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
532 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
533 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
534 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
535 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
536 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
537 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
538 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
539 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
540 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
541 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
542 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
543 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
544 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
545 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
546 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
547 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
548 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
549 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
550 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
551 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
552 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
553 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
554 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
555 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
556 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
557 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
558 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
559 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
560 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
561 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
562 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
563 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
564 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
565 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
566 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
567 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
568 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
569 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
570 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
571 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
572 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
573 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
574 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
575 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
576 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
577 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
578 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
579 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
580 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
581 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
582 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
583 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
584 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
585 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
588 __constant u32 td3[256] =
590 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
591 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
592 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
593 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
594 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
595 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
596 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
597 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
598 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
599 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
600 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
601 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
602 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
603 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
604 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
605 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
606 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
607 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
608 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
609 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
610 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
611 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
612 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
613 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
614 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
615 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
616 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
617 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
618 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
619 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
620 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
621 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
622 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
623 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
624 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
625 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
626 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
627 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
628 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
629 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
630 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
631 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
632 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
633 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
634 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
635 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
636 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
637 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
638 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
639 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
640 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
641 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
642 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
643 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
644 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
645 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
646 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
647 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
648 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
649 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
650 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
651 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
652 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
653 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
656 __constant u32 td4[256] =
658 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
659 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
660 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
661 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
662 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
663 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
664 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
665 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
666 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
667 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
668 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
669 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
670 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
671 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
672 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
673 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
674 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
675 0x86868686, 0x68686868, 0x98989898, 0x16161616,
676 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
677 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
678 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
679 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
680 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
681 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
682 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
683 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
684 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
685 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
686 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
687 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
688 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
689 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
690 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
691 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
692 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
693 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
694 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
695 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
696 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
697 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
698 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
699 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
700 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
701 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
702 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
703 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
704 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
705 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
706 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
707 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
708 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
709 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
710 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
711 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
712 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
713 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
714 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
715 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
716 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
717 0x83838383, 0x53535353, 0x99999999, 0x61616161,
718 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
719 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
720 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
721 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
724 __constant u32 rcon[] =
726 0x01000000, 0x02000000, 0x04000000, 0x08000000,
727 0x10000000, 0x20000000, 0x40000000, 0x80000000,
728 0x1b000000, 0x36000000,
731 static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
750 u32 temp = rek[j + 7];
752 rek[j + 8] = rek[j + 0]
753 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
754 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
755 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
756 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
759 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
760 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
761 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
767 rek[j + 12] = rek[j + 4]
768 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
769 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
770 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
771 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
773 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
774 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
775 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
781 static void AES256_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
783 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
787 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
788 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
789 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
790 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
793 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
796 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
797 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
798 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
799 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
802 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
803 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
804 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
805 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
808 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
809 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
810 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
811 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
814 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
815 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
816 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
817 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
821 static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
823 u32 s0 = in[0] ^ rdk[0];
824 u32 s1 = in[1] ^ rdk[1];
825 u32 s2 = in[2] ^ rdk[2];
826 u32 s3 = in[3] ^ rdk[3];
833 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
834 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
835 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
836 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
837 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
838 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
839 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
840 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
841 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
842 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
843 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
844 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
845 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
846 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
847 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
848 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
849 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
850 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
851 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
852 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
853 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
854 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
855 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
856 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
857 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
858 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
859 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
860 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
861 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
862 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
863 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
864 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
865 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
866 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
867 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
868 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
869 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
870 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
871 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
872 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
873 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
874 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
875 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
876 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
877 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
878 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
879 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
880 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
881 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
882 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
883 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
884 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
886 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
887 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
888 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
889 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
892 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
893 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
894 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
895 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
898 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
899 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
900 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
901 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
904 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
905 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
906 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
907 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
911 static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
913 u32 s0 = in[0] ^ rek[0];
914 u32 s1 = in[1] ^ rek[1];
915 u32 s2 = in[2] ^ rek[2];
916 u32 s3 = in[3] ^ rek[3];
923 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
924 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
925 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
926 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
927 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
928 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
929 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
930 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
931 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
932 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
933 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
934 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
935 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
936 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
937 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
938 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
939 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
940 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
941 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
942 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
943 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
944 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
945 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
946 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
947 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
948 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
949 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
950 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
951 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
952 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
953 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
954 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
955 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
956 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
957 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
958 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
959 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40];
960 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41];
961 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42];
962 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43];
963 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44];
964 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45];
965 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46];
966 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47];
967 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48];
968 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49];
969 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50];
970 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51];
971 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52];
972 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53];
973 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54];
974 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55];
976 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
977 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
978 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
979 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
982 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
983 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
984 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
985 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
988 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
989 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
990 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
991 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
994 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
995 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
996 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
997 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
1001 __constant u64 k_sha512[80] =
1003 SHA512C00, SHA512C01, SHA512C02, SHA512C03,
1004 SHA512C04, SHA512C05, SHA512C06, SHA512C07,
1005 SHA512C08, SHA512C09, SHA512C0a, SHA512C0b,
1006 SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f,
1007 SHA512C10, SHA512C11, SHA512C12, SHA512C13,
1008 SHA512C14, SHA512C15, SHA512C16, SHA512C17,
1009 SHA512C18, SHA512C19, SHA512C1a, SHA512C1b,
1010 SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f,
1011 SHA512C20, SHA512C21, SHA512C22, SHA512C23,
1012 SHA512C24, SHA512C25, SHA512C26, SHA512C27,
1013 SHA512C28, SHA512C29, SHA512C2a, SHA512C2b,
1014 SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f,
1015 SHA512C30, SHA512C31, SHA512C32, SHA512C33,
1016 SHA512C34, SHA512C35, SHA512C36, SHA512C37,
1017 SHA512C38, SHA512C39, SHA512C3a, SHA512C3b,
1018 SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f,
1019 SHA512C40, SHA512C41, SHA512C42, SHA512C43,
1020 SHA512C44, SHA512C45, SHA512C46, SHA512C47,
1021 SHA512C48, SHA512C49, SHA512C4a, SHA512C4b,
1022 SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
1025 static void sha512_transform (volatile const u64 w0[4], volatile const u64 w1[4], volatile const u64 w2[4], volatile const u64 w3[4], volatile u64 dgst[8])
1053 #define ROUND_EXPAND() \
1055 w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
1056 w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
1057 w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
1058 w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
1059 w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
1060 w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
1061 w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
1062 w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
1063 w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
1064 w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
1065 wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
1066 wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
1067 wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
1068 wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
1069 we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
1070 wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
1073 #define ROUND_STEP(i) \
1075 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
1076 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
1077 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
1078 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
1079 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
1080 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
1081 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
1082 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
1083 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
1084 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
1085 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
1086 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
1087 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
1088 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
1089 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
1090 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
1096 for (int i = 16; i < 80; i += 16)
1098 ROUND_EXPAND (); ROUND_STEP (i);
1111 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2013_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2013_t *office2013_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1117 const u32 gid = get_global_id (0);
1119 if (gid >= gid_max) return;
1123 w0[0] = pws[gid].i[ 0];
1124 w0[1] = pws[gid].i[ 1];
1125 w0[2] = pws[gid].i[ 2];
1126 w0[3] = pws[gid].i[ 3];
1130 w1[0] = pws[gid].i[ 4];
1131 w1[1] = pws[gid].i[ 5];
1132 w1[2] = pws[gid].i[ 6];
1133 w1[3] = pws[gid].i[ 7];
1137 w2[0] = pws[gid].i[ 8];
1138 w2[1] = pws[gid].i[ 9];
1139 w2[2] = pws[gid].i[10];
1140 w2[3] = pws[gid].i[11];
1144 w3[0] = pws[gid].i[12];
1145 w3[1] = pws[gid].i[13];
1146 w3[2] = pws[gid].i[14];
1147 w3[3] = pws[gid].i[15];
1149 u32 pw_len = pws[gid].pw_len;
1151 append_0x80_4 (w0, w1, w2, w3, pw_len);
1153 make_unicode (w1, w2, w3);
1154 make_unicode (w0, w0, w1);
1160 u32 salt_len = salt_bufs[salt_pos].salt_len;
1164 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1165 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1166 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1167 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1175 t0[0] = (u64) salt_buf[0] << 32 | salt_buf[1];
1176 t0[1] = (u64) salt_buf[2] << 32 | salt_buf[3];
1177 t0[2] = (u64) swap_workaround (w0[0]) << 32 | swap_workaround (w0[1]);
1178 t0[3] = (u64) swap_workaround (w0[2]) << 32 | swap_workaround (w0[3]);
1182 t1[0] = (u64) swap_workaround (w1[0]) << 32 | swap_workaround (w1[1]);
1183 t1[1] = (u64) swap_workaround (w1[2]) << 32 | swap_workaround (w1[3]);
1184 t1[2] = (u64) swap_workaround (w2[0]) << 32 | swap_workaround (w2[1]);
1185 t1[3] = (u64) swap_workaround (w2[2]) << 32 | swap_workaround (w2[3]);
1189 t2[0] = (u64) swap_workaround (w3[0]) << 32 | swap_workaround (w3[1]);
1190 t2[1] = (u64) swap_workaround (w3[2]) << 32 | swap_workaround (w3[3]);
1199 t3[3] = (salt_len + (pw_len * 2)) * 8;
1203 digest[0] = SHA512M_A;
1204 digest[1] = SHA512M_B;
1205 digest[2] = SHA512M_C;
1206 digest[3] = SHA512M_D;
1207 digest[4] = SHA512M_E;
1208 digest[5] = SHA512M_F;
1209 digest[6] = SHA512M_G;
1210 digest[7] = SHA512M_H;
1212 sha512_transform (t0, t1, t2, t3, digest);
1214 tmps[gid].out[0] = digest[0];
1215 tmps[gid].out[1] = digest[1];
1216 tmps[gid].out[2] = digest[2];
1217 tmps[gid].out[3] = digest[3];
1218 tmps[gid].out[4] = digest[4];
1219 tmps[gid].out[5] = digest[5];
1220 tmps[gid].out[6] = digest[6];
1221 tmps[gid].out[7] = digest[7];
1224 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2013_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2013_t *office2013_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1226 const u32 gid = get_global_id (0);
1228 if (gid >= gid_max) return;
1232 w0[0] = tmps[gid].out[0] >> 32;
1233 w0[1] = tmps[gid].out[0] << 32 | tmps[gid].out[1] >> 32;
1234 w0[2] = tmps[gid].out[1] << 32 | tmps[gid].out[2] >> 32;
1235 w0[3] = tmps[gid].out[2] << 32 | tmps[gid].out[3] >> 32;
1239 w1[0] = tmps[gid].out[3] << 32 | tmps[gid].out[4] >> 32;
1240 w1[1] = tmps[gid].out[4] << 32 | tmps[gid].out[5] >> 32;
1241 w1[2] = tmps[gid].out[5] << 32 | tmps[gid].out[6] >> 32;
1242 w1[3] = tmps[gid].out[6] << 32 | tmps[gid].out[7] >> 32;
1246 w2[0] = tmps[gid].out[7] << 32 | 0x80000000;
1256 w3[3] = (4 + 64) * 8;
1258 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1260 w0[0] = (u64) swap_workaround (j) << 32 | w0[0] & 0xffffffff;
1264 digest[0] = SHA512M_A;
1265 digest[1] = SHA512M_B;
1266 digest[2] = SHA512M_C;
1267 digest[3] = SHA512M_D;
1268 digest[4] = SHA512M_E;
1269 digest[5] = SHA512M_F;
1270 digest[6] = SHA512M_G;
1271 digest[7] = SHA512M_H;
1273 sha512_transform (w0, w1, w2, w3, digest);
1275 w0[0] = digest[0] >> 32;
1276 w0[1] = digest[0] << 32 | digest[1] >> 32;
1277 w0[2] = digest[1] << 32 | digest[2] >> 32;
1278 w0[3] = digest[2] << 32 | digest[3] >> 32;
1279 w1[0] = digest[3] << 32 | digest[4] >> 32;
1280 w1[1] = digest[4] << 32 | digest[5] >> 32;
1281 w1[2] = digest[5] << 32 | digest[6] >> 32;
1282 w1[3] = digest[6] << 32 | digest[7] >> 32;
1283 w2[0] = digest[7] << 32 | 0x80000000;
1286 tmps[gid].out[0] = w0[0] << 32 | w0[1] >> 32;
1287 tmps[gid].out[1] = w0[1] << 32 | w0[2] >> 32;
1288 tmps[gid].out[2] = w0[2] << 32 | w0[3] >> 32;
1289 tmps[gid].out[3] = w0[3] << 32 | w1[0] >> 32;
1290 tmps[gid].out[4] = w1[0] << 32 | w1[1] >> 32;
1291 tmps[gid].out[5] = w1[1] << 32 | w1[2] >> 32;
1292 tmps[gid].out[6] = w1[2] << 32 | w1[3] >> 32;
1293 tmps[gid].out[7] = w1[3] << 32 | w2[0] >> 32;
1296 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2013_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2013_t *office2013_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1298 const u32 gid = get_global_id (0);
1299 const u32 lid = get_local_id (0);
1305 const u32 lid4 = lid * 4;
1307 __local u32 s_td0[256];
1308 __local u32 s_td1[256];
1309 __local u32 s_td2[256];
1310 __local u32 s_td3[256];
1311 __local u32 s_td4[256];
1313 __local u32 s_te0[256];
1314 __local u32 s_te1[256];
1315 __local u32 s_te2[256];
1316 __local u32 s_te3[256];
1317 __local u32 s_te4[256];
1319 s_td0[lid4 + 0] = td0[lid4 + 0];
1320 s_td0[lid4 + 1] = td0[lid4 + 1];
1321 s_td0[lid4 + 2] = td0[lid4 + 2];
1322 s_td0[lid4 + 3] = td0[lid4 + 3];
1324 s_td1[lid4 + 0] = td1[lid4 + 0];
1325 s_td1[lid4 + 1] = td1[lid4 + 1];
1326 s_td1[lid4 + 2] = td1[lid4 + 2];
1327 s_td1[lid4 + 3] = td1[lid4 + 3];
1329 s_td2[lid4 + 0] = td2[lid4 + 0];
1330 s_td2[lid4 + 1] = td2[lid4 + 1];
1331 s_td2[lid4 + 2] = td2[lid4 + 2];
1332 s_td2[lid4 + 3] = td2[lid4 + 3];
1334 s_td3[lid4 + 0] = td3[lid4 + 0];
1335 s_td3[lid4 + 1] = td3[lid4 + 1];
1336 s_td3[lid4 + 2] = td3[lid4 + 2];
1337 s_td3[lid4 + 3] = td3[lid4 + 3];
1339 s_td4[lid4 + 0] = td4[lid4 + 0];
1340 s_td4[lid4 + 1] = td4[lid4 + 1];
1341 s_td4[lid4 + 2] = td4[lid4 + 2];
1342 s_td4[lid4 + 3] = td4[lid4 + 3];
1344 s_te0[lid4 + 0] = te0[lid4 + 0];
1345 s_te0[lid4 + 1] = te0[lid4 + 1];
1346 s_te0[lid4 + 2] = te0[lid4 + 2];
1347 s_te0[lid4 + 3] = te0[lid4 + 3];
1349 s_te1[lid4 + 0] = te1[lid4 + 0];
1350 s_te1[lid4 + 1] = te1[lid4 + 1];
1351 s_te1[lid4 + 2] = te1[lid4 + 2];
1352 s_te1[lid4 + 3] = te1[lid4 + 3];
1354 s_te2[lid4 + 0] = te2[lid4 + 0];
1355 s_te2[lid4 + 1] = te2[lid4 + 1];
1356 s_te2[lid4 + 2] = te2[lid4 + 2];
1357 s_te2[lid4 + 3] = te2[lid4 + 3];
1359 s_te3[lid4 + 0] = te3[lid4 + 0];
1360 s_te3[lid4 + 1] = te3[lid4 + 1];
1361 s_te3[lid4 + 2] = te3[lid4 + 2];
1362 s_te3[lid4 + 3] = te3[lid4 + 3];
1364 s_te4[lid4 + 0] = te4[lid4 + 0];
1365 s_te4[lid4 + 1] = te4[lid4 + 1];
1366 s_te4[lid4 + 2] = te4[lid4 + 2];
1367 s_te4[lid4 + 3] = te4[lid4 + 3];
1369 barrier (CLK_LOCAL_MEM_FENCE);
1371 if (gid >= gid_max) return;
1373 u32x encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 };
1374 u32x encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e };
1381 w0[0] = tmps[gid].out[0];
1382 w0[1] = tmps[gid].out[1];
1383 w0[2] = tmps[gid].out[2];
1384 w0[3] = tmps[gid].out[3];
1385 w1[0] = tmps[gid].out[4];
1386 w1[1] = tmps[gid].out[5];
1387 w1[2] = tmps[gid].out[6];
1388 w1[3] = tmps[gid].out[7];
1389 w2[0] = hl32_to_64 (encryptedVerifierHashInputBlockKey[0], encryptedVerifierHashInputBlockKey[1]);
1390 w2[1] = 0x8000000000000000;
1396 w3[3] = (64 + 8) * 8;
1400 digest0[0] = SHA512M_A;
1401 digest0[1] = SHA512M_B;
1402 digest0[2] = SHA512M_C;
1403 digest0[3] = SHA512M_D;
1404 digest0[4] = SHA512M_E;
1405 digest0[5] = SHA512M_F;
1406 digest0[6] = SHA512M_G;
1407 digest0[7] = SHA512M_H;
1409 sha512_transform (w0, w1, w2, w3, digest0);
1411 w0[0] = tmps[gid].out[0];
1412 w0[1] = tmps[gid].out[1];
1413 w0[2] = tmps[gid].out[2];
1414 w0[3] = tmps[gid].out[3];
1415 w1[0] = tmps[gid].out[4];
1416 w1[1] = tmps[gid].out[5];
1417 w1[2] = tmps[gid].out[6];
1418 w1[3] = tmps[gid].out[7];
1419 w2[0] = hl32_to_64 (encryptedVerifierHashValueBlockKey[0], encryptedVerifierHashValueBlockKey[1]);
1420 w2[1] = 0x8000000000000000;
1426 w3[3] = (64 + 8) * 8;
1430 digest1[0] = SHA512M_A;
1431 digest1[1] = SHA512M_B;
1432 digest1[2] = SHA512M_C;
1433 digest1[3] = SHA512M_D;
1434 digest1[4] = SHA512M_E;
1435 digest1[5] = SHA512M_F;
1436 digest1[6] = SHA512M_G;
1437 digest1[7] = SHA512M_H;
1439 sha512_transform (w0, w1, w2, w3, digest1);
1441 // now we got the AES key, decrypt the verifier
1448 data[0] = office2013_bufs[salt_pos].encryptedVerifier[0];
1449 data[1] = office2013_bufs[salt_pos].encryptedVerifier[1];
1450 data[2] = office2013_bufs[salt_pos].encryptedVerifier[2];
1451 data[3] = office2013_bufs[salt_pos].encryptedVerifier[3];
1455 ukeyx[0] = h32_from_64 (digest0[0]);
1456 ukeyx[1] = l32_from_64 (digest0[0]);
1457 ukeyx[2] = h32_from_64 (digest0[1]);
1458 ukeyx[3] = l32_from_64 (digest0[1]);
1459 ukeyx[4] = h32_from_64 (digest0[2]);
1460 ukeyx[5] = l32_from_64 (digest0[2]);
1461 ukeyx[6] = h32_from_64 (digest0[3]);
1462 ukeyx[7] = l32_from_64 (digest0[3]);
1464 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1466 for (u32 i = 0; i < 60; i++) rdk[i] = rek[i];
1468 AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1472 AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1474 out[0] ^= salt_bufs[salt_pos].salt_buf[0];
1475 out[1] ^= salt_bufs[salt_pos].salt_buf[1];
1476 out[2] ^= salt_bufs[salt_pos].salt_buf[2];
1477 out[3] ^= salt_bufs[salt_pos].salt_buf[3];
1481 w0[0] = hl32_to_64 (out[0], out[1]);
1482 w0[1] = hl32_to_64 (out[2], out[3]);
1483 w0[2] = 0x8000000000000000;
1500 digest[0] = SHA512M_A;
1501 digest[1] = SHA512M_B;
1502 digest[2] = SHA512M_C;
1503 digest[3] = SHA512M_D;
1504 digest[4] = SHA512M_E;
1505 digest[5] = SHA512M_F;
1506 digest[6] = SHA512M_G;
1507 digest[7] = SHA512M_H;
1509 sha512_transform (w0, w1, w2, w3, digest);
1511 // encrypt with 2nd key
1513 ukeyx[0] = h32_from_64 (digest1[0]);
1514 ukeyx[1] = l32_from_64 (digest1[0]);
1515 ukeyx[2] = h32_from_64 (digest1[1]);
1516 ukeyx[3] = l32_from_64 (digest1[1]);
1517 ukeyx[4] = h32_from_64 (digest1[2]);
1518 ukeyx[5] = l32_from_64 (digest1[2]);
1519 ukeyx[6] = h32_from_64 (digest1[3]);
1520 ukeyx[7] = l32_from_64 (digest1[3]);
1522 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1524 data[0] = h32_from_64 (digest[0]) ^ salt_bufs[salt_pos].salt_buf[0];
1525 data[1] = l32_from_64 (digest[0]) ^ salt_bufs[salt_pos].salt_buf[1];
1526 data[2] = h32_from_64 (digest[1]) ^ salt_bufs[salt_pos].salt_buf[2];
1527 data[3] = l32_from_64 (digest[1]) ^ salt_bufs[salt_pos].salt_buf[3];
1529 AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1531 const u32x r0 = out[0];
1532 const u32x r1 = out[1];
1533 const u32x r2 = out[2];
1534 const u32x r3 = out[3];
1538 #include VECT_COMPARE_M