2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
40 __constant u32 te0[256] =
42 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
43 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
44 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
45 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
46 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
47 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
48 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
49 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
50 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
51 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
52 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
53 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
54 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
55 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
56 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
57 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
58 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
59 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
60 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
61 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
62 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
63 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
64 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
65 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
66 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
67 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
68 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
69 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
70 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
71 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
72 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
73 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
74 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
75 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
76 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
77 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
78 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
79 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
80 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
81 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
82 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
83 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
84 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
85 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
86 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
87 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
88 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
89 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
90 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
91 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
92 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
93 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
94 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
95 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
96 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
97 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
98 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
99 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
100 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
101 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
102 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
103 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
104 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
105 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
108 __constant u32 te1[256] =
110 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
111 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
112 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
113 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
114 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
115 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
116 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
117 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
118 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
119 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
120 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
121 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
122 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
123 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
124 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
125 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
126 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
127 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
128 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
129 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
130 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
131 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
132 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
133 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
134 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
135 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
136 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
137 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
138 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
139 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
140 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
141 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
142 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
143 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
144 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
145 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
146 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
147 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
148 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
149 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
150 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
151 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
152 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
153 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
154 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
155 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
156 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
157 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
158 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
159 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
160 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
161 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
162 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
163 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
164 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
165 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
166 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
167 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
168 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
169 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
170 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
171 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
172 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
173 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
176 __constant u32 te2[256] =
178 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
179 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
180 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
181 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
182 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
183 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
184 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
185 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
186 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
187 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
188 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
189 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
190 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
191 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
192 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
193 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
194 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
195 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
196 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
197 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
198 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
199 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
200 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
201 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
202 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
203 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
204 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
205 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
206 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
207 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
208 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
209 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
210 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
211 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
212 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
213 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
214 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
215 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
216 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
217 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
218 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
219 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
220 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
221 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
222 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
223 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
224 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
225 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
226 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
227 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
228 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
229 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
230 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
231 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
232 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
233 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
234 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
235 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
236 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
237 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
238 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
239 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
240 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
241 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
244 __constant u32 te3[256] =
246 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
247 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
248 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
249 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
250 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
251 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
252 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
253 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
254 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
255 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
256 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
257 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
258 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
259 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
260 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
261 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
262 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
263 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
264 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
265 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
266 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
267 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
268 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
269 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
270 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
271 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
272 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
273 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
274 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
275 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
276 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
277 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
278 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
279 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
280 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
281 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
282 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
283 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
284 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
285 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
286 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
287 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
288 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
289 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
290 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
291 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
292 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
293 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
294 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
295 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
296 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
297 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
298 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
299 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
300 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
301 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
302 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
303 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
304 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
305 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
306 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
307 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
308 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
309 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
312 __constant u32 te4[256] =
314 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
315 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
316 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
317 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
318 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
319 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
320 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
321 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
322 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
323 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
324 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
325 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
326 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
327 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
328 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
329 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
330 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
331 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
332 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
333 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
334 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
335 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
336 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
337 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
338 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
339 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
340 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
341 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
342 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
343 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
344 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
345 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
346 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
347 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
348 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
349 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
350 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
351 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
352 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
353 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
354 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
355 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
356 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
357 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
358 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
359 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
360 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
361 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
362 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
363 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
364 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
365 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
366 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
367 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
368 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
369 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
370 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
371 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
372 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
373 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
374 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
375 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
376 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
377 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
380 __constant u32 td0[256] =
382 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
383 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
384 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
385 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
386 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
387 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
388 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
389 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
390 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
391 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
392 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
393 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
394 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
395 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
396 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
397 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
398 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
399 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
400 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
401 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
402 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
403 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
404 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
405 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
406 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
407 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
408 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
409 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
410 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
411 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
412 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
413 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
414 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
415 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
416 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
417 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
418 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
419 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
420 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
421 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
422 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
423 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
424 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
425 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
426 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
427 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
428 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
429 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
430 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
431 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
432 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
433 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
434 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
435 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
436 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
437 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
438 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
439 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
440 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
441 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
442 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
443 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
444 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
445 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
448 __constant u32 td1[256] =
450 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
451 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
452 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
453 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
454 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
455 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
456 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
457 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
458 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
459 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
460 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
461 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
462 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
463 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
464 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
465 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
466 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
467 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
468 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
469 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
470 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
471 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
472 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
473 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
474 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
475 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
476 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
477 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
478 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
479 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
480 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
481 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
482 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
483 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
484 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
485 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
486 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
487 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
488 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
489 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
490 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
491 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
492 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
493 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
494 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
495 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
496 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
497 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
498 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
499 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
500 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
501 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
502 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
503 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
504 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
505 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
506 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
507 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
508 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
509 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
510 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
511 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
512 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
513 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
516 __constant u32 td2[256] =
518 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
519 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
520 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
521 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
522 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
523 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
524 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
525 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
526 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
527 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
528 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
529 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
530 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
531 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
532 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
533 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
534 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
535 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
536 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
537 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
538 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
539 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
540 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
541 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
542 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
543 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
544 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
545 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
546 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
547 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
548 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
549 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
550 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
551 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
552 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
553 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
554 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
555 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
556 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
557 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
558 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
559 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
560 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
561 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
562 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
563 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
564 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
565 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
566 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
567 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
568 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
569 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
570 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
571 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
572 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
573 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
574 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
575 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
576 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
577 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
578 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
579 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
580 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
581 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
584 __constant u32 td3[256] =
586 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
587 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
588 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
589 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
590 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
591 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
592 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
593 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
594 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
595 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
596 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
597 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
598 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
599 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
600 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
601 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
602 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
603 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
604 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
605 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
606 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
607 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
608 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
609 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
610 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
611 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
612 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
613 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
614 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
615 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
616 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
617 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
618 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
619 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
620 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
621 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
622 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
623 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
624 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
625 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
626 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
627 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
628 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
629 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
630 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
631 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
632 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
633 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
634 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
635 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
636 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
637 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
638 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
639 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
640 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
641 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
642 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
643 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
644 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
645 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
646 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
647 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
648 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
649 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
652 __constant u32 td4[256] =
654 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
655 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
656 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
657 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
658 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
659 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
660 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
661 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
662 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
663 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
664 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
665 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
666 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
667 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
668 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
669 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
670 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
671 0x86868686, 0x68686868, 0x98989898, 0x16161616,
672 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
673 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
674 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
675 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
676 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
677 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
678 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
679 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
680 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
681 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
682 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
683 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
684 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
685 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
686 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
687 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
688 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
689 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
690 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
691 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
692 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
693 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
694 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
695 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
696 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
697 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
698 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
699 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
700 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
701 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
702 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
703 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
704 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
705 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
706 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
707 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
708 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
709 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
710 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
711 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
712 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
713 0x83838383, 0x53535353, 0x99999999, 0x61616161,
714 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
715 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
716 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
717 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
720 __constant u32 rcon[] =
722 0x01000000, 0x02000000, 0x04000000, 0x08000000,
723 0x10000000, 0x20000000, 0x40000000, 0x80000000,
724 0x1b000000, 0x36000000,
727 static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
748 u32 temp = rek[j + 7];
750 rek[j + 8] = rek[j + 0]
751 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
752 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
753 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
754 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
757 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
758 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
759 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
769 rek[j + 12] = rek[j + 4]
770 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
771 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
772 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
773 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
775 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
776 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
777 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
783 static void AES256_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
785 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
789 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
790 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
791 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
792 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
795 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
798 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
799 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
800 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
801 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
804 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
805 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
806 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
807 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
810 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
811 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
812 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
813 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
816 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
817 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
818 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
819 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
823 static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
825 u32 s0 = in[0] ^ rdk[0];
826 u32 s1 = in[1] ^ rdk[1];
827 u32 s2 = in[2] ^ rdk[2];
828 u32 s3 = in[3] ^ rdk[3];
835 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
836 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
837 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
838 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
839 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
840 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
841 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
842 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
843 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
844 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
845 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
846 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
847 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
848 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
849 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
850 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
851 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
852 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
853 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
854 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
855 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
856 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
857 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
858 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
859 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
860 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
861 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
862 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
863 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
864 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
865 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
866 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
867 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
868 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
869 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
870 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
871 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
872 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
873 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
874 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
875 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
876 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
877 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
878 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
879 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
880 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
881 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
882 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
883 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
884 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
885 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
886 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
888 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
889 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
890 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
891 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
894 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
895 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
896 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
897 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
900 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
901 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
902 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
903 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
906 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
907 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
908 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
909 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
913 static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
915 u32 s0 = in[0] ^ rek[0];
916 u32 s1 = in[1] ^ rek[1];
917 u32 s2 = in[2] ^ rek[2];
918 u32 s3 = in[3] ^ rek[3];
925 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
926 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
927 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
928 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
929 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
930 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
931 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
932 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
933 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
934 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
935 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
936 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
937 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
938 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
939 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
940 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
941 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
942 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
943 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
944 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
945 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
946 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
947 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
948 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
949 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
950 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
951 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
952 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
953 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
954 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
955 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
956 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
957 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
958 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
959 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
960 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
961 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40];
962 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41];
963 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42];
964 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43];
965 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44];
966 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45];
967 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46];
968 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47];
969 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48];
970 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49];
971 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50];
972 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51];
973 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52];
974 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53];
975 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54];
976 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55];
978 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
979 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
980 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
981 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
984 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
985 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
986 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
987 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
990 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
991 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
992 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
993 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
996 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
997 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
998 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
999 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
1003 __constant u32 k_sha256[64] =
1005 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
1006 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
1007 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
1008 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
1009 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
1010 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
1011 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
1012 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
1013 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
1014 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
1015 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
1016 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
1017 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
1018 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
1019 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
1020 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
1023 static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
1051 #define ROUND_EXPAND() \
1053 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
1054 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
1055 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
1056 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
1057 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
1058 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
1059 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
1060 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
1061 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
1062 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
1063 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
1064 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
1065 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
1066 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
1067 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
1068 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
1071 #define ROUND_STEP(i) \
1073 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
1074 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
1075 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
1076 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
1077 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
1078 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
1079 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
1080 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
1081 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
1082 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
1083 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
1084 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
1085 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
1086 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
1087 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
1088 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
1094 for (int i = 16; i < 64; i += 16)
1096 ROUND_EXPAND (); ROUND_STEP (i);
1109 static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8])
1111 w0[0] = w0[0] ^ 0x36363636;
1112 w0[1] = w0[1] ^ 0x36363636;
1113 w0[2] = w0[2] ^ 0x36363636;
1114 w0[3] = w0[3] ^ 0x36363636;
1115 w1[0] = w1[0] ^ 0x36363636;
1116 w1[1] = w1[1] ^ 0x36363636;
1117 w1[2] = w1[2] ^ 0x36363636;
1118 w1[3] = w1[3] ^ 0x36363636;
1119 w2[0] = w2[0] ^ 0x36363636;
1120 w2[1] = w2[1] ^ 0x36363636;
1121 w2[2] = w2[2] ^ 0x36363636;
1122 w2[3] = w2[3] ^ 0x36363636;
1123 w3[0] = w3[0] ^ 0x36363636;
1124 w3[1] = w3[1] ^ 0x36363636;
1125 w3[2] = w3[2] ^ 0x36363636;
1126 w3[3] = w3[3] ^ 0x36363636;
1128 ipad[0] = SHA256M_A;
1129 ipad[1] = SHA256M_B;
1130 ipad[2] = SHA256M_C;
1131 ipad[3] = SHA256M_D;
1132 ipad[4] = SHA256M_E;
1133 ipad[5] = SHA256M_F;
1134 ipad[6] = SHA256M_G;
1135 ipad[7] = SHA256M_H;
1137 sha256_transform (w0, w1, w2, w3, ipad);
1139 w0[0] = w0[0] ^ 0x6a6a6a6a;
1140 w0[1] = w0[1] ^ 0x6a6a6a6a;
1141 w0[2] = w0[2] ^ 0x6a6a6a6a;
1142 w0[3] = w0[3] ^ 0x6a6a6a6a;
1143 w1[0] = w1[0] ^ 0x6a6a6a6a;
1144 w1[1] = w1[1] ^ 0x6a6a6a6a;
1145 w1[2] = w1[2] ^ 0x6a6a6a6a;
1146 w1[3] = w1[3] ^ 0x6a6a6a6a;
1147 w2[0] = w2[0] ^ 0x6a6a6a6a;
1148 w2[1] = w2[1] ^ 0x6a6a6a6a;
1149 w2[2] = w2[2] ^ 0x6a6a6a6a;
1150 w2[3] = w2[3] ^ 0x6a6a6a6a;
1151 w3[0] = w3[0] ^ 0x6a6a6a6a;
1152 w3[1] = w3[1] ^ 0x6a6a6a6a;
1153 w3[2] = w3[2] ^ 0x6a6a6a6a;
1154 w3[3] = w3[3] ^ 0x6a6a6a6a;
1156 opad[0] = SHA256M_A;
1157 opad[1] = SHA256M_B;
1158 opad[2] = SHA256M_C;
1159 opad[3] = SHA256M_D;
1160 opad[4] = SHA256M_E;
1161 opad[5] = SHA256M_F;
1162 opad[6] = SHA256M_G;
1163 opad[7] = SHA256M_H;
1165 sha256_transform (w0, w1, w2, w3, opad);
1168 static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8])
1170 digest[0] = ipad[0];
1171 digest[1] = ipad[1];
1172 digest[2] = ipad[2];
1173 digest[3] = ipad[3];
1174 digest[4] = ipad[4];
1175 digest[5] = ipad[5];
1176 digest[6] = ipad[6];
1177 digest[7] = ipad[7];
1179 sha256_transform (w0, w1, w2, w3, digest);
1196 w3[3] = (64 + 32) * 8;
1198 digest[0] = opad[0];
1199 digest[1] = opad[1];
1200 digest[2] = opad[2];
1201 digest[3] = opad[3];
1202 digest[4] = opad[4];
1203 digest[5] = opad[5];
1204 digest[6] = opad[6];
1205 digest[7] = opad[7];
1207 sha256_transform (w0, w1, w2, w3, digest);
1210 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lastpass_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1216 const u32 gid = get_global_id (0);
1218 if (gid >= gid_max) return;
1222 w0[0] = pws[gid].i[ 0];
1223 w0[1] = pws[gid].i[ 1];
1224 w0[2] = pws[gid].i[ 2];
1225 w0[3] = pws[gid].i[ 3];
1229 w1[0] = pws[gid].i[ 4];
1230 w1[1] = pws[gid].i[ 5];
1231 w1[2] = pws[gid].i[ 6];
1232 w1[3] = pws[gid].i[ 7];
1236 w2[0] = pws[gid].i[ 8];
1237 w2[1] = pws[gid].i[ 9];
1238 w2[2] = pws[gid].i[10];
1239 w2[3] = pws[gid].i[11];
1243 w3[0] = pws[gid].i[12];
1244 w3[1] = pws[gid].i[13];
1245 w3[2] = pws[gid].i[14];
1246 w3[3] = pws[gid].i[15];
1252 u32 salt_len = salt_bufs[salt_pos].salt_len;
1257 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
1258 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
1259 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
1260 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
1261 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
1262 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
1263 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
1264 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
1270 w0[0] = swap_workaround (w0[0]);
1271 w0[1] = swap_workaround (w0[1]);
1272 w0[2] = swap_workaround (w0[2]);
1273 w0[3] = swap_workaround (w0[3]);
1274 w1[0] = swap_workaround (w1[0]);
1275 w1[1] = swap_workaround (w1[1]);
1276 w1[2] = swap_workaround (w1[2]);
1277 w1[3] = swap_workaround (w1[3]);
1278 w2[0] = swap_workaround (w2[0]);
1279 w2[1] = swap_workaround (w2[1]);
1280 w2[2] = swap_workaround (w2[2]);
1281 w2[3] = swap_workaround (w2[3]);
1282 w3[0] = swap_workaround (w3[0]);
1283 w3[1] = swap_workaround (w3[1]);
1284 w3[2] = swap_workaround (w3[2]);
1285 w3[3] = swap_workaround (w3[3]);
1290 hmac_sha256_pad (w0, w1, w2, w3, ipad, opad);
1292 tmps[gid].ipad[0] = ipad[0];
1293 tmps[gid].ipad[1] = ipad[1];
1294 tmps[gid].ipad[2] = ipad[2];
1295 tmps[gid].ipad[3] = ipad[3];
1296 tmps[gid].ipad[4] = ipad[4];
1297 tmps[gid].ipad[5] = ipad[5];
1298 tmps[gid].ipad[6] = ipad[6];
1299 tmps[gid].ipad[7] = ipad[7];
1301 tmps[gid].opad[0] = opad[0];
1302 tmps[gid].opad[1] = opad[1];
1303 tmps[gid].opad[2] = opad[2];
1304 tmps[gid].opad[3] = opad[3];
1305 tmps[gid].opad[4] = opad[4];
1306 tmps[gid].opad[5] = opad[5];
1307 tmps[gid].opad[6] = opad[6];
1308 tmps[gid].opad[7] = opad[7];
1310 w0[0] = salt_buf0[0];
1311 w0[1] = salt_buf0[1];
1312 w0[2] = salt_buf0[2];
1313 w0[3] = salt_buf0[3];
1314 w1[0] = salt_buf1[0];
1315 w1[1] = salt_buf1[1];
1316 w1[2] = salt_buf1[2];
1317 w1[3] = salt_buf1[3];
1327 append_0x01_3 (w0, w1, w2, salt_len + 3);
1328 append_0x80_3 (w0, w1, w2, salt_len + 4);
1330 w0[0] = swap_workaround (w0[0]);
1331 w0[1] = swap_workaround (w0[1]);
1332 w0[2] = swap_workaround (w0[2]);
1333 w0[3] = swap_workaround (w0[3]);
1334 w1[0] = swap_workaround (w1[0]);
1335 w1[1] = swap_workaround (w1[1]);
1336 w1[2] = swap_workaround (w1[2]);
1337 w1[3] = swap_workaround (w1[3]);
1338 w2[0] = swap_workaround (w2[0]);
1339 w2[1] = swap_workaround (w2[1]);
1345 w3[3] = (64 + salt_len + 4) * 8;
1349 hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst);
1351 tmps[gid].dgst[0] = dgst[0];
1352 tmps[gid].dgst[1] = dgst[1];
1353 tmps[gid].dgst[2] = dgst[2];
1354 tmps[gid].dgst[3] = dgst[3];
1355 tmps[gid].dgst[4] = dgst[4];
1356 tmps[gid].dgst[5] = dgst[5];
1357 tmps[gid].dgst[6] = dgst[6];
1358 tmps[gid].dgst[7] = dgst[7];
1360 tmps[gid].out[0] = dgst[0];
1361 tmps[gid].out[1] = dgst[1];
1362 tmps[gid].out[2] = dgst[2];
1363 tmps[gid].out[3] = dgst[3];
1364 tmps[gid].out[4] = dgst[4];
1365 tmps[gid].out[5] = dgst[5];
1366 tmps[gid].out[6] = dgst[6];
1367 tmps[gid].out[7] = dgst[7];
1370 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lastpass_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1372 const u32 gid = get_global_id (0);
1374 if (gid >= gid_max) return;
1379 ipad[0] = tmps[gid].ipad[0];
1380 ipad[1] = tmps[gid].ipad[1];
1381 ipad[2] = tmps[gid].ipad[2];
1382 ipad[3] = tmps[gid].ipad[3];
1383 ipad[4] = tmps[gid].ipad[4];
1384 ipad[5] = tmps[gid].ipad[5];
1385 ipad[6] = tmps[gid].ipad[6];
1386 ipad[7] = tmps[gid].ipad[7];
1388 opad[0] = tmps[gid].opad[0];
1389 opad[1] = tmps[gid].opad[1];
1390 opad[2] = tmps[gid].opad[2];
1391 opad[3] = tmps[gid].opad[3];
1392 opad[4] = tmps[gid].opad[4];
1393 opad[5] = tmps[gid].opad[5];
1394 opad[6] = tmps[gid].opad[6];
1395 opad[7] = tmps[gid].opad[7];
1400 dgst[0] = tmps[gid].dgst[0];
1401 dgst[1] = tmps[gid].dgst[1];
1402 dgst[2] = tmps[gid].dgst[2];
1403 dgst[3] = tmps[gid].dgst[3];
1404 dgst[4] = tmps[gid].dgst[4];
1405 dgst[5] = tmps[gid].dgst[5];
1406 dgst[6] = tmps[gid].dgst[6];
1407 dgst[7] = tmps[gid].dgst[7];
1409 out[0] = tmps[gid].out[0];
1410 out[1] = tmps[gid].out[1];
1411 out[2] = tmps[gid].out[2];
1412 out[3] = tmps[gid].out[3];
1413 out[4] = tmps[gid].out[4];
1414 out[5] = tmps[gid].out[5];
1415 out[6] = tmps[gid].out[6];
1416 out[7] = tmps[gid].out[7];
1418 for (u32 j = 0; j < loop_cnt; j++)
1440 w3[3] = (64 + 32) * 8;
1442 hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst);
1454 tmps[gid].dgst[0] = dgst[0];
1455 tmps[gid].dgst[1] = dgst[1];
1456 tmps[gid].dgst[2] = dgst[2];
1457 tmps[gid].dgst[3] = dgst[3];
1458 tmps[gid].dgst[4] = dgst[4];
1459 tmps[gid].dgst[5] = dgst[5];
1460 tmps[gid].dgst[6] = dgst[6];
1461 tmps[gid].dgst[7] = dgst[7];
1463 tmps[gid].out[0] = out[0];
1464 tmps[gid].out[1] = out[1];
1465 tmps[gid].out[2] = out[2];
1466 tmps[gid].out[3] = out[3];
1467 tmps[gid].out[4] = out[4];
1468 tmps[gid].out[5] = out[5];
1469 tmps[gid].out[6] = out[6];
1470 tmps[gid].out[7] = out[7];
1473 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lastpass_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1479 const u32 gid = get_global_id (0);
1480 const u32 lid = get_local_id (0);
1488 digests_buf[digests_offset].digest_buf[0],
1489 digests_buf[digests_offset].digest_buf[1],
1490 digests_buf[digests_offset].digest_buf[2],
1491 digests_buf[digests_offset].digest_buf[3],
1498 const u32 lid4 = lid * 4;
1500 __local u32 s_td0[256];
1501 __local u32 s_td1[256];
1502 __local u32 s_td2[256];
1503 __local u32 s_td3[256];
1504 __local u32 s_td4[256];
1506 __local u32 s_te0[256];
1507 __local u32 s_te1[256];
1508 __local u32 s_te2[256];
1509 __local u32 s_te3[256];
1510 __local u32 s_te4[256];
1512 s_td0[lid4 + 0] = td0[lid4 + 0];
1513 s_td0[lid4 + 1] = td0[lid4 + 1];
1514 s_td0[lid4 + 2] = td0[lid4 + 2];
1515 s_td0[lid4 + 3] = td0[lid4 + 3];
1517 s_td1[lid4 + 0] = td1[lid4 + 0];
1518 s_td1[lid4 + 1] = td1[lid4 + 1];
1519 s_td1[lid4 + 2] = td1[lid4 + 2];
1520 s_td1[lid4 + 3] = td1[lid4 + 3];
1522 s_td2[lid4 + 0] = td2[lid4 + 0];
1523 s_td2[lid4 + 1] = td2[lid4 + 1];
1524 s_td2[lid4 + 2] = td2[lid4 + 2];
1525 s_td2[lid4 + 3] = td2[lid4 + 3];
1527 s_td3[lid4 + 0] = td3[lid4 + 0];
1528 s_td3[lid4 + 1] = td3[lid4 + 1];
1529 s_td3[lid4 + 2] = td3[lid4 + 2];
1530 s_td3[lid4 + 3] = td3[lid4 + 3];
1532 s_td4[lid4 + 0] = td4[lid4 + 0];
1533 s_td4[lid4 + 1] = td4[lid4 + 1];
1534 s_td4[lid4 + 2] = td4[lid4 + 2];
1535 s_td4[lid4 + 3] = td4[lid4 + 3];
1537 s_te0[lid4 + 0] = te0[lid4 + 0];
1538 s_te0[lid4 + 1] = te0[lid4 + 1];
1539 s_te0[lid4 + 2] = te0[lid4 + 2];
1540 s_te0[lid4 + 3] = te0[lid4 + 3];
1542 s_te1[lid4 + 0] = te1[lid4 + 0];
1543 s_te1[lid4 + 1] = te1[lid4 + 1];
1544 s_te1[lid4 + 2] = te1[lid4 + 2];
1545 s_te1[lid4 + 3] = te1[lid4 + 3];
1547 s_te2[lid4 + 0] = te2[lid4 + 0];
1548 s_te2[lid4 + 1] = te2[lid4 + 1];
1549 s_te2[lid4 + 2] = te2[lid4 + 2];
1550 s_te2[lid4 + 3] = te2[lid4 + 3];
1552 s_te3[lid4 + 0] = te3[lid4 + 0];
1553 s_te3[lid4 + 1] = te3[lid4 + 1];
1554 s_te3[lid4 + 2] = te3[lid4 + 2];
1555 s_te3[lid4 + 3] = te3[lid4 + 3];
1557 s_te4[lid4 + 0] = te4[lid4 + 0];
1558 s_te4[lid4 + 1] = te4[lid4 + 1];
1559 s_te4[lid4 + 2] = te4[lid4 + 2];
1560 s_te4[lid4 + 3] = te4[lid4 + 3];
1562 barrier (CLK_LOCAL_MEM_FENCE);
1564 if (gid >= gid_max) return;
1572 ukey[0] = tmps[gid].out[0];
1573 ukey[1] = tmps[gid].out[1];
1574 ukey[2] = tmps[gid].out[2];
1575 ukey[3] = tmps[gid].out[3];
1576 ukey[4] = tmps[gid].out[4];
1577 ukey[5] = tmps[gid].out[5];
1578 ukey[6] = tmps[gid].out[6];
1579 ukey[7] = tmps[gid].out[7];
1585 AES256_ExpandKey (ukey, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1596 for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i];
1598 AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1600 AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1602 u32 salt_len = salt_bufs[salt_pos].salt_len;
1606 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1607 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1608 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1609 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1611 out[0] = swap_workaround (out[0]);
1612 out[1] = swap_workaround (out[1]);
1613 out[2] = swap_workaround (out[2]);
1614 out[3] = swap_workaround (out[3]);
1616 truncate_block (out, salt_len);
1618 if ((out[0] == salt_buf[0])
1619 && (out[1] == salt_buf[1])
1620 && (out[2] == salt_buf[2])
1621 && (out[3] == salt_buf[3]))
1623 mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0);
1625 d_return_buf[lid] = 1;
1632 const u32 lastpass_magic[4] =
1640 AES256_encrypt (lastpass_magic, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1642 const u32x r0 = out[DGST_R0];
1643 const u32x r1 = out[DGST_R1];
1644 const u32x r2 = out[DGST_R2];
1645 const u32x r3 = out[DGST_R3];
1649 #include VECT_COMPARE_M