2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 __constant u32 te0[256] =
25 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
26 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
27 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
28 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
29 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
30 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
31 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
32 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
33 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
34 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
35 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
36 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
37 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
38 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
39 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
40 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
41 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
42 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
43 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
44 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
45 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
46 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
47 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
48 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
49 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
50 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
51 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
52 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
53 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
54 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
55 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
56 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
57 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
58 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
59 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
60 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
61 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
62 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
63 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
64 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
65 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
66 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
67 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
68 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
69 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
70 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
71 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
72 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
73 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
74 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
75 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
76 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
77 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
78 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
79 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
80 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
81 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
82 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
83 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
84 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
85 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
86 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
87 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
88 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
91 __constant u32 te1[256] =
93 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
94 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
95 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
96 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
97 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
98 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
99 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
100 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
101 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
102 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
103 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
104 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
105 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
106 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
107 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
108 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
109 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
110 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
111 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
112 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
113 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
114 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
115 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
116 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
117 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
118 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
119 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
120 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
121 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
122 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
123 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
124 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
125 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
126 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
127 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
128 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
129 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
130 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
131 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
132 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
133 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
134 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
135 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
136 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
137 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
138 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
139 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
140 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
141 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
142 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
143 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
144 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
145 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
146 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
147 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
148 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
149 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
150 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
151 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
152 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
153 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
154 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
155 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
156 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
159 __constant u32 te2[256] =
161 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
162 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
163 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
164 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
165 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
166 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
167 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
168 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
169 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
170 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
171 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
172 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
173 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
174 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
175 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
176 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
177 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
178 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
179 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
180 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
181 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
182 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
183 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
184 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
185 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
186 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
187 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
188 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
189 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
190 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
191 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
192 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
193 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
194 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
195 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
196 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
197 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
198 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
199 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
200 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
201 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
202 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
203 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
204 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
205 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
206 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
207 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
208 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
209 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
210 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
211 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
212 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
213 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
214 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
215 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
216 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
217 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
218 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
219 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
220 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
221 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
222 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
223 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
224 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
227 __constant u32 te3[256] =
229 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
230 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
231 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
232 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
233 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
234 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
235 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
236 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
237 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
238 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
239 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
240 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
241 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
242 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
243 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
244 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
245 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
246 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
247 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
248 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
249 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
250 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
251 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
252 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
253 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
254 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
255 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
256 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
257 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
258 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
259 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
260 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
261 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
262 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
263 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
264 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
265 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
266 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
267 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
268 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
269 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
270 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
271 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
272 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
273 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
274 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
275 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
276 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
277 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
278 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
279 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
280 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
281 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
282 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
283 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
284 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
285 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
286 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
287 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
288 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
289 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
290 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
291 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
292 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
295 __constant u32 te4[256] =
297 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
298 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
299 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
300 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
301 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
302 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
303 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
304 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
305 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
306 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
307 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
308 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
309 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
310 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
311 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
312 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
313 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
314 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
315 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
316 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
317 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
318 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
319 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
320 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
321 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
322 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
323 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
324 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
325 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
326 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
327 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
328 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
329 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
330 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
331 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
332 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
333 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
334 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
335 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
336 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
337 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
338 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
339 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
340 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
341 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
342 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
343 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
344 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
345 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
346 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
347 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
348 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
349 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
350 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
351 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
352 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
353 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
354 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
355 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
356 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
357 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
358 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
359 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
360 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
363 __constant u32 td0[256] =
365 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
366 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
367 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
368 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
369 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
370 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
371 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
372 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
373 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
374 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
375 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
376 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
377 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
378 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
379 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
380 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
381 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
382 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
383 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
384 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
385 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
386 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
387 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
388 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
389 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
390 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
391 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
392 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
393 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
394 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
395 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
396 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
397 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
398 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
399 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
400 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
401 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
402 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
403 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
404 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
405 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
406 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
407 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
408 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
409 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
410 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
411 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
412 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
413 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
414 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
415 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
416 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
417 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
418 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
419 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
420 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
421 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
422 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
423 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
424 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
425 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
426 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
427 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
428 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
431 __constant u32 td1[256] =
433 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
434 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
435 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
436 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
437 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
438 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
439 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
440 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
441 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
442 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
443 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
444 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
445 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
446 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
447 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
448 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
449 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
450 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
451 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
452 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
453 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
454 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
455 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
456 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
457 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
458 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
459 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
460 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
461 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
462 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
463 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
464 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
465 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
466 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
467 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
468 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
469 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
470 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
471 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
472 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
473 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
474 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
475 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
476 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
477 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
478 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
479 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
480 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
481 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
482 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
483 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
484 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
485 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
486 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
487 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
488 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
489 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
490 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
491 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
492 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
493 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
494 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
495 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
496 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
499 __constant u32 td2[256] =
501 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
502 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
503 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
504 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
505 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
506 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
507 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
508 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
509 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
510 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
511 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
512 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
513 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
514 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
515 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
516 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
517 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
518 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
519 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
520 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
521 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
522 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
523 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
524 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
525 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
526 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
527 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
528 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
529 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
530 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
531 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
532 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
533 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
534 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
535 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
536 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
537 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
538 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
539 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
540 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
541 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
542 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
543 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
544 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
545 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
546 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
547 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
548 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
549 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
550 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
551 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
552 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
553 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
554 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
555 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
556 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
557 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
558 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
559 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
560 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
561 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
562 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
563 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
564 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
567 __constant u32 td3[256] =
569 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
570 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
571 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
572 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
573 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
574 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
575 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
576 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
577 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
578 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
579 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
580 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
581 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
582 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
583 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
584 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
585 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
586 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
587 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
588 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
589 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
590 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
591 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
592 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
593 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
594 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
595 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
596 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
597 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
598 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
599 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
600 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
601 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
602 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
603 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
604 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
605 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
606 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
607 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
608 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
609 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
610 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
611 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
612 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
613 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
614 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
615 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
616 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
617 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
618 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
619 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
620 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
621 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
622 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
623 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
624 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
625 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
626 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
627 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
628 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
629 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
630 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
631 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
632 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
635 __constant u32 td4[256] =
637 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
638 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
639 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
640 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
641 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
642 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
643 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
644 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
645 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
646 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
647 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
648 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
649 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
650 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
651 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
652 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
653 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
654 0x86868686, 0x68686868, 0x98989898, 0x16161616,
655 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
656 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
657 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
658 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
659 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
660 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
661 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
662 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
663 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
664 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
665 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
666 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
667 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
668 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
669 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
670 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
671 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
672 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
673 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
674 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
675 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
676 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
677 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
678 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
679 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
680 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
681 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
682 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
683 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
684 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
685 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
686 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
687 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
688 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
689 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
690 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
691 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
692 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
693 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
694 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
695 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
696 0x83838383, 0x53535353, 0x99999999, 0x61616161,
697 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
698 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
699 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
700 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
703 __constant u32 rcon[] =
705 0x01000000, 0x02000000, 0x04000000, 0x08000000,
706 0x10000000, 0x20000000, 0x40000000, 0x80000000,
707 0x1b000000, 0x36000000,
710 static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
729 u32 temp = rek[j + 7];
731 rek[j + 8] = rek[j + 0]
732 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
733 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
734 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
735 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
738 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
739 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
740 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
746 rek[j + 12] = rek[j + 4]
747 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
748 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
749 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
750 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
752 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
753 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
754 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
760 static void AES256_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
762 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
766 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
767 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
768 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
769 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
772 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
775 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
776 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
777 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
778 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
781 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
782 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
783 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
784 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
787 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
788 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
789 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
790 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
793 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
794 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
795 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
796 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
800 static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
802 u32 s0 = in[0] ^ rdk[0];
803 u32 s1 = in[1] ^ rdk[1];
804 u32 s2 = in[2] ^ rdk[2];
805 u32 s3 = in[3] ^ rdk[3];
812 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
813 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
814 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
815 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
816 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
817 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
818 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
819 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
820 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
821 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
822 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
823 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
824 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
825 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
826 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
827 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
828 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
829 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
830 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
831 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
832 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
833 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
834 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
835 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
836 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
837 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
838 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
839 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
840 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
841 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
842 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
843 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
844 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
845 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
846 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
847 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
848 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
849 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
850 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
851 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
852 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
853 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
854 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
855 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
856 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
857 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
858 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
859 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
860 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
861 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
862 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
863 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
865 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
866 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
867 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
868 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
871 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
872 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
873 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
874 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
877 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
878 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
879 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
880 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
883 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
884 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
885 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
886 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
890 static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
892 u32 s0 = in[0] ^ rek[0];
893 u32 s1 = in[1] ^ rek[1];
894 u32 s2 = in[2] ^ rek[2];
895 u32 s3 = in[3] ^ rek[3];
902 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
903 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
904 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
905 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
906 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
907 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
908 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
909 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
910 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
911 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
912 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
913 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
914 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
915 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
916 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
917 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
918 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
919 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
920 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
921 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
922 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
923 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
924 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
925 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
926 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
927 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
928 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
929 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
930 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
931 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
932 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
933 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
934 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
935 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
936 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
937 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
938 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40];
939 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41];
940 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42];
941 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43];
942 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44];
943 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45];
944 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46];
945 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47];
946 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48];
947 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49];
948 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50];
949 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51];
950 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52];
951 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53];
952 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54];
953 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55];
955 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
956 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
957 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
958 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
961 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
962 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
963 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
964 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
967 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
968 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
969 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
970 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
973 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
974 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
975 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
976 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
980 __constant u64 k_sha512[80] =
982 SHA512C00, SHA512C01, SHA512C02, SHA512C03,
983 SHA512C04, SHA512C05, SHA512C06, SHA512C07,
984 SHA512C08, SHA512C09, SHA512C0a, SHA512C0b,
985 SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f,
986 SHA512C10, SHA512C11, SHA512C12, SHA512C13,
987 SHA512C14, SHA512C15, SHA512C16, SHA512C17,
988 SHA512C18, SHA512C19, SHA512C1a, SHA512C1b,
989 SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f,
990 SHA512C20, SHA512C21, SHA512C22, SHA512C23,
991 SHA512C24, SHA512C25, SHA512C26, SHA512C27,
992 SHA512C28, SHA512C29, SHA512C2a, SHA512C2b,
993 SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f,
994 SHA512C30, SHA512C31, SHA512C32, SHA512C33,
995 SHA512C34, SHA512C35, SHA512C36, SHA512C37,
996 SHA512C38, SHA512C39, SHA512C3a, SHA512C3b,
997 SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f,
998 SHA512C40, SHA512C41, SHA512C42, SHA512C43,
999 SHA512C44, SHA512C45, SHA512C46, SHA512C47,
1000 SHA512C48, SHA512C49, SHA512C4a, SHA512C4b,
1001 SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
1004 static void sha512_transform (volatile const u64 w0[4], volatile const u64 w1[4], volatile const u64 w2[4], volatile const u64 w3[4], volatile u64 dgst[8])
1032 #define ROUND_EXPAND() \
1034 w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
1035 w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
1036 w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
1037 w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
1038 w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
1039 w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
1040 w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
1041 w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
1042 w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
1043 w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
1044 wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
1045 wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
1046 wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
1047 wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
1048 we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
1049 wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
1052 #define ROUND_STEP(i) \
1054 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
1055 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
1056 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
1057 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
1058 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
1059 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
1060 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
1061 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
1062 SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
1063 SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
1064 SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
1065 SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
1066 SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
1067 SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
1068 SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
1069 SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
1075 for (int i = 16; i < 80; i += 16)
1077 ROUND_EXPAND (); ROUND_STEP (i);
1090 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2013_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2013_t *office2013_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1096 const u32 gid = get_global_id (0);
1098 if (gid >= gid_max) return;
1102 w0[0] = pws[gid].i[ 0];
1103 w0[1] = pws[gid].i[ 1];
1104 w0[2] = pws[gid].i[ 2];
1105 w0[3] = pws[gid].i[ 3];
1109 w1[0] = pws[gid].i[ 4];
1110 w1[1] = pws[gid].i[ 5];
1111 w1[2] = pws[gid].i[ 6];
1112 w1[3] = pws[gid].i[ 7];
1116 w2[0] = pws[gid].i[ 8];
1117 w2[1] = pws[gid].i[ 9];
1118 w2[2] = pws[gid].i[10];
1119 w2[3] = pws[gid].i[11];
1123 w3[0] = pws[gid].i[12];
1124 w3[1] = pws[gid].i[13];
1125 w3[2] = pws[gid].i[14];
1126 w3[3] = pws[gid].i[15];
1128 u32 pw_len = pws[gid].pw_len;
1130 append_0x80_4x4 (w0, w1, w2, w3, pw_len);
1132 make_unicode (w1, w2, w3);
1133 make_unicode (w0, w0, w1);
1139 u32 salt_len = salt_bufs[salt_pos].salt_len;
1143 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1144 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1145 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1146 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1154 t0[0] = (u64) salt_buf[0] << 32 | salt_buf[1];
1155 t0[1] = (u64) salt_buf[2] << 32 | salt_buf[3];
1156 t0[2] = (u64) swap32 (w0[0]) << 32 | swap32 (w0[1]);
1157 t0[3] = (u64) swap32 (w0[2]) << 32 | swap32 (w0[3]);
1161 t1[0] = (u64) swap32 (w1[0]) << 32 | swap32 (w1[1]);
1162 t1[1] = (u64) swap32 (w1[2]) << 32 | swap32 (w1[3]);
1163 t1[2] = (u64) swap32 (w2[0]) << 32 | swap32 (w2[1]);
1164 t1[3] = (u64) swap32 (w2[2]) << 32 | swap32 (w2[3]);
1168 t2[0] = (u64) swap32 (w3[0]) << 32 | swap32 (w3[1]);
1169 t2[1] = (u64) swap32 (w3[2]) << 32 | swap32 (w3[3]);
1178 t3[3] = (salt_len + (pw_len * 2)) * 8;
1182 digest[0] = SHA512M_A;
1183 digest[1] = SHA512M_B;
1184 digest[2] = SHA512M_C;
1185 digest[3] = SHA512M_D;
1186 digest[4] = SHA512M_E;
1187 digest[5] = SHA512M_F;
1188 digest[6] = SHA512M_G;
1189 digest[7] = SHA512M_H;
1191 sha512_transform (t0, t1, t2, t3, digest);
1193 tmps[gid].out[0] = digest[0];
1194 tmps[gid].out[1] = digest[1];
1195 tmps[gid].out[2] = digest[2];
1196 tmps[gid].out[3] = digest[3];
1197 tmps[gid].out[4] = digest[4];
1198 tmps[gid].out[5] = digest[5];
1199 tmps[gid].out[6] = digest[6];
1200 tmps[gid].out[7] = digest[7];
1203 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2013_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2013_t *office2013_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1205 const u32 gid = get_global_id (0);
1207 if (gid >= gid_max) return;
1211 w0[0] = tmps[gid].out[0] >> 32;
1212 w0[1] = tmps[gid].out[0] << 32 | tmps[gid].out[1] >> 32;
1213 w0[2] = tmps[gid].out[1] << 32 | tmps[gid].out[2] >> 32;
1214 w0[3] = tmps[gid].out[2] << 32 | tmps[gid].out[3] >> 32;
1218 w1[0] = tmps[gid].out[3] << 32 | tmps[gid].out[4] >> 32;
1219 w1[1] = tmps[gid].out[4] << 32 | tmps[gid].out[5] >> 32;
1220 w1[2] = tmps[gid].out[5] << 32 | tmps[gid].out[6] >> 32;
1221 w1[3] = tmps[gid].out[6] << 32 | tmps[gid].out[7] >> 32;
1225 w2[0] = tmps[gid].out[7] << 32 | 0x80000000;
1235 w3[3] = (4 + 64) * 8;
1237 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1239 w0[0] = (u64) swap32 (j) << 32 | w0[0] & 0xffffffff;
1243 digest[0] = SHA512M_A;
1244 digest[1] = SHA512M_B;
1245 digest[2] = SHA512M_C;
1246 digest[3] = SHA512M_D;
1247 digest[4] = SHA512M_E;
1248 digest[5] = SHA512M_F;
1249 digest[6] = SHA512M_G;
1250 digest[7] = SHA512M_H;
1252 sha512_transform (w0, w1, w2, w3, digest);
1254 w0[0] = digest[0] >> 32;
1255 w0[1] = digest[0] << 32 | digest[1] >> 32;
1256 w0[2] = digest[1] << 32 | digest[2] >> 32;
1257 w0[3] = digest[2] << 32 | digest[3] >> 32;
1258 w1[0] = digest[3] << 32 | digest[4] >> 32;
1259 w1[1] = digest[4] << 32 | digest[5] >> 32;
1260 w1[2] = digest[5] << 32 | digest[6] >> 32;
1261 w1[3] = digest[6] << 32 | digest[7] >> 32;
1262 w2[0] = digest[7] << 32 | 0x80000000;
1265 tmps[gid].out[0] = w0[0] << 32 | w0[1] >> 32;
1266 tmps[gid].out[1] = w0[1] << 32 | w0[2] >> 32;
1267 tmps[gid].out[2] = w0[2] << 32 | w0[3] >> 32;
1268 tmps[gid].out[3] = w0[3] << 32 | w1[0] >> 32;
1269 tmps[gid].out[4] = w1[0] << 32 | w1[1] >> 32;
1270 tmps[gid].out[5] = w1[1] << 32 | w1[2] >> 32;
1271 tmps[gid].out[6] = w1[2] << 32 | w1[3] >> 32;
1272 tmps[gid].out[7] = w1[3] << 32 | w2[0] >> 32;
1275 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2013_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2013_t *office2013_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1277 const u32 gid = get_global_id (0);
1278 const u32 lid = get_local_id (0);
1284 const u32 lid4 = lid * 4;
1286 __local u32 s_td0[256];
1287 __local u32 s_td1[256];
1288 __local u32 s_td2[256];
1289 __local u32 s_td3[256];
1290 __local u32 s_td4[256];
1292 __local u32 s_te0[256];
1293 __local u32 s_te1[256];
1294 __local u32 s_te2[256];
1295 __local u32 s_te3[256];
1296 __local u32 s_te4[256];
1298 s_td0[lid4 + 0] = td0[lid4 + 0];
1299 s_td0[lid4 + 1] = td0[lid4 + 1];
1300 s_td0[lid4 + 2] = td0[lid4 + 2];
1301 s_td0[lid4 + 3] = td0[lid4 + 3];
1303 s_td1[lid4 + 0] = td1[lid4 + 0];
1304 s_td1[lid4 + 1] = td1[lid4 + 1];
1305 s_td1[lid4 + 2] = td1[lid4 + 2];
1306 s_td1[lid4 + 3] = td1[lid4 + 3];
1308 s_td2[lid4 + 0] = td2[lid4 + 0];
1309 s_td2[lid4 + 1] = td2[lid4 + 1];
1310 s_td2[lid4 + 2] = td2[lid4 + 2];
1311 s_td2[lid4 + 3] = td2[lid4 + 3];
1313 s_td3[lid4 + 0] = td3[lid4 + 0];
1314 s_td3[lid4 + 1] = td3[lid4 + 1];
1315 s_td3[lid4 + 2] = td3[lid4 + 2];
1316 s_td3[lid4 + 3] = td3[lid4 + 3];
1318 s_td4[lid4 + 0] = td4[lid4 + 0];
1319 s_td4[lid4 + 1] = td4[lid4 + 1];
1320 s_td4[lid4 + 2] = td4[lid4 + 2];
1321 s_td4[lid4 + 3] = td4[lid4 + 3];
1323 s_te0[lid4 + 0] = te0[lid4 + 0];
1324 s_te0[lid4 + 1] = te0[lid4 + 1];
1325 s_te0[lid4 + 2] = te0[lid4 + 2];
1326 s_te0[lid4 + 3] = te0[lid4 + 3];
1328 s_te1[lid4 + 0] = te1[lid4 + 0];
1329 s_te1[lid4 + 1] = te1[lid4 + 1];
1330 s_te1[lid4 + 2] = te1[lid4 + 2];
1331 s_te1[lid4 + 3] = te1[lid4 + 3];
1333 s_te2[lid4 + 0] = te2[lid4 + 0];
1334 s_te2[lid4 + 1] = te2[lid4 + 1];
1335 s_te2[lid4 + 2] = te2[lid4 + 2];
1336 s_te2[lid4 + 3] = te2[lid4 + 3];
1338 s_te3[lid4 + 0] = te3[lid4 + 0];
1339 s_te3[lid4 + 1] = te3[lid4 + 1];
1340 s_te3[lid4 + 2] = te3[lid4 + 2];
1341 s_te3[lid4 + 3] = te3[lid4 + 3];
1343 s_te4[lid4 + 0] = te4[lid4 + 0];
1344 s_te4[lid4 + 1] = te4[lid4 + 1];
1345 s_te4[lid4 + 2] = te4[lid4 + 2];
1346 s_te4[lid4 + 3] = te4[lid4 + 3];
1348 barrier (CLK_LOCAL_MEM_FENCE);
1350 if (gid >= gid_max) return;
1352 u32 encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 };
1353 u32 encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e };
1360 w0[0] = tmps[gid].out[0];
1361 w0[1] = tmps[gid].out[1];
1362 w0[2] = tmps[gid].out[2];
1363 w0[3] = tmps[gid].out[3];
1364 w1[0] = tmps[gid].out[4];
1365 w1[1] = tmps[gid].out[5];
1366 w1[2] = tmps[gid].out[6];
1367 w1[3] = tmps[gid].out[7];
1368 w2[0] = hl32_to_64 (encryptedVerifierHashInputBlockKey[0], encryptedVerifierHashInputBlockKey[1]);
1369 w2[1] = 0x8000000000000000;
1375 w3[3] = (64 + 8) * 8;
1379 digest0[0] = SHA512M_A;
1380 digest0[1] = SHA512M_B;
1381 digest0[2] = SHA512M_C;
1382 digest0[3] = SHA512M_D;
1383 digest0[4] = SHA512M_E;
1384 digest0[5] = SHA512M_F;
1385 digest0[6] = SHA512M_G;
1386 digest0[7] = SHA512M_H;
1388 sha512_transform (w0, w1, w2, w3, digest0);
1390 w0[0] = tmps[gid].out[0];
1391 w0[1] = tmps[gid].out[1];
1392 w0[2] = tmps[gid].out[2];
1393 w0[3] = tmps[gid].out[3];
1394 w1[0] = tmps[gid].out[4];
1395 w1[1] = tmps[gid].out[5];
1396 w1[2] = tmps[gid].out[6];
1397 w1[3] = tmps[gid].out[7];
1398 w2[0] = hl32_to_64 (encryptedVerifierHashValueBlockKey[0], encryptedVerifierHashValueBlockKey[1]);
1399 w2[1] = 0x8000000000000000;
1405 w3[3] = (64 + 8) * 8;
1409 digest1[0] = SHA512M_A;
1410 digest1[1] = SHA512M_B;
1411 digest1[2] = SHA512M_C;
1412 digest1[3] = SHA512M_D;
1413 digest1[4] = SHA512M_E;
1414 digest1[5] = SHA512M_F;
1415 digest1[6] = SHA512M_G;
1416 digest1[7] = SHA512M_H;
1418 sha512_transform (w0, w1, w2, w3, digest1);
1420 // now we got the AES key, decrypt the verifier
1427 data[0] = office2013_bufs[salt_pos].encryptedVerifier[0];
1428 data[1] = office2013_bufs[salt_pos].encryptedVerifier[1];
1429 data[2] = office2013_bufs[salt_pos].encryptedVerifier[2];
1430 data[3] = office2013_bufs[salt_pos].encryptedVerifier[3];
1434 ukeyx[0] = h32_from_64 (digest0[0]);
1435 ukeyx[1] = l32_from_64 (digest0[0]);
1436 ukeyx[2] = h32_from_64 (digest0[1]);
1437 ukeyx[3] = l32_from_64 (digest0[1]);
1438 ukeyx[4] = h32_from_64 (digest0[2]);
1439 ukeyx[5] = l32_from_64 (digest0[2]);
1440 ukeyx[6] = h32_from_64 (digest0[3]);
1441 ukeyx[7] = l32_from_64 (digest0[3]);
1443 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1445 for (u32 i = 0; i < 60; i++) rdk[i] = rek[i];
1447 AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1451 AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1453 out[0] ^= salt_bufs[salt_pos].salt_buf[0];
1454 out[1] ^= salt_bufs[salt_pos].salt_buf[1];
1455 out[2] ^= salt_bufs[salt_pos].salt_buf[2];
1456 out[3] ^= salt_bufs[salt_pos].salt_buf[3];
1460 w0[0] = hl32_to_64 (out[0], out[1]);
1461 w0[1] = hl32_to_64 (out[2], out[3]);
1462 w0[2] = 0x8000000000000000;
1479 digest[0] = SHA512M_A;
1480 digest[1] = SHA512M_B;
1481 digest[2] = SHA512M_C;
1482 digest[3] = SHA512M_D;
1483 digest[4] = SHA512M_E;
1484 digest[5] = SHA512M_F;
1485 digest[6] = SHA512M_G;
1486 digest[7] = SHA512M_H;
1488 sha512_transform (w0, w1, w2, w3, digest);
1490 // encrypt with 2nd key
1492 ukeyx[0] = h32_from_64 (digest1[0]);
1493 ukeyx[1] = l32_from_64 (digest1[0]);
1494 ukeyx[2] = h32_from_64 (digest1[1]);
1495 ukeyx[3] = l32_from_64 (digest1[1]);
1496 ukeyx[4] = h32_from_64 (digest1[2]);
1497 ukeyx[5] = l32_from_64 (digest1[2]);
1498 ukeyx[6] = h32_from_64 (digest1[3]);
1499 ukeyx[7] = l32_from_64 (digest1[3]);
1501 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1503 data[0] = h32_from_64 (digest[0]) ^ salt_bufs[salt_pos].salt_buf[0];
1504 data[1] = l32_from_64 (digest[0]) ^ salt_bufs[salt_pos].salt_buf[1];
1505 data[2] = h32_from_64 (digest[1]) ^ salt_bufs[salt_pos].salt_buf[2];
1506 data[3] = l32_from_64 (digest[1]) ^ salt_bufs[salt_pos].salt_buf[3];
1508 AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1510 const u32 r0 = out[0];
1511 const u32 r1 = out[1];
1512 const u32 r2 = out[2];
1513 const u32 r3 = out[3];