2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
21 #define COMPARE_M "check_multi_vect1_comp4.c"
24 __constant u32 te0[256] =
26 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
27 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
28 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
29 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
30 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
31 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
32 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
33 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
34 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
35 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
36 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
37 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
38 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
39 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
40 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
41 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
42 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
43 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
44 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
45 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
46 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
47 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
48 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
49 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
50 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
51 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
52 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
53 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
54 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
55 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
56 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
57 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
58 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
59 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
60 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
61 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
62 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
63 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
64 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
65 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
66 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
67 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
68 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
69 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
70 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
71 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
72 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
73 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
74 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
75 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
76 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
77 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
78 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
79 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
80 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
81 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
82 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
83 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
84 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
85 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
86 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
87 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
88 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
89 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
92 __constant u32 te1[256] =
94 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
95 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
96 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
97 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
98 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
99 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
100 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
101 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
102 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
103 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
104 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
105 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
106 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
107 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
108 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
109 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
110 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
111 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
112 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
113 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
114 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
115 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
116 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
117 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
118 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
119 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
120 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
121 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
122 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
123 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
124 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
125 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
126 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
127 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
128 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
129 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
130 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
131 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
132 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
133 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
134 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
135 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
136 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
137 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
138 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
139 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
140 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
141 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
142 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
143 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
144 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
145 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
146 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
147 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
148 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
149 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
150 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
151 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
152 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
153 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
154 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
155 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
156 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
157 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
160 __constant u32 te2[256] =
162 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
163 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
164 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
165 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
166 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
167 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
168 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
169 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
170 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
171 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
172 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
173 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
174 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
175 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
176 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
177 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
178 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
179 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
180 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
181 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
182 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
183 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
184 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
185 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
186 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
187 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
188 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
189 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
190 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
191 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
192 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
193 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
194 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
195 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
196 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
197 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
198 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
199 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
200 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
201 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
202 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
203 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
204 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
205 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
206 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
207 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
208 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
209 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
210 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
211 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
212 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
213 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
214 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
215 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
216 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
217 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
218 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
219 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
220 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
221 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
222 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
223 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
224 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
225 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
228 __constant u32 te3[256] =
230 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
231 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
232 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
233 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
234 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
235 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
236 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
237 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
238 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
239 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
240 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
241 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
242 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
243 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
244 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
245 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
246 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
247 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
248 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
249 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
250 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
251 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
252 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
253 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
254 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
255 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
256 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
257 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
258 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
259 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
260 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
261 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
262 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
263 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
264 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
265 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
266 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
267 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
268 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
269 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
270 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
271 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
272 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
273 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
274 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
275 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
276 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
277 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
278 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
279 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
280 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
281 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
282 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
283 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
284 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
285 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
286 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
287 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
288 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
289 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
290 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
291 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
292 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
293 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
296 __constant u32 te4[256] =
298 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
299 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
300 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
301 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
302 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
303 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
304 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
305 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
306 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
307 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
308 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
309 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
310 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
311 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
312 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
313 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
314 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
315 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
316 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
317 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
318 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
319 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
320 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
321 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
322 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
323 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
324 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
325 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
326 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
327 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
328 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
329 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
330 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
331 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
332 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
333 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
334 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
335 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
336 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
337 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
338 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
339 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
340 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
341 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
342 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
343 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
344 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
345 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
346 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
347 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
348 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
349 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
350 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
351 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
352 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
353 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
354 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
355 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
356 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
357 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
358 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
359 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
360 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
361 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
364 __constant u32 td0[256] =
366 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
367 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
368 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
369 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
370 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
371 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
372 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
373 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
374 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
375 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
376 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
377 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
378 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
379 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
380 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
381 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
382 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
383 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
384 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
385 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
386 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
387 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
388 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
389 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
390 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
391 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
392 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
393 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
394 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
395 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
396 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
397 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
398 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
399 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
400 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
401 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
402 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
403 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
404 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
405 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
406 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
407 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
408 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
409 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
410 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
411 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
412 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
413 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
414 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
415 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
416 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
417 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
418 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
419 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
420 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
421 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
422 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
423 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
424 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
425 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
426 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
427 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
428 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
429 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
432 __constant u32 td1[256] =
434 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
435 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
436 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
437 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
438 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
439 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
440 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
441 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
442 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
443 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
444 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
445 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
446 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
447 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
448 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
449 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
450 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
451 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
452 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
453 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
454 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
455 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
456 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
457 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
458 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
459 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
460 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
461 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
462 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
463 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
464 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
465 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
466 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
467 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
468 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
469 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
470 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
471 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
472 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
473 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
474 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
475 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
476 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
477 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
478 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
479 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
480 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
481 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
482 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
483 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
484 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
485 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
486 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
487 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
488 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
489 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
490 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
491 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
492 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
493 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
494 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
495 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
496 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
497 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
500 __constant u32 td2[256] =
502 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
503 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
504 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
505 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
506 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
507 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
508 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
509 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
510 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
511 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
512 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
513 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
514 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
515 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
516 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
517 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
518 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
519 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
520 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
521 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
522 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
523 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
524 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
525 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
526 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
527 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
528 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
529 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
530 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
531 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
532 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
533 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
534 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
535 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
536 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
537 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
538 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
539 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
540 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
541 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
542 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
543 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
544 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
545 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
546 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
547 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
548 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
549 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
550 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
551 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
552 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
553 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
554 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
555 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
556 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
557 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
558 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
559 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
560 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
561 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
562 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
563 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
564 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
565 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
568 __constant u32 td3[256] =
570 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
571 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
572 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
573 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
574 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
575 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
576 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
577 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
578 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
579 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
580 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
581 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
582 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
583 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
584 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
585 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
586 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
587 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
588 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
589 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
590 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
591 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
592 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
593 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
594 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
595 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
596 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
597 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
598 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
599 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
600 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
601 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
602 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
603 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
604 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
605 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
606 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
607 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
608 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
609 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
610 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
611 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
612 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
613 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
614 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
615 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
616 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
617 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
618 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
619 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
620 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
621 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
622 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
623 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
624 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
625 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
626 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
627 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
628 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
629 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
630 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
631 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
632 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
633 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
636 __constant u32 td4[256] =
638 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
639 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
640 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
641 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
642 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
643 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
644 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
645 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
646 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
647 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
648 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
649 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
650 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
651 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
652 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
653 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
654 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
655 0x86868686, 0x68686868, 0x98989898, 0x16161616,
656 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
657 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
658 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
659 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
660 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
661 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
662 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
663 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
664 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
665 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
666 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
667 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
668 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
669 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
670 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
671 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
672 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
673 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
674 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
675 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
676 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
677 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
678 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
679 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
680 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
681 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
682 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
683 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
684 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
685 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
686 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
687 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
688 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
689 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
690 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
691 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
692 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
693 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
694 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
695 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
696 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
697 0x83838383, 0x53535353, 0x99999999, 0x61616161,
698 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
699 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
700 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
701 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
704 __constant u32 rcon[] =
706 0x01000000, 0x02000000, 0x04000000, 0x08000000,
707 0x10000000, 0x20000000, 0x40000000, 0x80000000,
708 0x1b000000, 0x36000000,
711 static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
732 u32 temp = rek[j + 7];
734 rek[j + 8] = rek[j + 0]
735 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
736 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
737 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
738 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
741 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
742 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
743 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
753 rek[j + 12] = rek[j + 4]
754 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
755 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
756 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
757 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
759 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
760 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
761 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
767 static void AES256_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
769 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
773 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
774 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
775 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
776 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
779 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
782 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
783 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
784 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
785 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
788 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
789 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
790 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
791 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
794 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
795 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
796 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
797 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
800 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
801 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
802 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
803 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
807 static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
809 u32 s0 = in[0] ^ rdk[0];
810 u32 s1 = in[1] ^ rdk[1];
811 u32 s2 = in[2] ^ rdk[2];
812 u32 s3 = in[3] ^ rdk[3];
819 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
820 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
821 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
822 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
823 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
824 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
825 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
826 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
827 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
828 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
829 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
830 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
831 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
832 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
833 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
834 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
835 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
836 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
837 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
838 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
839 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
840 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
841 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
842 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
843 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
844 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
845 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
846 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
847 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
848 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
849 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
850 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
851 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
852 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
853 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
854 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
855 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
856 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
857 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
858 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
859 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
860 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
861 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
862 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
863 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
864 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
865 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
866 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
867 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
868 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
869 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
870 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
872 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
873 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
874 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
875 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
878 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
879 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
880 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
881 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
884 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
885 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
886 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
887 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
890 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
891 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
892 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
893 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
897 __constant u32 k_sha256[64] =
899 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
900 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
901 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
902 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
903 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
904 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
905 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
906 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
907 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
908 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
909 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
910 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
911 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
912 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
913 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
914 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
917 static void sha256_transform (const u32 w[16], u32 digest[8])
928 u32 w0_t = swap_workaround (w[ 0]);
929 u32 w1_t = swap_workaround (w[ 1]);
930 u32 w2_t = swap_workaround (w[ 2]);
931 u32 w3_t = swap_workaround (w[ 3]);
932 u32 w4_t = swap_workaround (w[ 4]);
933 u32 w5_t = swap_workaround (w[ 5]);
934 u32 w6_t = swap_workaround (w[ 6]);
935 u32 w7_t = swap_workaround (w[ 7]);
936 u32 w8_t = swap_workaround (w[ 8]);
937 u32 w9_t = swap_workaround (w[ 9]);
938 u32 wa_t = swap_workaround (w[10]);
939 u32 wb_t = swap_workaround (w[11]);
940 u32 wc_t = swap_workaround (w[12]);
941 u32 wd_t = swap_workaround (w[13]);
942 u32 we_t = swap_workaround (w[14]);
943 u32 wf_t = swap_workaround (w[15]);
945 #define ROUND_EXPAND() \
947 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
948 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
949 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
950 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
951 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
952 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
953 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
954 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
955 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
956 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
957 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
958 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
959 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
960 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
961 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
962 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
965 #define ROUND_STEP(i) \
967 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
968 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
969 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
970 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
971 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
972 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
973 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
974 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
975 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
976 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
977 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
978 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
979 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
980 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
981 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
982 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
988 for (int i = 16; i < 64; i += 16)
990 ROUND_EXPAND (); ROUND_STEP (i);
1003 __constant u32 crc32tab[0x100] =
1005 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
1006 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
1007 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
1008 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
1009 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
1010 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
1011 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
1012 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
1013 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
1014 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
1015 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
1016 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
1017 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
1018 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
1019 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
1020 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
1021 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
1022 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
1023 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
1024 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
1025 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
1026 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
1027 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
1028 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
1029 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
1030 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
1031 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
1032 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
1033 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
1034 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
1035 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
1036 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
1037 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
1038 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
1039 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
1040 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
1041 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
1042 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
1043 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
1044 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
1045 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
1046 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
1047 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
1048 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
1049 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
1050 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
1051 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
1052 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
1053 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
1054 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
1055 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
1056 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
1057 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
1058 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
1059 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
1060 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
1061 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
1062 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
1063 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
1064 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
1065 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
1066 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
1067 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
1068 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
1071 static u32 round_crc32 (u32 a, const u32 v)
1073 const u32 k = (a ^ v) & 0xff;
1075 const u32 s = a >> 8;
1082 a.s0 = crc32tab[k.s0];
1083 a.s1 = crc32tab[k.s1];
1087 a.s0 = crc32tab[k.s0];
1088 a.s1 = crc32tab[k.s1];
1089 a.s2 = crc32tab[k.s2];
1090 a.s3 = crc32tab[k.s3];
1098 static u32 crc32 (const u32 w[16], const u32 pw_len, const u32 iv)
1102 if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0);
1103 if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8);
1104 if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16);
1105 if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24);
1107 for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1)
1109 if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0);
1110 if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8);
1111 if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16);
1112 if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24);
1118 static void bzero16 (u32 block[16])
1138 static u32 memcat8c (u32 block[16], const u32 block_len, const u32 append[2], const u32 append_len, u32 digest[8])
1140 const u32 mod = block_len & 3;
1141 const u32 div = block_len / 4;
1147 const int offset_minus_4 = 4 - block_len;
1149 tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
1150 tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
1151 tmp2 = amd_bytealign ( 0, append[1], offset_minus_4);
1160 u32 carry[2] = { 0, 0 };
1164 case 0: block[ 0] |= tmp0;
1168 case 1: block[ 1] |= tmp0;
1172 case 2: block[ 2] |= tmp0;
1176 case 3: block[ 3] |= tmp0;
1180 case 4: block[ 4] |= tmp0;
1184 case 5: block[ 5] |= tmp0;
1188 case 6: block[ 6] |= tmp0;
1192 case 7: block[ 7] |= tmp0;
1196 case 8: block[ 8] |= tmp0;
1200 case 9: block[ 9] |= tmp0;
1204 case 10: block[10] |= tmp0;
1208 case 11: block[11] |= tmp0;
1212 case 12: block[12] |= tmp0;
1216 case 13: block[13] |= tmp0;
1220 case 14: block[14] |= tmp0;
1224 case 15: block[15] |= tmp0;
1230 u32 new_len = block_len + append_len;
1236 sha256_transform (block, digest);
1240 block[0] = carry[0];
1241 block[1] = carry[1];
1247 static u32 memcat32c (u32 block[16], const u32 block_len, const u32 append[8], const u32 append_len, u32 digest[8])
1249 const u32 mod = block_len & 3;
1250 const u32 div = block_len / 4;
1262 const int offset_minus_4 = 4 - block_len;
1264 tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
1265 tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
1266 tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
1267 tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
1268 tmp4 = amd_bytealign (append[4], append[3], offset_minus_4);
1269 tmp5 = amd_bytealign (append[5], append[4], offset_minus_4);
1270 tmp6 = amd_bytealign (append[6], append[5], offset_minus_4);
1271 tmp7 = amd_bytealign (append[7], append[6], offset_minus_4);
1272 tmp8 = amd_bytealign ( 0, append[7], offset_minus_4);
1287 u32 carry[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
1291 case 0: block[ 0] |= tmp0;
1301 case 1: block[ 1] |= tmp0;
1311 case 2: block[ 2] |= tmp0;
1321 case 3: block[ 3] |= tmp0;
1331 case 4: block[ 4] |= tmp0;
1341 case 5: block[ 5] |= tmp0;
1351 case 6: block[ 6] |= tmp0;
1361 case 7: block[ 7] |= tmp0;
1371 case 8: block[ 8] |= tmp0;
1381 case 9: block[ 9] |= tmp0;
1391 case 10: block[10] |= tmp0;
1401 case 11: block[11] |= tmp0;
1411 case 12: block[12] |= tmp0;
1421 case 13: block[13] |= tmp0;
1431 case 14: block[14] |= tmp0;
1441 case 15: block[15] |= tmp0;
1453 u32 new_len = block_len + append_len;
1459 sha256_transform (block, digest);
1463 block[0] = carry[0];
1464 block[1] = carry[1];
1465 block[2] = carry[2];
1466 block[3] = carry[3];
1467 block[4] = carry[4];
1468 block[5] = carry[5];
1469 block[6] = carry[6];
1470 block[7] = carry[7];
1476 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global seven_zip_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global seven_zip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1482 const u32 gid = get_global_id (0);
1484 if (gid >= gid_max) return;
1487 * algo starts here already
1492 dgst[0] = SHA256M_A;
1493 dgst[1] = SHA256M_B;
1494 dgst[2] = SHA256M_C;
1495 dgst[3] = SHA256M_D;
1496 dgst[4] = SHA256M_E;
1497 dgst[5] = SHA256M_F;
1498 dgst[6] = SHA256M_G;
1499 dgst[7] = SHA256M_H;
1512 tmps[gid].dgst[0] = dgst[0];
1513 tmps[gid].dgst[1] = dgst[1];
1514 tmps[gid].dgst[2] = dgst[2];
1515 tmps[gid].dgst[3] = dgst[3];
1516 tmps[gid].dgst[4] = dgst[4];
1517 tmps[gid].dgst[5] = dgst[5];
1518 tmps[gid].dgst[6] = dgst[6];
1519 tmps[gid].dgst[7] = dgst[7];
1521 tmps[gid].block[ 0] = block[ 0];
1522 tmps[gid].block[ 1] = block[ 1];
1523 tmps[gid].block[ 2] = block[ 2];
1524 tmps[gid].block[ 3] = block[ 3];
1525 tmps[gid].block[ 4] = block[ 4];
1526 tmps[gid].block[ 5] = block[ 5];
1527 tmps[gid].block[ 6] = block[ 6];
1528 tmps[gid].block[ 7] = block[ 7];
1529 tmps[gid].block[ 8] = block[ 8];
1530 tmps[gid].block[ 9] = block[ 9];
1531 tmps[gid].block[10] = block[10];
1532 tmps[gid].block[11] = block[11];
1533 tmps[gid].block[12] = block[12];
1534 tmps[gid].block[13] = block[13];
1535 tmps[gid].block[14] = block[14];
1536 tmps[gid].block[15] = block[15];
1538 tmps[gid].block_len = block_len;
1539 tmps[gid].final_len = final_len;
1542 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global seven_zip_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global seven_zip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1548 const u32 gid = get_global_id (0);
1550 if (gid >= gid_max) return;
1554 pw[0] = pws[gid].i[ 0];
1555 pw[1] = pws[gid].i[ 1];
1556 pw[2] = pws[gid].i[ 2];
1557 pw[3] = pws[gid].i[ 3];
1563 u32 pw_len = pws[gid].pw_len;
1565 make_unicode (&pw[0], &pw[0], &pw[4]);
1575 dgst[0] = tmps[gid].dgst[0];
1576 dgst[1] = tmps[gid].dgst[1];
1577 dgst[2] = tmps[gid].dgst[2];
1578 dgst[3] = tmps[gid].dgst[3];
1579 dgst[4] = tmps[gid].dgst[4];
1580 dgst[5] = tmps[gid].dgst[5];
1581 dgst[6] = tmps[gid].dgst[6];
1582 dgst[7] = tmps[gid].dgst[7];
1586 block[ 0] = tmps[gid].block[ 0];
1587 block[ 1] = tmps[gid].block[ 1];
1588 block[ 2] = tmps[gid].block[ 2];
1589 block[ 3] = tmps[gid].block[ 3];
1590 block[ 4] = tmps[gid].block[ 4];
1591 block[ 5] = tmps[gid].block[ 5];
1592 block[ 6] = tmps[gid].block[ 6];
1593 block[ 7] = tmps[gid].block[ 7];
1594 block[ 8] = tmps[gid].block[ 8];
1595 block[ 9] = tmps[gid].block[ 9];
1596 block[10] = tmps[gid].block[10];
1597 block[11] = tmps[gid].block[11];
1598 block[12] = tmps[gid].block[12];
1599 block[13] = tmps[gid].block[13];
1600 block[14] = tmps[gid].block[14];
1601 block[15] = tmps[gid].block[15];
1603 u32 block_len = tmps[gid].block_len;
1604 u32 final_len = tmps[gid].final_len;
1610 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1617 block_len = memcat32c (block, block_len, pw, pw_len, dgst); final_len += pw_len;
1618 block_len = memcat8c (block, block_len, it, 8, dgst); final_len += 8;
1625 tmps[gid].dgst[0] = dgst[0];
1626 tmps[gid].dgst[1] = dgst[1];
1627 tmps[gid].dgst[2] = dgst[2];
1628 tmps[gid].dgst[3] = dgst[3];
1629 tmps[gid].dgst[4] = dgst[4];
1630 tmps[gid].dgst[5] = dgst[5];
1631 tmps[gid].dgst[6] = dgst[6];
1632 tmps[gid].dgst[7] = dgst[7];
1634 tmps[gid].block[ 0] = block[ 0];
1635 tmps[gid].block[ 1] = block[ 1];
1636 tmps[gid].block[ 2] = block[ 2];
1637 tmps[gid].block[ 3] = block[ 3];
1638 tmps[gid].block[ 4] = block[ 4];
1639 tmps[gid].block[ 5] = block[ 5];
1640 tmps[gid].block[ 6] = block[ 6];
1641 tmps[gid].block[ 7] = block[ 7];
1642 tmps[gid].block[ 8] = block[ 8];
1643 tmps[gid].block[ 9] = block[ 9];
1644 tmps[gid].block[10] = block[10];
1645 tmps[gid].block[11] = block[11];
1646 tmps[gid].block[12] = block[12];
1647 tmps[gid].block[13] = block[13];
1648 tmps[gid].block[14] = block[14];
1649 tmps[gid].block[15] = block[15];
1651 tmps[gid].block_len = block_len;
1652 tmps[gid].final_len = final_len;
1655 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global seven_zip_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global seven_zip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1661 const u32 gid = get_global_id (0);
1662 const u32 lid = get_local_id (0);
1668 const u32 lid4 = lid * 4;
1670 __local u32 s_td0[256];
1671 __local u32 s_td1[256];
1672 __local u32 s_td2[256];
1673 __local u32 s_td3[256];
1674 __local u32 s_td4[256];
1676 __local u32 s_te0[256];
1677 __local u32 s_te1[256];
1678 __local u32 s_te2[256];
1679 __local u32 s_te3[256];
1680 __local u32 s_te4[256];
1682 s_td0[lid4 + 0] = td0[lid4 + 0];
1683 s_td0[lid4 + 1] = td0[lid4 + 1];
1684 s_td0[lid4 + 2] = td0[lid4 + 2];
1685 s_td0[lid4 + 3] = td0[lid4 + 3];
1687 s_td1[lid4 + 0] = td1[lid4 + 0];
1688 s_td1[lid4 + 1] = td1[lid4 + 1];
1689 s_td1[lid4 + 2] = td1[lid4 + 2];
1690 s_td1[lid4 + 3] = td1[lid4 + 3];
1692 s_td2[lid4 + 0] = td2[lid4 + 0];
1693 s_td2[lid4 + 1] = td2[lid4 + 1];
1694 s_td2[lid4 + 2] = td2[lid4 + 2];
1695 s_td2[lid4 + 3] = td2[lid4 + 3];
1697 s_td3[lid4 + 0] = td3[lid4 + 0];
1698 s_td3[lid4 + 1] = td3[lid4 + 1];
1699 s_td3[lid4 + 2] = td3[lid4 + 2];
1700 s_td3[lid4 + 3] = td3[lid4 + 3];
1702 s_td4[lid4 + 0] = td4[lid4 + 0];
1703 s_td4[lid4 + 1] = td4[lid4 + 1];
1704 s_td4[lid4 + 2] = td4[lid4 + 2];
1705 s_td4[lid4 + 3] = td4[lid4 + 3];
1707 s_te0[lid4 + 0] = te0[lid4 + 0];
1708 s_te0[lid4 + 1] = te0[lid4 + 1];
1709 s_te0[lid4 + 2] = te0[lid4 + 2];
1710 s_te0[lid4 + 3] = te0[lid4 + 3];
1712 s_te1[lid4 + 0] = te1[lid4 + 0];
1713 s_te1[lid4 + 1] = te1[lid4 + 1];
1714 s_te1[lid4 + 2] = te1[lid4 + 2];
1715 s_te1[lid4 + 3] = te1[lid4 + 3];
1717 s_te2[lid4 + 0] = te2[lid4 + 0];
1718 s_te2[lid4 + 1] = te2[lid4 + 1];
1719 s_te2[lid4 + 2] = te2[lid4 + 2];
1720 s_te2[lid4 + 3] = te2[lid4 + 3];
1722 s_te3[lid4 + 0] = te3[lid4 + 0];
1723 s_te3[lid4 + 1] = te3[lid4 + 1];
1724 s_te3[lid4 + 2] = te3[lid4 + 2];
1725 s_te3[lid4 + 3] = te3[lid4 + 3];
1727 s_te4[lid4 + 0] = te4[lid4 + 0];
1728 s_te4[lid4 + 1] = te4[lid4 + 1];
1729 s_te4[lid4 + 2] = te4[lid4 + 2];
1730 s_te4[lid4 + 3] = te4[lid4 + 3];
1732 barrier (CLK_LOCAL_MEM_FENCE);
1734 if (gid >= gid_max) return;
1742 dgst[0] = tmps[gid].dgst[0];
1743 dgst[1] = tmps[gid].dgst[1];
1744 dgst[2] = tmps[gid].dgst[2];
1745 dgst[3] = tmps[gid].dgst[3];
1746 dgst[4] = tmps[gid].dgst[4];
1747 dgst[5] = tmps[gid].dgst[5];
1748 dgst[6] = tmps[gid].dgst[6];
1749 dgst[7] = tmps[gid].dgst[7];
1753 block[ 0] = tmps[gid].block[ 0];
1754 block[ 1] = tmps[gid].block[ 1];
1755 block[ 2] = tmps[gid].block[ 2];
1756 block[ 3] = tmps[gid].block[ 3];
1757 block[ 4] = tmps[gid].block[ 4];
1758 block[ 5] = tmps[gid].block[ 5];
1759 block[ 6] = tmps[gid].block[ 6];
1760 block[ 7] = tmps[gid].block[ 7];
1761 block[ 8] = tmps[gid].block[ 8];
1762 block[ 9] = tmps[gid].block[ 9];
1763 block[10] = tmps[gid].block[10];
1764 block[11] = tmps[gid].block[11];
1765 block[12] = tmps[gid].block[12];
1766 block[13] = tmps[gid].block[13];
1767 block[14] = tmps[gid].block[14];
1768 block[15] = tmps[gid].block[15];
1770 u32 block_len = tmps[gid].block_len;
1771 u32 final_len = tmps[gid].final_len;
1773 append_0x80_4x4 (block, block_len);
1775 if (block_len >= 56)
1777 sha256_transform (block, dgst);
1782 block[15] = swap_workaround (final_len * 8);
1784 sha256_transform (block, dgst);
1787 * final key operations
1792 iv[0] = esalt_bufs[salt_pos].iv_buf[0];
1793 iv[1] = esalt_bufs[salt_pos].iv_buf[1];
1794 iv[2] = esalt_bufs[salt_pos].iv_buf[2];
1795 iv[3] = esalt_bufs[salt_pos].iv_buf[3];
1812 AES256_ExpandKey (ukey, rk, s_te0, s_te1, s_te2, s_te3, s_te4);
1814 AES256_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1818 int data_len = esalt_bufs[salt_pos].data_len;
1819 int unpack_size = esalt_bufs[salt_pos].unpack_size;
1824 for (i = 0, j = 0; i < data_len - 16; i += 16, j += 4)
1828 data[0] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 0]);
1829 data[1] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 1]);
1830 data[2] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 2]);
1831 data[3] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 3]);
1835 AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
1847 out[0] = swap_workaround (out[0]);
1848 out[1] = swap_workaround (out[1]);
1849 out[2] = swap_workaround (out[2]);
1850 out[3] = swap_workaround (out[3]);
1852 crc = crc32 (out, 16, crc);
1857 data[0] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 0]);
1858 data[1] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 1]);
1859 data[2] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 2]);
1860 data[3] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 3]);
1864 AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
1876 out[0] = swap_workaround (out[0]);
1877 out[1] = swap_workaround (out[1]);
1878 out[2] = swap_workaround (out[2]);
1879 out[3] = swap_workaround (out[3]);
1881 const u32 margin = data_len - unpack_size;
1883 const u32 left = 16 - margin;
1885 crc = crc32 (out, left, crc);
1887 // use padding attack in that case
1893 case 15: out[0] &= 0xffffff00;
1895 case 14: out[0] &= 0xffff0000;
1897 case 13: out[0] &= 0xff000000;
1899 case 12: out[0] = 0;
1901 case 11: out[0] = 0;
1902 out[1] &= 0xffffff00;
1904 case 10: out[0] = 0;
1905 out[1] &= 0xffff0000;
1908 out[1] &= 0xff000000;
1915 out[2] &= 0xffffff00;
1919 out[2] &= 0xffff0000;
1923 out[2] &= 0xff000000;
1932 out[3] &= 0xffffff00;
1937 out[3] &= 0xffff0000;
1942 out[3] &= 0xff000000;
1946 if ((out[0] == 0) && (out[1] == 0) && (out[2] == 0) && (out[3] == 0))
1948 mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0);
1950 d_return_buf[lid] = 1;