2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
28 __device__ __constant__ u32 te0[256] =
30 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
31 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
32 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
33 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
34 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
35 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
36 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
37 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
38 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
39 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
40 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
41 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
42 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
43 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
44 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
45 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
46 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
47 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
48 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
49 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
50 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
51 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
52 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
53 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
54 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
55 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
56 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
57 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
58 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
59 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
60 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
61 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
62 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
63 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
64 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
65 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
66 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
67 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
68 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
69 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
70 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
71 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
72 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
73 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
74 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
75 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
76 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
77 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
78 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
79 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
80 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
81 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
82 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
83 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
84 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
85 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
86 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
87 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
88 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
89 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
90 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
91 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
92 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
93 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
96 __device__ __constant__ u32 te1[256] =
98 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
99 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
100 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
101 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
102 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
103 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
104 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
105 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
106 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
107 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
108 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
109 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
110 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
111 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
112 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
113 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
114 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
115 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
116 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
117 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
118 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
119 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
120 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
121 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
122 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
123 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
124 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
125 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
126 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
127 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
128 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
129 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
130 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
131 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
132 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
133 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
134 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
135 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
136 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
137 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
138 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
139 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
140 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
141 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
142 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
143 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
144 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
145 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
146 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
147 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
148 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
149 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
150 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
151 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
152 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
153 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
154 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
155 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
156 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
157 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
158 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
159 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
160 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
161 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
164 __device__ __constant__ u32 te2[256] =
166 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
167 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
168 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
169 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
170 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
171 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
172 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
173 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
174 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
175 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
176 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
177 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
178 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
179 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
180 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
181 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
182 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
183 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
184 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
185 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
186 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
187 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
188 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
189 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
190 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
191 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
192 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
193 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
194 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
195 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
196 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
197 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
198 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
199 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
200 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
201 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
202 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
203 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
204 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
205 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
206 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
207 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
208 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
209 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
210 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
211 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
212 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
213 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
214 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
215 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
216 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
217 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
218 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
219 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
220 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
221 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
222 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
223 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
224 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
225 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
226 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
227 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
228 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
229 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
232 __device__ __constant__ u32 te3[256] =
234 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
235 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
236 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
237 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
238 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
239 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
240 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
241 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
242 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
243 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
244 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
245 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
246 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
247 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
248 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
249 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
250 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
251 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
252 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
253 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
254 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
255 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
256 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
257 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
258 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
259 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
260 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
261 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
262 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
263 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
264 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
265 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
266 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
267 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
268 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
269 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
270 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
271 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
272 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
273 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
274 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
275 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
276 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
277 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
278 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
279 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
280 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
281 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
282 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
283 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
284 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
285 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
286 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
287 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
288 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
289 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
290 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
291 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
292 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
293 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
294 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
295 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
296 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
297 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
300 __device__ __constant__ u32 te4[256] =
302 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
303 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
304 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
305 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
306 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
307 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
308 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
309 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
310 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
311 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
312 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
313 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
314 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
315 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
316 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
317 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
318 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
319 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
320 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
321 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
322 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
323 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
324 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
325 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
326 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
327 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
328 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
329 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
330 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
331 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
332 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
333 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
334 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
335 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
336 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
337 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
338 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
339 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
340 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
341 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
342 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
343 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
344 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
345 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
346 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
347 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
348 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
349 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
350 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
351 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
352 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
353 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
354 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
355 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
356 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
357 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
358 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
359 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
360 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
361 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
362 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
363 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
364 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
365 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
368 __device__ __constant__ u32 td0[256] =
370 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
371 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
372 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
373 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
374 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
375 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
376 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
377 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
378 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
379 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
380 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
381 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
382 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
383 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
384 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
385 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
386 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
387 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
388 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
389 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
390 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
391 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
392 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
393 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
394 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
395 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
396 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
397 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
398 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
399 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
400 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
401 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
402 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
403 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
404 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
405 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
406 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
407 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
408 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
409 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
410 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
411 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
412 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
413 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
414 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
415 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
416 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
417 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
418 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
419 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
420 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
421 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
422 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
423 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
424 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
425 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
426 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
427 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
428 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
429 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
430 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
431 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
432 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
433 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
436 __device__ __constant__ u32 td1[256] =
438 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
439 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
440 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
441 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
442 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
443 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
444 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
445 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
446 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
447 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
448 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
449 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
450 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
451 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
452 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
453 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
454 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
455 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
456 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
457 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
458 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
459 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
460 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
461 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
462 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
463 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
464 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
465 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
466 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
467 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
468 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
469 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
470 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
471 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
472 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
473 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
474 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
475 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
476 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
477 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
478 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
479 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
480 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
481 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
482 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
483 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
484 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
485 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
486 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
487 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
488 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
489 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
490 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
491 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
492 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
493 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
494 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
495 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
496 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
497 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
498 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
499 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
500 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
501 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
504 __device__ __constant__ u32 td2[256] =
506 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
507 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
508 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
509 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
510 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
511 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
512 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
513 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
514 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
515 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
516 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
517 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
518 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
519 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
520 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
521 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
522 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
523 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
524 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
525 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
526 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
527 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
528 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
529 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
530 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
531 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
532 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
533 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
534 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
535 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
536 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
537 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
538 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
539 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
540 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
541 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
542 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
543 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
544 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
545 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
546 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
547 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
548 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
549 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
550 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
551 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
552 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
553 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
554 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
555 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
556 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
557 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
558 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
559 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
560 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
561 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
562 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
563 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
564 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
565 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
566 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
567 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
568 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
569 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
572 __device__ __constant__ u32 td3[256] =
574 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
575 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
576 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
577 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
578 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
579 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
580 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
581 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
582 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
583 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
584 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
585 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
586 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
587 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
588 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
589 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
590 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
591 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
592 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
593 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
594 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
595 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
596 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
597 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
598 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
599 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
600 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
601 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
602 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
603 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
604 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
605 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
606 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
607 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
608 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
609 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
610 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
611 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
612 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
613 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
614 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
615 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
616 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
617 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
618 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
619 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
620 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
621 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
622 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
623 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
624 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
625 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
626 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
627 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
628 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
629 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
630 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
631 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
632 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
633 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
634 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
635 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
636 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
637 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
640 __device__ __constant__ u32 td4[256] =
642 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
643 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
644 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
645 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
646 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
647 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
648 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
649 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
650 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
651 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
652 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
653 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
654 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
655 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
656 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
657 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
658 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
659 0x86868686, 0x68686868, 0x98989898, 0x16161616,
660 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
661 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
662 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
663 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
664 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
665 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
666 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
667 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
668 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
669 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
670 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
671 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
672 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
673 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
674 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
675 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
676 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
677 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
678 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
679 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
680 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
681 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
682 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
683 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
684 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
685 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
686 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
687 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
688 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
689 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
690 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
691 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
692 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
693 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
694 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
695 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
696 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
697 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
698 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
699 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
700 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
701 0x83838383, 0x53535353, 0x99999999, 0x61616161,
702 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
703 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
704 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
705 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
708 __device__ __constant__ u32 rcon[] =
710 0x01000000, 0x02000000, 0x04000000, 0x08000000,
711 0x10000000, 0x20000000, 0x40000000, 0x80000000,
712 0x1b000000, 0x36000000,
715 __device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
723 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
725 u32 temp = rek[j + 3];
727 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
728 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
729 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
730 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
732 rek[j + 4] = rek[j + 0]
736 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
737 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
738 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
742 __device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
744 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
748 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
749 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
750 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
751 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
754 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
757 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
758 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
759 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
760 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
763 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
764 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
765 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
766 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
769 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
770 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
771 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
772 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
775 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
776 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
777 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
778 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
782 __device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256])
784 u32 s0 = in[0] ^ rdk[0];
785 u32 s1 = in[1] ^ rdk[1];
786 u32 s2 = in[2] ^ rdk[2];
787 u32 s3 = in[3] ^ rdk[3];
794 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
795 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
796 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
797 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
798 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
799 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
800 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
801 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
802 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
803 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
804 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
805 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
806 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
807 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
808 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
809 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
810 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
811 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
812 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
813 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
814 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
815 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
816 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
817 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
818 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
819 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
820 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
821 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
822 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
823 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
824 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
825 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
826 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
827 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
828 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
829 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
831 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
832 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
833 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
834 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
837 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
838 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
839 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
840 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
843 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
844 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
845 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
846 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
849 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
850 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
851 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
852 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
856 __device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
877 u32 temp = rek[j + 7];
879 rek[j + 8] = rek[j + 0]
880 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
881 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
882 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
883 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
886 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
887 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
888 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
898 rek[j + 12] = rek[j + 4]
899 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
900 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
901 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
902 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
904 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
905 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
906 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
912 __device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
914 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
918 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
919 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
920 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
921 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
924 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
927 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
928 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
929 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
930 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
933 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
934 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
935 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
936 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
939 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
940 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
941 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
942 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
945 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
946 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
947 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
948 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
952 __device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256])
954 u32 s0 = in[0] ^ rdk[0];
955 u32 s1 = in[1] ^ rdk[1];
956 u32 s2 = in[2] ^ rdk[2];
957 u32 s3 = in[3] ^ rdk[3];
964 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
965 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
966 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
967 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
968 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
969 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
970 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
971 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
972 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
973 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
974 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
975 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
976 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
977 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
978 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
979 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
980 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
981 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
982 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
983 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
984 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
985 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
986 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
987 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
988 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
989 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
990 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
991 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
992 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
993 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
994 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
995 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
996 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
997 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
998 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
999 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
1000 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
1001 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
1002 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
1003 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
1004 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
1005 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
1006 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
1007 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
1008 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
1009 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
1010 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
1011 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
1012 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
1013 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
1014 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
1015 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
1017 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
1018 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
1019 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
1020 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
1023 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
1024 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
1025 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
1026 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
1029 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
1030 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
1031 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
1032 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
1035 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
1036 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
1037 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
1038 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
1042 __device__ static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
1044 u32 s0 = in[0] ^ rek[0];
1045 u32 s1 = in[1] ^ rek[1];
1046 u32 s2 = in[2] ^ rek[2];
1047 u32 s3 = in[3] ^ rek[3];
1054 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
1055 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
1056 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
1057 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
1058 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
1059 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
1060 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
1061 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
1062 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
1063 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
1064 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
1065 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
1066 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
1067 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
1068 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
1069 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
1070 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
1071 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
1072 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
1073 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
1074 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
1075 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
1076 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
1077 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
1078 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
1079 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
1080 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
1081 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
1082 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
1083 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
1084 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
1085 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
1086 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
1087 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
1088 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
1089 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
1090 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40];
1091 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41];
1092 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42];
1093 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43];
1094 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44];
1095 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45];
1096 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46];
1097 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47];
1098 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48];
1099 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49];
1100 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50];
1101 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51];
1102 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52];
1103 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53];
1104 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54];
1105 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55];
1107 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
1108 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
1109 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
1110 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
1113 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
1114 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
1115 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
1116 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
1119 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
1120 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
1121 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
1122 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
1125 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
1126 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
1127 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
1128 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
1132 __device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
1160 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
1161 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
1162 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
1163 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
1164 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
1165 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
1166 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
1167 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
1168 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
1169 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
1170 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
1171 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
1172 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
1173 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
1174 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
1175 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
1176 w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
1177 w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
1178 w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
1179 w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
1180 w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
1181 w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
1182 w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
1183 w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
1184 w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
1185 w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
1186 wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
1187 wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
1188 wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
1189 wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
1190 we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
1191 wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
1192 w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
1193 w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
1194 w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
1195 w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
1196 w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
1197 w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
1198 w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
1199 w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
1200 w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
1201 w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
1202 wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
1203 wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
1204 wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
1205 wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
1206 we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
1207 wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
1208 w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
1209 w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
1210 w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
1211 w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
1212 w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
1213 w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
1214 w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
1215 w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
1216 w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
1217 w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
1218 wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
1219 wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
1220 wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
1221 wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
1222 we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
1223 wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
1235 __device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
1263 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
1264 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
1265 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
1266 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
1267 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
1268 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
1269 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
1270 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
1271 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
1272 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
1273 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
1274 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
1275 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
1276 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
1277 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
1278 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
1279 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
1280 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
1281 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
1282 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
1287 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
1288 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
1289 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
1290 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
1291 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
1292 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
1293 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
1294 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
1295 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
1296 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
1297 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
1298 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
1299 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
1300 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
1301 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
1302 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
1303 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
1304 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
1305 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
1306 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
1311 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
1312 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
1313 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
1314 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
1315 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
1316 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
1317 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
1318 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
1319 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
1320 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
1321 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
1322 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
1323 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
1324 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
1325 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
1326 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
1327 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
1328 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
1329 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
1330 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
1335 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
1336 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
1337 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
1338 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
1339 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
1340 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
1341 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
1342 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
1343 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
1344 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
1345 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
1346 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
1347 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
1348 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
1349 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
1350 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
1351 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
1352 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
1353 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
1354 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
1363 __device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5])
1365 w0[0] = w0[0] ^ 0x36363636;
1366 w0[1] = w0[1] ^ 0x36363636;
1367 w0[2] = w0[2] ^ 0x36363636;
1368 w0[3] = w0[3] ^ 0x36363636;
1369 w1[0] = w1[0] ^ 0x36363636;
1370 w1[1] = w1[1] ^ 0x36363636;
1371 w1[2] = w1[2] ^ 0x36363636;
1372 w1[3] = w1[3] ^ 0x36363636;
1373 w2[0] = w2[0] ^ 0x36363636;
1374 w2[1] = w2[1] ^ 0x36363636;
1375 w2[2] = w2[2] ^ 0x36363636;
1376 w2[3] = w2[3] ^ 0x36363636;
1377 w3[0] = w3[0] ^ 0x36363636;
1378 w3[1] = w3[1] ^ 0x36363636;
1379 w3[2] = w3[2] ^ 0x36363636;
1380 w3[3] = w3[3] ^ 0x36363636;
1388 sha1_transform (w0, w1, w2, w3, ipad);
1390 w0[0] = w0[0] ^ 0x6a6a6a6a;
1391 w0[1] = w0[1] ^ 0x6a6a6a6a;
1392 w0[2] = w0[2] ^ 0x6a6a6a6a;
1393 w0[3] = w0[3] ^ 0x6a6a6a6a;
1394 w1[0] = w1[0] ^ 0x6a6a6a6a;
1395 w1[1] = w1[1] ^ 0x6a6a6a6a;
1396 w1[2] = w1[2] ^ 0x6a6a6a6a;
1397 w1[3] = w1[3] ^ 0x6a6a6a6a;
1398 w2[0] = w2[0] ^ 0x6a6a6a6a;
1399 w2[1] = w2[1] ^ 0x6a6a6a6a;
1400 w2[2] = w2[2] ^ 0x6a6a6a6a;
1401 w2[3] = w2[3] ^ 0x6a6a6a6a;
1402 w3[0] = w3[0] ^ 0x6a6a6a6a;
1403 w3[1] = w3[1] ^ 0x6a6a6a6a;
1404 w3[2] = w3[2] ^ 0x6a6a6a6a;
1405 w3[3] = w3[3] ^ 0x6a6a6a6a;
1413 sha1_transform (w0, w1, w2, w3, opad);
1416 __device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5])
1418 digest[0] = ipad[0];
1419 digest[1] = ipad[1];
1420 digest[2] = ipad[2];
1421 digest[3] = ipad[3];
1422 digest[4] = ipad[4];
1424 sha1_transform (w0, w1, w2, w3, digest);
1441 w3[3] = (64 + 20) * 8;
1443 digest[0] = opad[0];
1444 digest[1] = opad[1];
1445 digest[2] = opad[2];
1446 digest[3] = opad[3];
1447 digest[4] = opad[4];
1449 sha1_transform (w0, w1, w2, w3, digest);
1452 extern "C" __global__ void __launch_bounds__ (256, 1) m08800_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidfde_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const androidfde_t *androidfde_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1458 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1460 if (gid >= gid_max) return;
1464 w0[0] = pws[gid].i[ 0];
1465 w0[1] = pws[gid].i[ 1];
1466 w0[2] = pws[gid].i[ 2];
1467 w0[3] = pws[gid].i[ 3];
1471 w1[0] = pws[gid].i[ 4];
1472 w1[1] = pws[gid].i[ 5];
1473 w1[2] = pws[gid].i[ 6];
1474 w1[3] = pws[gid].i[ 7];
1478 w2[0] = pws[gid].i[ 8];
1479 w2[1] = pws[gid].i[ 9];
1480 w2[2] = pws[gid].i[10];
1481 w2[3] = pws[gid].i[11];
1485 w3[0] = pws[gid].i[12];
1486 w3[1] = pws[gid].i[13];
1487 w3[2] = pws[gid].i[14];
1488 w3[3] = pws[gid].i[15];
1498 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1499 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1500 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1501 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1507 w0[0] = swap_workaround (w0[0]);
1508 w0[1] = swap_workaround (w0[1]);
1509 w0[2] = swap_workaround (w0[2]);
1510 w0[3] = swap_workaround (w0[3]);
1511 w1[0] = swap_workaround (w1[0]);
1512 w1[1] = swap_workaround (w1[1]);
1513 w1[2] = swap_workaround (w1[2]);
1514 w1[3] = swap_workaround (w1[3]);
1515 w2[0] = swap_workaround (w2[0]);
1516 w2[1] = swap_workaround (w2[1]);
1517 w2[2] = swap_workaround (w2[2]);
1518 w2[3] = swap_workaround (w2[3]);
1519 w3[0] = swap_workaround (w3[0]);
1520 w3[1] = swap_workaround (w3[1]);
1521 w3[2] = swap_workaround (w3[2]);
1522 w3[3] = swap_workaround (w3[3]);
1527 hmac_sha1_pad (w0, w1, w2, w3, ipad, opad);
1529 tmps[gid].ipad[0] = ipad[0];
1530 tmps[gid].ipad[1] = ipad[1];
1531 tmps[gid].ipad[2] = ipad[2];
1532 tmps[gid].ipad[3] = ipad[3];
1533 tmps[gid].ipad[4] = ipad[4];
1535 tmps[gid].opad[0] = opad[0];
1536 tmps[gid].opad[1] = opad[1];
1537 tmps[gid].opad[2] = opad[2];
1538 tmps[gid].opad[3] = opad[3];
1539 tmps[gid].opad[4] = opad[4];
1541 for (u32 i = 0, j = 1; i < 8; i += 5, j += 1)
1543 w0[0] = salt_buf[0];
1544 w0[1] = salt_buf[1];
1545 w0[2] = salt_buf[2];
1546 w0[3] = salt_buf[3];
1561 append_0x01_3 (w0, w1, w2, salt_len + 3);
1563 append_0x02_3 (w0, w1, w2, salt_len + 3);
1565 append_0x80_3 (w0, w1, w2, salt_len + 4);
1567 w0[0] = swap_workaround (w0[0]);
1568 w0[1] = swap_workaround (w0[1]);
1569 w0[2] = swap_workaround (w0[2]);
1570 w0[3] = swap_workaround (w0[3]);
1571 w1[0] = swap_workaround (w1[0]);
1572 w1[1] = swap_workaround (w1[1]);
1573 w1[2] = swap_workaround (w1[2]);
1574 w1[3] = swap_workaround (w1[3]);
1575 w2[0] = swap_workaround (w2[0]);
1576 w2[1] = swap_workaround (w2[1]);
1582 w3[3] = (64 + salt_len + 4) * 8;
1586 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst);
1588 tmps[gid].dgst[i + 0] = dgst[0];
1589 tmps[gid].dgst[i + 1] = dgst[1];
1590 tmps[gid].dgst[i + 2] = dgst[2];
1591 tmps[gid].dgst[i + 3] = dgst[3];
1592 tmps[gid].dgst[i + 4] = dgst[4];
1594 tmps[gid].out[i + 0] = dgst[0];
1595 tmps[gid].out[i + 1] = dgst[1];
1596 tmps[gid].out[i + 2] = dgst[2];
1597 tmps[gid].out[i + 3] = dgst[3];
1598 tmps[gid].out[i + 4] = dgst[4];
1602 extern "C" __global__ void __launch_bounds__ (256, 1) m08800_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidfde_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const androidfde_t *androidfde_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1604 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1606 if (gid >= gid_max) return;
1611 ipad[0] = tmps[gid].ipad[0];
1612 ipad[1] = tmps[gid].ipad[1];
1613 ipad[2] = tmps[gid].ipad[2];
1614 ipad[3] = tmps[gid].ipad[3];
1615 ipad[4] = tmps[gid].ipad[4];
1617 opad[0] = tmps[gid].opad[0];
1618 opad[1] = tmps[gid].opad[1];
1619 opad[2] = tmps[gid].opad[2];
1620 opad[3] = tmps[gid].opad[3];
1621 opad[4] = tmps[gid].opad[4];
1623 for (u32 i = 0; i < 8; i += 5)
1628 dgst[0] = tmps[gid].dgst[i + 0];
1629 dgst[1] = tmps[gid].dgst[i + 1];
1630 dgst[2] = tmps[gid].dgst[i + 2];
1631 dgst[3] = tmps[gid].dgst[i + 3];
1632 dgst[4] = tmps[gid].dgst[i + 4];
1634 out[0] = tmps[gid].out[i + 0];
1635 out[1] = tmps[gid].out[i + 1];
1636 out[2] = tmps[gid].out[i + 2];
1637 out[3] = tmps[gid].out[i + 3];
1638 out[4] = tmps[gid].out[i + 4];
1640 for (u32 j = 0; j < loop_cnt; j++)
1662 w3[3] = (64 + 20) * 8;
1664 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst);
1673 tmps[gid].dgst[i + 0] = dgst[0];
1674 tmps[gid].dgst[i + 1] = dgst[1];
1675 tmps[gid].dgst[i + 2] = dgst[2];
1676 tmps[gid].dgst[i + 3] = dgst[3];
1677 tmps[gid].dgst[i + 4] = dgst[4];
1679 tmps[gid].out[i + 0] = out[0];
1680 tmps[gid].out[i + 1] = out[1];
1681 tmps[gid].out[i + 2] = out[2];
1682 tmps[gid].out[i + 3] = out[3];
1683 tmps[gid].out[i + 4] = out[4];
1687 extern "C" __global__ void __launch_bounds__ (256, 1) m08800_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidfde_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const androidfde_t *androidfde_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1693 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1694 const u32 lid = threadIdx.x;
1707 __shared__ u32 s_td0[256];
1708 __shared__ u32 s_td1[256];
1709 __shared__ u32 s_td2[256];
1710 __shared__ u32 s_td3[256];
1711 __shared__ u32 s_td4[256];
1713 __shared__ u32 s_te0[256];
1714 __shared__ u32 s_te1[256];
1715 __shared__ u32 s_te2[256];
1716 __shared__ u32 s_te3[256];
1717 __shared__ u32 s_te4[256];
1719 s_td0[lid] = td0[lid];
1720 s_td1[lid] = td1[lid];
1721 s_td2[lid] = td2[lid];
1722 s_td3[lid] = td3[lid];
1723 s_td4[lid] = td4[lid];
1725 s_te0[lid] = te0[lid];
1726 s_te1[lid] = te1[lid];
1727 s_te2[lid] = te2[lid];
1728 s_te3[lid] = te3[lid];
1729 s_te4[lid] = te4[lid];
1733 if (gid >= gid_max) return;
1741 ukeyx[0] = tmps[gid].out[0];
1742 ukeyx[1] = tmps[gid].out[1];
1743 ukeyx[2] = tmps[gid].out[2];
1744 ukeyx[3] = tmps[gid].out[3];
1759 data[0] = digests_buf[digests_offset].digest_buf[0];
1760 data[1] = digests_buf[digests_offset].digest_buf[1];
1761 data[2] = digests_buf[digests_offset].digest_buf[2];
1762 data[3] = digests_buf[digests_offset].digest_buf[3];
1764 iv[0] = tmps[gid].out[4];
1765 iv[1] = tmps[gid].out[5];
1766 iv[2] = tmps[gid].out[6];
1767 iv[3] = tmps[gid].out[7];
1769 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1771 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1773 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1775 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1783 * section AES_cbc_essiv() starting
1786 // 1. start with simple sha256_transform
1790 essivhash[0] = SHA256M_A;
1791 essivhash[1] = SHA256M_B;
1792 essivhash[2] = SHA256M_C;
1793 essivhash[3] = SHA256M_D;
1794 essivhash[4] = SHA256M_E;
1795 essivhash[5] = SHA256M_F;
1796 essivhash[6] = SHA256M_G;
1797 essivhash[7] = SHA256M_H;
1821 sha256_transform (w0, w1, w2, w3, essivhash);
1826 // 2. generate essiv based on startsector -- each 512 byte is one sector
1833 ukeyx[0] = essivhash[0];
1834 ukeyx[1] = essivhash[1];
1835 ukeyx[2] = essivhash[2];
1836 ukeyx[3] = essivhash[3];
1837 ukeyx[4] = essivhash[4];
1838 ukeyx[5] = essivhash[5];
1839 ukeyx[6] = essivhash[6];
1840 ukeyx[7] = essivhash[7];
1842 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1846 AES256_encrypt (data, essiv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1848 // 3. decrypt real data, xor essiv afterwards
1850 data[0] = androidfde_bufs[salt_pos].data[0];
1851 data[1] = androidfde_bufs[salt_pos].data[1];
1852 data[2] = androidfde_bufs[salt_pos].data[2];
1853 data[3] = androidfde_bufs[salt_pos].data[3];
1869 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1871 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1873 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1875 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1877 u32x r0 = out[0] ^ iv[0];
1878 u32x r1 = out[1] ^ iv[1];
1879 u32x r2 = out[2] ^ iv[2];
1880 u32x r3 = out[3] ^ iv[3];
1882 // rotate 3 byte (static in fat!)
1884 r0 = r1 << 8 | r0 >> 24;
1885 r1 = r2 << 8 | r1 >> 24;
1888 if ((r0 == 0x4f44534d) && (r1 == 0x302e3553))
1890 mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0);
1892 d_return_buf[lid] = 1;
1901 // 2. generate essiv based on startsector -- each 512 byte is one sector
1903 // not needed because of cbc mode -- implementation flaw !!. first 16 byte are not interessting
1910 // 3. decrypt real data
1921 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1923 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1925 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1927 for (u32 i = 4; i < 16; i += 4)
1929 data[0] = androidfde_bufs[salt_pos].data[256 + i + 0];
1930 data[1] = androidfde_bufs[salt_pos].data[256 + i + 1];
1931 data[2] = androidfde_bufs[salt_pos].data[256 + i + 2];
1932 data[3] = androidfde_bufs[salt_pos].data[256 + i + 3];
1934 iv[0] = androidfde_bufs[salt_pos].data[256 + i + 0 - 4];
1935 iv[1] = androidfde_bufs[salt_pos].data[256 + i + 1 - 4];
1936 iv[2] = androidfde_bufs[salt_pos].data[256 + i + 2 - 4];
1937 iv[3] = androidfde_bufs[salt_pos].data[256 + i + 3 - 4];
1939 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1941 r[i + 0] = out[0] ^ iv[0];
1942 r[i + 1] = out[1] ^ iv[1];
1943 r[i + 2] = out[2] ^ iv[2];
1944 r[i + 3] = out[3] ^ iv[3];
1947 // we need just a few swapped, because we do not access the others
1948 r[ 5] = swap_workaround (r[ 5]);
1949 r[ 6] = swap_workaround (r[ 6]);
1950 r[14] = swap_workaround (r[14]);
1952 // superblock not on id 0 or 1
1953 // assumes max block size is 32MiB
1954 // has EXT2_SUPER_MAGIC
1956 if ((r[5] < 2) && (r[6] < 16) && ((r[14] & 0xffff) == 0xEF53))
1958 mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0);
1960 d_return_buf[lid] = 1;