2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
33 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
37 __device__ __constant__ u32 te0[256] =
39 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
40 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
41 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
42 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
43 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
44 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
45 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
46 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
47 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
48 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
49 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
50 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
51 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
52 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
53 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
54 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
55 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
56 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
57 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
58 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
59 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
60 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
61 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
62 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
63 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
64 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
65 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
66 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
67 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
68 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
69 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
70 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
71 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
72 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
73 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
74 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
75 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
76 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
77 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
78 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
79 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
80 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
81 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
82 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
83 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
84 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
85 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
86 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
87 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
88 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
89 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
90 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
91 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
92 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
93 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
94 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
95 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
96 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
97 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
98 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
99 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
100 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
101 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
102 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
105 __device__ __constant__ u32 te1[256] =
107 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
108 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
109 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
110 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
111 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
112 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
113 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
114 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
115 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
116 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
117 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
118 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
119 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
120 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
121 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
122 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
123 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
124 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
125 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
126 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
127 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
128 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
129 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
130 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
131 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
132 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
133 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
134 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
135 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
136 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
137 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
138 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
139 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
140 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
141 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
142 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
143 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
144 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
145 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
146 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
147 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
148 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
149 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
150 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
151 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
152 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
153 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
154 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
155 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
156 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
157 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
158 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
159 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
160 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
161 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
162 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
163 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
164 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
165 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
166 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
167 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
168 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
169 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
170 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
173 __device__ __constant__ u32 te2[256] =
175 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
176 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
177 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
178 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
179 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
180 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
181 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
182 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
183 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
184 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
185 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
186 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
187 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
188 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
189 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
190 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
191 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
192 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
193 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
194 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
195 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
196 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
197 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
198 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
199 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
200 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
201 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
202 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
203 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
204 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
205 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
206 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
207 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
208 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
209 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
210 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
211 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
212 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
213 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
214 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
215 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
216 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
217 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
218 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
219 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
220 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
221 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
222 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
223 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
224 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
225 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
226 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
227 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
228 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
229 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
230 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
231 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
232 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
233 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
234 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
235 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
236 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
237 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
238 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
241 __device__ __constant__ u32 te3[256] =
243 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
244 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
245 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
246 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
247 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
248 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
249 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
250 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
251 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
252 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
253 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
254 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
255 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
256 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
257 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
258 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
259 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
260 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
261 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
262 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
263 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
264 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
265 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
266 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
267 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
268 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
269 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
270 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
271 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
272 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
273 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
274 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
275 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
276 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
277 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
278 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
279 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
280 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
281 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
282 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
283 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
284 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
285 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
286 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
287 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
288 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
289 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
290 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
291 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
292 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
293 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
294 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
295 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
296 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
297 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
298 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
299 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
300 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
301 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
302 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
303 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
304 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
305 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
306 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
309 __device__ __constant__ u32 te4[256] =
311 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
312 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
313 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
314 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
315 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
316 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
317 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
318 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
319 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
320 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
321 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
322 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
323 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
324 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
325 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
326 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
327 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
328 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
329 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
330 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
331 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
332 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
333 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
334 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
335 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
336 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
337 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
338 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
339 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
340 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
341 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
342 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
343 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
344 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
345 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
346 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
347 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
348 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
349 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
350 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
351 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
352 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
353 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
354 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
355 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
356 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
357 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
358 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
359 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
360 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
361 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
362 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
363 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
364 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
365 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
366 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
367 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
368 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
369 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
370 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
371 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
372 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
373 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
374 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
377 __device__ __constant__ u32 td0[256] =
379 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
380 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
381 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
382 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
383 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
384 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
385 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
386 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
387 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
388 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
389 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
390 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
391 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
392 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
393 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
394 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
395 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
396 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
397 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
398 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
399 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
400 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
401 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
402 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
403 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
404 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
405 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
406 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
407 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
408 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
409 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
410 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
411 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
412 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
413 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
414 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
415 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
416 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
417 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
418 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
419 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
420 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
421 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
422 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
423 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
424 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
425 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
426 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
427 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
428 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
429 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
430 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
431 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
432 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
433 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
434 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
435 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
436 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
437 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
438 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
439 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
440 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
441 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
442 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
445 __device__ __constant__ u32 td1[256] =
447 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
448 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
449 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
450 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
451 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
452 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
453 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
454 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
455 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
456 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
457 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
458 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
459 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
460 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
461 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
462 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
463 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
464 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
465 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
466 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
467 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
468 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
469 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
470 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
471 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
472 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
473 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
474 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
475 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
476 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
477 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
478 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
479 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
480 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
481 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
482 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
483 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
484 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
485 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
486 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
487 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
488 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
489 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
490 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
491 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
492 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
493 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
494 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
495 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
496 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
497 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
498 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
499 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
500 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
501 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
502 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
503 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
504 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
505 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
506 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
507 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
508 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
509 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
510 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
513 __device__ __constant__ u32 td2[256] =
515 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
516 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
517 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
518 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
519 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
520 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
521 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
522 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
523 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
524 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
525 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
526 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
527 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
528 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
529 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
530 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
531 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
532 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
533 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
534 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
535 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
536 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
537 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
538 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
539 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
540 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
541 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
542 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
543 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
544 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
545 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
546 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
547 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
548 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
549 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
550 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
551 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
552 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
553 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
554 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
555 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
556 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
557 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
558 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
559 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
560 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
561 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
562 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
563 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
564 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
565 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
566 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
567 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
568 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
569 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
570 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
571 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
572 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
573 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
574 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
575 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
576 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
577 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
578 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
581 __device__ __constant__ u32 td3[256] =
583 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
584 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
585 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
586 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
587 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
588 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
589 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
590 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
591 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
592 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
593 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
594 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
595 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
596 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
597 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
598 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
599 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
600 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
601 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
602 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
603 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
604 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
605 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
606 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
607 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
608 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
609 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
610 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
611 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
612 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
613 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
614 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
615 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
616 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
617 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
618 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
619 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
620 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
621 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
622 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
623 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
624 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
625 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
626 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
627 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
628 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
629 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
630 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
631 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
632 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
633 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
634 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
635 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
636 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
637 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
638 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
639 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
640 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
641 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
642 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
643 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
644 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
645 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
646 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
649 __device__ __constant__ u32 td4[256] =
651 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
652 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
653 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
654 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
655 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
656 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
657 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
658 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
659 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
660 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
661 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
662 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
663 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
664 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
665 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
666 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
667 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
668 0x86868686, 0x68686868, 0x98989898, 0x16161616,
669 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
670 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
671 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
672 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
673 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
674 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
675 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
676 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
677 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
678 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
679 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
680 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
681 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
682 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
683 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
684 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
685 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
686 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
687 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
688 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
689 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
690 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
691 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
692 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
693 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
694 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
695 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
696 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
697 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
698 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
699 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
700 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
701 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
702 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
703 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
704 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
705 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
706 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
707 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
708 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
709 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
710 0x83838383, 0x53535353, 0x99999999, 0x61616161,
711 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
712 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
713 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
714 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
717 __device__ __constant__ u32 rcon[] =
719 0x01000000, 0x02000000, 0x04000000, 0x08000000,
720 0x10000000, 0x20000000, 0x40000000, 0x80000000,
721 0x1b000000, 0x36000000,
724 __device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
745 u32 temp = rek[j + 7];
747 rek[j + 8] = rek[j + 0]
748 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
749 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
750 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
751 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
754 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
755 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
756 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
766 rek[j + 12] = rek[j + 4]
767 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
768 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
769 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
770 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
772 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
773 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
774 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
780 __device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
782 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
786 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
787 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
788 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
789 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
792 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
795 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
796 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
797 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
798 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
801 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
802 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
803 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
804 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
807 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
808 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
809 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
810 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
813 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
814 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
815 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
816 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
820 __device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256])
822 u32 s0 = in[0] ^ rdk[0];
823 u32 s1 = in[1] ^ rdk[1];
824 u32 s2 = in[2] ^ rdk[2];
825 u32 s3 = in[3] ^ rdk[3];
832 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
833 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
834 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
835 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
836 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
837 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
838 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
839 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
840 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
841 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
842 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
843 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
844 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
845 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
846 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
847 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
848 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
849 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
850 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
851 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
852 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
853 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
854 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
855 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
856 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
857 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
858 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
859 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
860 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
861 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
862 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
863 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
864 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
865 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
866 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
867 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
868 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
869 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
870 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
871 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
872 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
873 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
874 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
875 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
876 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
877 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
878 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
879 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
880 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
881 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
882 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
883 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
885 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
886 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
887 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
888 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
891 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
892 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
893 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
894 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
897 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
898 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
899 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
900 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
903 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
904 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
905 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
906 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
910 __device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
938 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
939 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
940 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
941 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
942 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
943 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
944 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
945 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
946 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
947 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
948 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
949 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
950 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
951 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
952 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
953 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
954 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
955 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
956 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
957 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
962 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
963 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
964 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
965 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
966 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
967 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
968 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
969 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
970 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
971 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
972 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
973 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
974 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
975 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
976 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
977 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
978 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
979 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
980 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
981 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
986 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
987 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
988 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
989 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
990 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
991 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
992 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
993 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
994 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
995 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
996 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
997 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
998 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
999 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
1000 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
1001 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
1002 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
1003 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
1004 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
1005 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
1010 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
1011 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
1012 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
1013 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
1014 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
1015 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
1016 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
1017 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
1018 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
1019 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
1020 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
1021 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
1022 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
1023 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
1024 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
1025 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
1026 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
1027 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
1028 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
1029 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
1038 __device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5])
1040 w0[0] = w0[0] ^ 0x36363636;
1041 w0[1] = w0[1] ^ 0x36363636;
1042 w0[2] = w0[2] ^ 0x36363636;
1043 w0[3] = w0[3] ^ 0x36363636;
1044 w1[0] = w1[0] ^ 0x36363636;
1045 w1[1] = w1[1] ^ 0x36363636;
1046 w1[2] = w1[2] ^ 0x36363636;
1047 w1[3] = w1[3] ^ 0x36363636;
1048 w2[0] = w2[0] ^ 0x36363636;
1049 w2[1] = w2[1] ^ 0x36363636;
1050 w2[2] = w2[2] ^ 0x36363636;
1051 w2[3] = w2[3] ^ 0x36363636;
1052 w3[0] = w3[0] ^ 0x36363636;
1053 w3[1] = w3[1] ^ 0x36363636;
1054 w3[2] = w3[2] ^ 0x36363636;
1055 w3[3] = w3[3] ^ 0x36363636;
1063 sha1_transform (w0, w1, w2, w3, ipad);
1065 w0[0] = w0[0] ^ 0x6a6a6a6a;
1066 w0[1] = w0[1] ^ 0x6a6a6a6a;
1067 w0[2] = w0[2] ^ 0x6a6a6a6a;
1068 w0[3] = w0[3] ^ 0x6a6a6a6a;
1069 w1[0] = w1[0] ^ 0x6a6a6a6a;
1070 w1[1] = w1[1] ^ 0x6a6a6a6a;
1071 w1[2] = w1[2] ^ 0x6a6a6a6a;
1072 w1[3] = w1[3] ^ 0x6a6a6a6a;
1073 w2[0] = w2[0] ^ 0x6a6a6a6a;
1074 w2[1] = w2[1] ^ 0x6a6a6a6a;
1075 w2[2] = w2[2] ^ 0x6a6a6a6a;
1076 w2[3] = w2[3] ^ 0x6a6a6a6a;
1077 w3[0] = w3[0] ^ 0x6a6a6a6a;
1078 w3[1] = w3[1] ^ 0x6a6a6a6a;
1079 w3[2] = w3[2] ^ 0x6a6a6a6a;
1080 w3[3] = w3[3] ^ 0x6a6a6a6a;
1088 sha1_transform (w0, w1, w2, w3, opad);
1091 __device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5])
1093 digest[0] = ipad[0];
1094 digest[1] = ipad[1];
1095 digest[2] = ipad[2];
1096 digest[3] = ipad[3];
1097 digest[4] = ipad[4];
1099 sha1_transform (w0, w1, w2, w3, digest);
1116 w3[3] = (64 + 20) * 8;
1118 digest[0] = opad[0];
1119 digest[1] = opad[1];
1120 digest[2] = opad[2];
1121 digest[3] = opad[3];
1122 digest[4] = opad[4];
1124 sha1_transform (w0, w1, w2, w3, digest);
1127 extern "C" __global__ void __launch_bounds__ (256, 1) m12700_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, mywallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1133 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1135 if (gid >= gid_max) return;
1139 w0[0] = pws[gid].i[ 0];
1140 w0[1] = pws[gid].i[ 1];
1141 w0[2] = pws[gid].i[ 2];
1142 w0[3] = pws[gid].i[ 3];
1146 w1[0] = pws[gid].i[ 4];
1147 w1[1] = pws[gid].i[ 5];
1148 w1[2] = pws[gid].i[ 6];
1149 w1[3] = pws[gid].i[ 7];
1153 w2[0] = pws[gid].i[ 8];
1154 w2[1] = pws[gid].i[ 9];
1155 w2[2] = pws[gid].i[10];
1156 w2[3] = pws[gid].i[11];
1160 w3[0] = pws[gid].i[12];
1161 w3[1] = pws[gid].i[13];
1162 w3[2] = pws[gid].i[14];
1163 w3[3] = pws[gid].i[15];
1173 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1174 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1175 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1176 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1182 w0[0] = swap_workaround (w0[0]);
1183 w0[1] = swap_workaround (w0[1]);
1184 w0[2] = swap_workaround (w0[2]);
1185 w0[3] = swap_workaround (w0[3]);
1186 w1[0] = swap_workaround (w1[0]);
1187 w1[1] = swap_workaround (w1[1]);
1188 w1[2] = swap_workaround (w1[2]);
1189 w1[3] = swap_workaround (w1[3]);
1190 w2[0] = swap_workaround (w2[0]);
1191 w2[1] = swap_workaround (w2[1]);
1192 w2[2] = swap_workaround (w2[2]);
1193 w2[3] = swap_workaround (w2[3]);
1194 w3[0] = swap_workaround (w3[0]);
1195 w3[1] = swap_workaround (w3[1]);
1196 w3[2] = swap_workaround (w3[2]);
1197 w3[3] = swap_workaround (w3[3]);
1202 hmac_sha1_pad (w0, w1, w2, w3, ipad, opad);
1204 tmps[gid].ipad[0] = ipad[0];
1205 tmps[gid].ipad[1] = ipad[1];
1206 tmps[gid].ipad[2] = ipad[2];
1207 tmps[gid].ipad[3] = ipad[3];
1208 tmps[gid].ipad[4] = ipad[4];
1210 tmps[gid].opad[0] = opad[0];
1211 tmps[gid].opad[1] = opad[1];
1212 tmps[gid].opad[2] = opad[2];
1213 tmps[gid].opad[3] = opad[3];
1214 tmps[gid].opad[4] = opad[4];
1219 w0[0] = salt_buf[0];
1220 w0[1] = salt_buf[1];
1221 w0[2] = salt_buf[2];
1222 w0[3] = salt_buf[3];
1234 w3[3] = (64 + salt_len + 4) * 8;
1238 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst1);
1240 tmps[gid].dgst1[0] = dgst1[0];
1241 tmps[gid].dgst1[1] = dgst1[1];
1242 tmps[gid].dgst1[2] = dgst1[2];
1243 tmps[gid].dgst1[3] = dgst1[3];
1244 tmps[gid].dgst1[4] = dgst1[4];
1246 tmps[gid].out1[0] = dgst1[0];
1247 tmps[gid].out1[1] = dgst1[1];
1248 tmps[gid].out1[2] = dgst1[2];
1249 tmps[gid].out1[3] = dgst1[3];
1250 tmps[gid].out1[4] = dgst1[4];
1256 w0[0] = salt_buf[0];
1257 w0[1] = salt_buf[1];
1258 w0[2] = salt_buf[2];
1259 w0[3] = salt_buf[3];
1271 w3[3] = (64 + salt_len + 4) * 8;
1275 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst2);
1277 tmps[gid].dgst2[0] = dgst2[0];
1278 tmps[gid].dgst2[1] = dgst2[1];
1279 tmps[gid].dgst2[2] = dgst2[2];
1280 tmps[gid].dgst2[3] = dgst2[3];
1281 tmps[gid].dgst2[4] = dgst2[4];
1283 tmps[gid].out2[0] = dgst2[0];
1284 tmps[gid].out2[1] = dgst2[1];
1285 tmps[gid].out2[2] = dgst2[2];
1286 tmps[gid].out2[3] = dgst2[3];
1287 tmps[gid].out2[4] = dgst2[4];
1291 extern "C" __global__ void __launch_bounds__ (256, 1) m12700_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, mywallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1293 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1295 if (gid >= gid_max) return;
1300 ipad[0] = tmps[gid].ipad[0];
1301 ipad[1] = tmps[gid].ipad[1];
1302 ipad[2] = tmps[gid].ipad[2];
1303 ipad[3] = tmps[gid].ipad[3];
1304 ipad[4] = tmps[gid].ipad[4];
1306 opad[0] = tmps[gid].opad[0];
1307 opad[1] = tmps[gid].opad[1];
1308 opad[2] = tmps[gid].opad[2];
1309 opad[3] = tmps[gid].opad[3];
1310 opad[4] = tmps[gid].opad[4];
1318 dgst1[0] = tmps[gid].dgst1[0];
1319 dgst1[1] = tmps[gid].dgst1[1];
1320 dgst1[2] = tmps[gid].dgst1[2];
1321 dgst1[3] = tmps[gid].dgst1[3];
1322 dgst1[4] = tmps[gid].dgst1[4];
1324 out1[0] = tmps[gid].out1[0];
1325 out1[1] = tmps[gid].out1[1];
1326 out1[2] = tmps[gid].out1[2];
1327 out1[3] = tmps[gid].out1[3];
1328 out1[4] = tmps[gid].out1[4];
1330 for (u32 j = 0; j < loop_cnt; j++)
1352 w3[3] = (64 + 20) * 8;
1354 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst1);
1356 out1[0] ^= dgst1[0];
1357 out1[1] ^= dgst1[1];
1358 out1[2] ^= dgst1[2];
1359 out1[3] ^= dgst1[3];
1360 out1[4] ^= dgst1[4];
1363 tmps[gid].dgst1[0] = dgst1[0];
1364 tmps[gid].dgst1[1] = dgst1[1];
1365 tmps[gid].dgst1[2] = dgst1[2];
1366 tmps[gid].dgst1[3] = dgst1[3];
1367 tmps[gid].dgst1[4] = dgst1[4];
1369 tmps[gid].out1[0] = out1[0];
1370 tmps[gid].out1[1] = out1[1];
1371 tmps[gid].out1[2] = out1[2];
1372 tmps[gid].out1[3] = out1[3];
1373 tmps[gid].out1[4] = out1[4];
1382 dgst2[0] = tmps[gid].dgst2[0];
1383 dgst2[1] = tmps[gid].dgst2[1];
1384 dgst2[2] = tmps[gid].dgst2[2];
1385 dgst2[3] = tmps[gid].dgst2[3];
1386 dgst2[4] = tmps[gid].dgst2[4];
1388 out2[0] = tmps[gid].out2[0];
1389 out2[1] = tmps[gid].out2[1];
1390 out2[2] = tmps[gid].out2[2];
1391 out2[3] = tmps[gid].out2[3];
1392 out2[4] = tmps[gid].out2[4];
1394 for (u32 j = 0; j < loop_cnt; j++)
1416 w3[3] = (64 + 20) * 8;
1418 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst2);
1420 out2[0] ^= dgst2[0];
1421 out2[1] ^= dgst2[1];
1422 out2[2] ^= dgst2[2];
1423 out2[3] ^= dgst2[3];
1424 out2[4] ^= dgst2[4];
1427 tmps[gid].dgst2[0] = dgst2[0];
1428 tmps[gid].dgst2[1] = dgst2[1];
1429 tmps[gid].dgst2[2] = dgst2[2];
1430 tmps[gid].dgst2[3] = dgst2[3];
1431 tmps[gid].dgst2[4] = dgst2[4];
1433 tmps[gid].out2[0] = out2[0];
1434 tmps[gid].out2[1] = out2[1];
1435 tmps[gid].out2[2] = out2[2];
1436 tmps[gid].out2[3] = out2[3];
1437 tmps[gid].out2[4] = out2[4];
1441 extern "C" __global__ void __launch_bounds__ (256, 1) m12700_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, mywallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1447 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1448 const u32 lid = threadIdx.x;
1454 __shared__ u32 s_td0[256];
1455 __shared__ u32 s_td1[256];
1456 __shared__ u32 s_td2[256];
1457 __shared__ u32 s_td3[256];
1458 __shared__ u32 s_td4[256];
1460 __shared__ u32 s_te0[256];
1461 __shared__ u32 s_te1[256];
1462 __shared__ u32 s_te2[256];
1463 __shared__ u32 s_te3[256];
1464 __shared__ u32 s_te4[256];
1466 s_td0[lid] = td0[lid];
1467 s_td1[lid] = td1[lid];
1468 s_td2[lid] = td2[lid];
1469 s_td3[lid] = td3[lid];
1470 s_td4[lid] = td4[lid];
1472 s_te0[lid] = te0[lid];
1473 s_te1[lid] = te1[lid];
1474 s_te2[lid] = te2[lid];
1475 s_te3[lid] = te3[lid];
1476 s_te4[lid] = te4[lid];
1480 if (gid >= gid_max) return;
1488 salt_bufs[salt_pos].salt_buf[0],
1489 salt_bufs[salt_pos].salt_buf[1],
1490 salt_bufs[salt_pos].salt_buf[2],
1491 salt_bufs[salt_pos].salt_buf[3]
1496 salt_bufs[salt_pos].salt_buf[4],
1497 salt_bufs[salt_pos].salt_buf[5],
1498 salt_bufs[salt_pos].salt_buf[6],
1499 salt_bufs[salt_pos].salt_buf[7]
1504 ukey[0] = tmps[gid].out1[0];
1505 ukey[1] = tmps[gid].out1[1];
1506 ukey[2] = tmps[gid].out1[2];
1507 ukey[3] = tmps[gid].out1[3];
1508 ukey[4] = tmps[gid].out1[4];
1509 ukey[5] = tmps[gid].out2[0];
1510 ukey[6] = tmps[gid].out2[1];
1511 ukey[7] = tmps[gid].out2[2];
1517 AES256_ExpandKey (ukey, rk, s_te0, s_te1, s_te2, s_te3, s_te4);
1519 AES256_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1523 AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
1530 out[0] = swap_workaround (out[0]);
1531 out[1] = swap_workaround (out[1]);
1532 out[2] = swap_workaround (out[2]);
1533 out[3] = swap_workaround (out[3]);
1535 if ((out[0] & 0xff) != '{') return;
1537 char *pt = (char *) out;
1539 for (int i = 1; i < 16 - 6; i++)
1541 if (pt[i + 0] != '"') continue;
1542 if (pt[i + 1] != 'g') continue;
1543 if (pt[i + 2] != 'u') continue;
1544 if (pt[i + 3] != 'i') continue;
1545 if (pt[i + 4] != 'd') continue;
1546 if (pt[i + 5] != '"') continue;
1548 const u32x r0 = data[0];
1549 const u32x r1 = data[1];
1550 const u32x r2 = data[2];
1551 const u32x r3 = data[3];
1555 #include VECT_COMPARE_M