2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 // http://www.schneier.com/code/constants.txt
25 __constant u32 c_sbox0[256] =
27 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
28 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
29 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
30 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
31 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
32 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
33 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
34 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
35 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
36 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
37 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
38 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
39 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
40 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
41 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
42 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
43 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
44 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
45 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
46 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
47 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
48 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
49 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
50 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
51 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
52 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
53 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
54 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
55 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
56 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
57 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
58 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
59 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
60 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
61 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
62 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
63 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
64 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
65 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
66 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
67 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
68 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
69 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
70 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
71 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
72 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
73 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
74 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
75 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
76 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
77 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
78 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
79 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
80 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
81 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
82 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
83 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
84 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
85 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
86 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
87 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
88 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
89 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
90 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
93 __constant u32 c_sbox1[256] =
95 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
96 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
97 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
98 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
99 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
100 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
101 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
102 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
103 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
104 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
105 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
106 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
107 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
108 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
109 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
110 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
111 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
112 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
113 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
114 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
115 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
116 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
117 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
118 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
119 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
120 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
121 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
122 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
123 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
124 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
125 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
126 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
127 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
128 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
129 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
130 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
131 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
132 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
133 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
134 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
135 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
136 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
137 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
138 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
139 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
140 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
141 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
142 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
143 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
144 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
145 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
146 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
147 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
148 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
149 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
150 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
151 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
152 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
153 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
154 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
155 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
156 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
157 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
158 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
161 __constant u32 c_sbox2[256] =
163 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
164 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
165 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
166 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
167 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
168 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
169 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
170 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
171 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
172 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
173 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
174 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
175 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
176 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
177 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
178 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
179 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
180 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
181 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
182 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
183 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
184 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
185 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
186 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
187 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
188 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
189 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
190 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
191 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
192 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
193 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
194 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
195 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
196 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
197 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
198 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
199 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
200 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
201 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
202 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
203 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
204 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
205 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
206 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
207 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
208 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
209 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
210 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
211 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
212 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
213 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
214 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
215 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
216 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
217 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
218 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
219 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
220 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
221 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
222 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
223 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
224 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
225 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
226 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
229 __constant u32 c_sbox3[256] =
231 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
232 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
233 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
234 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
235 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
236 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
237 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
238 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
239 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
240 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
241 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
242 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
243 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
244 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
245 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
246 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
247 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
248 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
249 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
250 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
251 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
252 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
253 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
254 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
255 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
256 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
257 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
258 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
259 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
260 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
261 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
262 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
263 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
264 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
265 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
266 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
267 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
268 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
269 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
270 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
271 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
272 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
273 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
274 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
275 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
276 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
277 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
278 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
279 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
280 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
281 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
282 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
283 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
284 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
285 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
286 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
287 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
288 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
289 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
290 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
291 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
292 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
293 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
294 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
298 #define BF_ROUND(L,R,N) \
300 uchar4 c = as_uchar4 ((L)); \
309 (R) ^= tmp ^ P[(N)]; \
314 #define BF_ROUND(L,R,N) \
318 tmp = S0[__bfe ((L), 24, 8)]; \
319 tmp += S1[__bfe ((L), 16, 8)]; \
320 tmp ^= S2[__bfe ((L), 8, 8)]; \
321 tmp += S3[__bfe ((L), 0, 8)]; \
323 (R) ^= tmp ^ P[(N)]; \
327 #define BF_ENCRYPT(L,R) \
331 BF_ROUND (L, R, 1); \
332 BF_ROUND (R, L, 2); \
333 BF_ROUND (L, R, 3); \
334 BF_ROUND (R, L, 4); \
335 BF_ROUND (L, R, 5); \
336 BF_ROUND (R, L, 6); \
337 BF_ROUND (L, R, 7); \
338 BF_ROUND (R, L, 8); \
339 BF_ROUND (L, R, 9); \
340 BF_ROUND (R, L, 10); \
341 BF_ROUND (L, R, 11); \
342 BF_ROUND (R, L, 12); \
343 BF_ROUND (L, R, 13); \
344 BF_ROUND (R, L, 14); \
345 BF_ROUND (L, R, 15); \
346 BF_ROUND (R, L, 16); \
357 static void expand_key (u32 E[34], const u32 W[16], const u32 len)
359 u8 *E_cur = (u8 *) E;
360 u8 *E_stop = E_cur + 72;
362 while (E_cur < E_stop)
364 u8 *W_cur = (u8 *) W;
365 u8 *W_stop = W_cur + len;
367 while (W_cur < W_stop)
376 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
382 const u32 gid = get_global_id (0);
384 if (gid >= gid_max) return;
386 const u32 lid = get_local_id (0);
388 const u32 pw_len = pws[gid].pw_len;
392 w[ 0] = pws[gid].i[ 0];
393 w[ 1] = pws[gid].i[ 1];
394 w[ 2] = pws[gid].i[ 2];
395 w[ 3] = pws[gid].i[ 3];
396 w[ 4] = pws[gid].i[ 4];
397 w[ 5] = pws[gid].i[ 5];
398 w[ 6] = pws[gid].i[ 6];
399 w[ 7] = pws[gid].i[ 7];
400 w[ 8] = pws[gid].i[ 8];
401 w[ 9] = pws[gid].i[ 9];
402 w[10] = pws[gid].i[10];
403 w[11] = pws[gid].i[11];
404 w[12] = pws[gid].i[12];
405 w[13] = pws[gid].i[13];
406 w[14] = pws[gid].i[14];
407 w[15] = pws[gid].i[15];
411 expand_key (E, w, pw_len);
413 E[ 0] = swap32 (E[ 0]);
414 E[ 1] = swap32 (E[ 1]);
415 E[ 2] = swap32 (E[ 2]);
416 E[ 3] = swap32 (E[ 3]);
417 E[ 4] = swap32 (E[ 4]);
418 E[ 5] = swap32 (E[ 5]);
419 E[ 6] = swap32 (E[ 6]);
420 E[ 7] = swap32 (E[ 7]);
421 E[ 8] = swap32 (E[ 8]);
422 E[ 9] = swap32 (E[ 9]);
423 E[10] = swap32 (E[10]);
424 E[11] = swap32 (E[11]);
425 E[12] = swap32 (E[12]);
426 E[13] = swap32 (E[13]);
427 E[14] = swap32 (E[14]);
428 E[15] = swap32 (E[15]);
429 E[16] = swap32 (E[16]);
430 E[17] = swap32 (E[17]);
438 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
439 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
440 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
441 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
447 __local u32 S0_all[8][256];
448 __local u32 S1_all[8][256];
449 __local u32 S2_all[8][256];
450 __local u32 S3_all[8][256];
452 __local u32 *S0 = S0_all[lid];
453 __local u32 *S1 = S1_all[lid];
454 __local u32 *S2 = S2_all[lid];
455 __local u32 *S3 = S3_all[lid];
461 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
462 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
463 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
464 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
465 0x9216d5d9, 0x8979fb1b
468 for (u32 i = 0; i < 256; i++)
478 for (u32 i = 0; i < 18; i++)
486 for (u32 i = 0; i < 18; i += 2)
488 L0 ^= salt_buf[(i & 2) + 0];
489 R0 ^= salt_buf[(i & 2) + 1];
497 for (u32 i = 0; i < 256; i += 4)
516 for (u32 i = 0; i < 256; i += 4)
535 for (u32 i = 0; i < 256; i += 4)
554 for (u32 i = 0; i < 256; i += 4)
575 for (u32 i = 0; i < 18; i++)
577 tmps[gid].P[i] = P[i];
580 for (u32 i = 0; i < 256; i++)
582 tmps[gid].S0[i] = S0[i];
583 tmps[gid].S1[i] = S1[i];
584 tmps[gid].S2[i] = S2[i];
585 tmps[gid].S3[i] = S3[i];
589 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
595 const u32 gid = get_global_id (0);
597 if (gid >= gid_max) return;
599 const u32 lid = get_local_id (0);
601 const u32 pw_len = pws[gid].pw_len;
605 w[ 0] = pws[gid].i[ 0];
606 w[ 1] = pws[gid].i[ 1];
607 w[ 2] = pws[gid].i[ 2];
608 w[ 3] = pws[gid].i[ 3];
609 w[ 4] = pws[gid].i[ 4];
610 w[ 5] = pws[gid].i[ 5];
611 w[ 6] = pws[gid].i[ 6];
612 w[ 7] = pws[gid].i[ 7];
613 w[ 8] = pws[gid].i[ 8];
614 w[ 9] = pws[gid].i[ 9];
615 w[10] = pws[gid].i[10];
616 w[11] = pws[gid].i[11];
617 w[12] = pws[gid].i[12];
618 w[13] = pws[gid].i[13];
619 w[14] = pws[gid].i[14];
620 w[15] = pws[gid].i[15];
624 expand_key (E, w, pw_len);
626 E[ 0] = swap32 (E[ 0]);
627 E[ 1] = swap32 (E[ 1]);
628 E[ 2] = swap32 (E[ 2]);
629 E[ 3] = swap32 (E[ 3]);
630 E[ 4] = swap32 (E[ 4]);
631 E[ 5] = swap32 (E[ 5]);
632 E[ 6] = swap32 (E[ 6]);
633 E[ 7] = swap32 (E[ 7]);
634 E[ 8] = swap32 (E[ 8]);
635 E[ 9] = swap32 (E[ 9]);
636 E[10] = swap32 (E[10]);
637 E[11] = swap32 (E[11]);
638 E[12] = swap32 (E[12]);
639 E[13] = swap32 (E[13]);
640 E[14] = swap32 (E[14]);
641 E[15] = swap32 (E[15]);
642 E[16] = swap32 (E[16]);
643 E[17] = swap32 (E[17]);
649 for (u32 i = 0; i < 18; i++)
651 P[i] = tmps[gid].P[i];
654 __local u32 S0_all[8][256];
655 __local u32 S1_all[8][256];
656 __local u32 S2_all[8][256];
657 __local u32 S3_all[8][256];
659 __local u32 *S0 = S0_all[lid];
660 __local u32 *S1 = S1_all[lid];
661 __local u32 *S2 = S2_all[lid];
662 __local u32 *S3 = S3_all[lid];
664 for (u32 i = 0; i < 256; i++)
666 S0[i] = tmps[gid].S0[i];
667 S1[i] = tmps[gid].S1[i];
668 S2[i] = tmps[gid].S2[i];
669 S3[i] = tmps[gid].S3[i];
678 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
679 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
680 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
681 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
690 for (u32 i = 0; i < loop_cnt; i++)
692 for (u32 i = 0; i < 18; i++)
700 for (u32 i = 0; i < 9; i++)
708 for (u32 i = 0; i < 256; i += 2)
716 for (u32 i = 0; i < 256; i += 2)
724 for (u32 i = 0; i < 256; i += 2)
732 for (u32 i = 0; i < 256; i += 2)
740 P[ 0] ^= salt_buf[0];
741 P[ 1] ^= salt_buf[1];
742 P[ 2] ^= salt_buf[2];
743 P[ 3] ^= salt_buf[3];
744 P[ 4] ^= salt_buf[0];
745 P[ 5] ^= salt_buf[1];
746 P[ 6] ^= salt_buf[2];
747 P[ 7] ^= salt_buf[3];
748 P[ 8] ^= salt_buf[0];
749 P[ 9] ^= salt_buf[1];
750 P[10] ^= salt_buf[2];
751 P[11] ^= salt_buf[3];
752 P[12] ^= salt_buf[0];
753 P[13] ^= salt_buf[1];
754 P[14] ^= salt_buf[2];
755 P[15] ^= salt_buf[3];
756 P[16] ^= salt_buf[0];
757 P[17] ^= salt_buf[1];
762 for (u32 i = 0; i < 9; i++)
770 for (u32 i = 0; i < 256; i += 2)
778 for (u32 i = 0; i < 256; i += 2)
786 for (u32 i = 0; i < 256; i += 2)
794 for (u32 i = 0; i < 256; i += 2)
805 for (u32 i = 0; i < 18; i++)
807 tmps[gid].P[i] = P[i];
810 for (u32 i = 0; i < 256; i++)
812 tmps[gid].S0[i] = S0[i];
813 tmps[gid].S1[i] = S1[i];
814 tmps[gid].S2[i] = S2[i];
815 tmps[gid].S3[i] = S3[i];
819 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
825 const u32 gid = get_global_id (0);
827 if (gid >= gid_max) return;
829 const u32 lid = get_local_id (0);
835 for (u32 i = 0; i < 18; i++)
837 P[i] = tmps[gid].P[i];
840 __local u32 S0_all[8][256];
841 __local u32 S1_all[8][256];
842 __local u32 S2_all[8][256];
843 __local u32 S3_all[8][256];
845 __local u32 *S0 = S0_all[lid];
846 __local u32 *S1 = S1_all[lid];
847 __local u32 *S2 = S2_all[lid];
848 __local u32 *S3 = S3_all[lid];
850 for (u32 i = 0; i < 256; i++)
852 S0[i] = tmps[gid].S0[i];
853 S1[i] = tmps[gid].S1[i];
854 S2[i] = tmps[gid].S2[i];
855 S3[i] = tmps[gid].S3[i];
868 for (u32 i = 0; i < 64; i++)
879 for (u32 i = 0; i < 64; i++)
891 f &= ~0xff; // its just 23 not 24 !