2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "OpenCL/types_ocl.c"
18 #include "OpenCL/common.c"
20 #define COMPARE_S "OpenCL/check_single_comp4.c"
21 #define COMPARE_M "OpenCL/check_multi_comp4.c"
23 // http://www.schneier.com/code/constants.txt
25 __constant u32 c_sbox0[256] =
27 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
28 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
29 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
30 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
31 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
32 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
33 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
34 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
35 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
36 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
37 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
38 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
39 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
40 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
41 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
42 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
43 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
44 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
45 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
46 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
47 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
48 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
49 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
50 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
51 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
52 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
53 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
54 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
55 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
56 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
57 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
58 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
59 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
60 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
61 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
62 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
63 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
64 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
65 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
66 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
67 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
68 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
69 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
70 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
71 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
72 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
73 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
74 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
75 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
76 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
77 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
78 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
79 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
80 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
81 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
82 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
83 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
84 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
85 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
86 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
87 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
88 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
89 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
90 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
93 __constant u32 c_sbox1[256] =
95 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
96 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
97 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
98 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
99 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
100 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
101 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
102 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
103 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
104 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
105 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
106 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
107 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
108 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
109 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
110 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
111 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
112 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
113 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
114 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
115 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
116 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
117 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
118 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
119 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
120 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
121 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
122 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
123 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
124 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
125 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
126 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
127 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
128 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
129 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
130 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
131 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
132 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
133 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
134 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
135 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
136 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
137 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
138 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
139 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
140 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
141 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
142 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
143 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
144 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
145 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
146 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
147 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
148 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
149 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
150 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
151 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
152 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
153 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
154 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
155 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
156 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
157 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
158 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
161 __constant u32 c_sbox2[256] =
163 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
164 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
165 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
166 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
167 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
168 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
169 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
170 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
171 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
172 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
173 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
174 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
175 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
176 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
177 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
178 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
179 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
180 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
181 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
182 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
183 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
184 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
185 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
186 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
187 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
188 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
189 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
190 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
191 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
192 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
193 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
194 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
195 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
196 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
197 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
198 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
199 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
200 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
201 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
202 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
203 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
204 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
205 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
206 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
207 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
208 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
209 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
210 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
211 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
212 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
213 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
214 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
215 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
216 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
217 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
218 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
219 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
220 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
221 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
222 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
223 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
224 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
225 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
226 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
229 __constant u32 c_sbox3[256] =
231 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
232 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
233 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
234 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
235 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
236 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
237 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
238 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
239 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
240 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
241 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
242 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
243 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
244 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
245 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
246 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
247 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
248 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
249 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
250 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
251 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
252 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
253 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
254 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
255 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
256 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
257 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
258 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
259 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
260 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
261 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
262 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
263 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
264 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
265 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
266 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
267 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
268 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
269 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
270 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
271 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
272 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
273 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
274 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
275 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
276 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
277 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
278 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
279 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
280 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
281 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
282 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
283 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
284 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
285 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
286 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
287 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
288 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
289 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
290 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
291 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
292 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
293 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
294 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
298 #define BF_ROUND(L,R,N) \
300 uchar4 c = as_uchar4 ((L)); \
309 (R) ^= tmp ^ P[(N)]; \
314 #define BF_ROUND(L,R,N) \
318 tmp = S0[__bfe ((L), 24, 8)]; \
319 tmp += S1[__bfe ((L), 16, 8)]; \
320 tmp ^= S2[__bfe ((L), 8, 8)]; \
321 tmp += S3[__bfe ((L), 0, 8)]; \
323 (R) ^= tmp ^ P[(N)]; \
328 #define BF_ROUND(L,R,N) \
330 uchar4 c = as_uchar4 ((L)); \
339 (R) ^= tmp ^ P[(N)]; \
343 #define BF_ENCRYPT(L,R) \
347 BF_ROUND (L, R, 1); \
348 BF_ROUND (R, L, 2); \
349 BF_ROUND (L, R, 3); \
350 BF_ROUND (R, L, 4); \
351 BF_ROUND (L, R, 5); \
352 BF_ROUND (R, L, 6); \
353 BF_ROUND (L, R, 7); \
354 BF_ROUND (R, L, 8); \
355 BF_ROUND (L, R, 9); \
356 BF_ROUND (R, L, 10); \
357 BF_ROUND (L, R, 11); \
358 BF_ROUND (R, L, 12); \
359 BF_ROUND (L, R, 13); \
360 BF_ROUND (R, L, 14); \
361 BF_ROUND (L, R, 15); \
362 BF_ROUND (R, L, 16); \
373 static void expand_key (u32 E[18], const u32 W[16], const u32 len)
375 u8 *E_cur = (u8 *) E;
376 u8 *E_stop = E_cur + 72;
378 while (E_cur < E_stop)
380 u8 *W_cur = (u8 *) W;
381 u8 *W_stop = W_cur + len;
383 while (W_cur < W_stop)
392 __kernel void m03200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
398 const u32 gid = get_global_id (0);
399 const u32 lid = get_local_id (0);
401 if (gid >= gid_max) return;
403 const u32 pw_len = pws[gid].pw_len;
407 w[ 0] = pws[gid].i[ 0];
408 w[ 1] = pws[gid].i[ 1];
409 w[ 2] = pws[gid].i[ 2];
410 w[ 3] = pws[gid].i[ 3];
411 w[ 4] = pws[gid].i[ 4];
412 w[ 5] = pws[gid].i[ 5];
413 w[ 6] = pws[gid].i[ 6];
414 w[ 7] = pws[gid].i[ 7];
415 w[ 8] = pws[gid].i[ 8];
416 w[ 9] = pws[gid].i[ 9];
417 w[10] = pws[gid].i[10];
418 w[11] = pws[gid].i[11];
419 w[12] = pws[gid].i[12];
420 w[13] = pws[gid].i[13];
421 w[14] = pws[gid].i[14];
422 w[15] = pws[gid].i[15];
426 expand_key (E, w, pw_len);
428 E[ 0] = swap32 (E[ 0]);
429 E[ 1] = swap32 (E[ 1]);
430 E[ 2] = swap32 (E[ 2]);
431 E[ 3] = swap32 (E[ 3]);
432 E[ 4] = swap32 (E[ 4]);
433 E[ 5] = swap32 (E[ 5]);
434 E[ 6] = swap32 (E[ 6]);
435 E[ 7] = swap32 (E[ 7]);
436 E[ 8] = swap32 (E[ 8]);
437 E[ 9] = swap32 (E[ 9]);
438 E[10] = swap32 (E[10]);
439 E[11] = swap32 (E[11]);
440 E[12] = swap32 (E[12]);
441 E[13] = swap32 (E[13]);
442 E[14] = swap32 (E[14]);
443 E[15] = swap32 (E[15]);
444 E[16] = swap32 (E[16]);
445 E[17] = swap32 (E[17]);
447 for (u32 i = 0; i < 18; i++)
449 tmps[gid].E[i] = E[i];
458 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
459 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
460 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
461 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
467 __local u32 S0_all[8][256];
468 __local u32 S1_all[8][256];
469 __local u32 S2_all[8][256];
470 __local u32 S3_all[8][256];
472 __local u32 *S0 = S0_all[lid];
473 __local u32 *S1 = S1_all[lid];
474 __local u32 *S2 = S2_all[lid];
475 __local u32 *S3 = S3_all[lid];
481 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
482 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
483 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
484 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
485 0x9216d5d9, 0x8979fb1b
488 for (u32 i = 0; i < 256; i++)
498 for (u32 i = 0; i < 18; i++)
506 for (u32 i = 0; i < 18; i += 2)
508 L0 ^= salt_buf[(i & 2) + 0];
509 R0 ^= salt_buf[(i & 2) + 1];
517 for (u32 i = 0; i < 256; i += 4)
536 for (u32 i = 0; i < 256; i += 4)
555 for (u32 i = 0; i < 256; i += 4)
574 for (u32 i = 0; i < 256; i += 4)
595 for (u32 i = 0; i < 18; i++)
597 tmps[gid].P[i] = P[i];
600 for (u32 i = 0; i < 256; i++)
602 tmps[gid].S0[i] = S0[i];
603 tmps[gid].S1[i] = S1[i];
604 tmps[gid].S2[i] = S2[i];
605 tmps[gid].S3[i] = S3[i];
609 __kernel void m03200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
615 const u32 gid = get_global_id (0);
616 const u32 lid = get_local_id (0);
618 if (gid >= gid_max) return;
624 for (u32 i = 0; i < 18; i++)
626 E[i] = tmps[gid].E[i];
631 for (u32 i = 0; i < 18; i++)
633 P[i] = tmps[gid].P[i];
636 __local u32 S0_all[8][256];
637 __local u32 S1_all[8][256];
638 __local u32 S2_all[8][256];
639 __local u32 S3_all[8][256];
641 __local u32 *S0 = S0_all[lid];
642 __local u32 *S1 = S1_all[lid];
643 __local u32 *S2 = S2_all[lid];
644 __local u32 *S3 = S3_all[lid];
646 for (u32 i = 0; i < 256; i++)
648 S0[i] = tmps[gid].S0[i];
649 S1[i] = tmps[gid].S1[i];
650 S2[i] = tmps[gid].S2[i];
651 S3[i] = tmps[gid].S3[i];
660 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
661 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
662 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
663 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
672 for (u32 i = 0; i < loop_cnt; i++)
674 for (u32 i = 0; i < 18; i++)
683 for (u32 i = 0; i < 9; i++)
691 for (u32 i = 0; i < 256; i += 2)
699 for (u32 i = 0; i < 256; i += 2)
707 for (u32 i = 0; i < 256; i += 2)
715 for (u32 i = 0; i < 256; i += 2)
723 P[ 0] ^= salt_buf[0];
724 P[ 1] ^= salt_buf[1];
725 P[ 2] ^= salt_buf[2];
726 P[ 3] ^= salt_buf[3];
727 P[ 4] ^= salt_buf[0];
728 P[ 5] ^= salt_buf[1];
729 P[ 6] ^= salt_buf[2];
730 P[ 7] ^= salt_buf[3];
731 P[ 8] ^= salt_buf[0];
732 P[ 9] ^= salt_buf[1];
733 P[10] ^= salt_buf[2];
734 P[11] ^= salt_buf[3];
735 P[12] ^= salt_buf[0];
736 P[13] ^= salt_buf[1];
737 P[14] ^= salt_buf[2];
738 P[15] ^= salt_buf[3];
739 P[16] ^= salt_buf[0];
740 P[17] ^= salt_buf[1];
746 for (u32 i = 0; i < 9; i++)
754 for (u32 i = 0; i < 256; i += 2)
762 for (u32 i = 0; i < 256; i += 2)
770 for (u32 i = 0; i < 256; i += 2)
778 for (u32 i = 0; i < 256; i += 2)
789 for (u32 i = 0; i < 18; i++)
791 tmps[gid].P[i] = P[i];
794 for (u32 i = 0; i < 256; i++)
796 tmps[gid].S0[i] = S0[i];
797 tmps[gid].S1[i] = S1[i];
798 tmps[gid].S2[i] = S2[i];
799 tmps[gid].S3[i] = S3[i];
803 __kernel void m03200_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
809 const u32 gid = get_global_id (0);
810 const u32 lid = get_local_id (0);
812 if (gid >= gid_max) return;
818 for (u32 i = 0; i < 18; i++)
820 P[i] = tmps[gid].P[i];
823 __local u32 S0_all[8][256];
824 __local u32 S1_all[8][256];
825 __local u32 S2_all[8][256];
826 __local u32 S3_all[8][256];
828 __local u32 *S0 = S0_all[lid];
829 __local u32 *S1 = S1_all[lid];
830 __local u32 *S2 = S2_all[lid];
831 __local u32 *S3 = S3_all[lid];
833 for (u32 i = 0; i < 256; i++)
835 S0[i] = tmps[gid].S0[i];
836 S1[i] = tmps[gid].S1[i];
837 S2[i] = tmps[gid].S2[i];
838 S3[i] = tmps[gid].S3[i];
851 for (u32 i = 0; i < 64; i++)
862 for (u32 i = 0; i < 64; i++)
874 f &= ~0xff; // its just 23 not 24 !