-#define BOX(S,i) (S)[(i)]
-
-#define uint_to_hex_upper8(i) l_bin2asc[(i)]
-
-void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table)
+#if VECT_SIZE == 1
+#define BOX1(S,i) (S)[(i)]
+#elif VECT_SIZE == 2
+#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#elif VECT_SIZE == 4
+#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#elif VECT_SIZE == 8
+#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#elif VECT_SIZE == 16
+#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#endif
+
+void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)