twofish_glue_3way.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  /*
   * Glue Code for 3-way parallel assembler optimized version of Twofish
   *
   * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   * USA
   *
   */
  22. #include <asm/crypto/glue_helper.h>
  23. #include <asm/crypto/twofish.h>
  24. #include <crypto/algapi.h>
  25. #include <crypto/b128ops.h>
  26. #include <crypto/internal/skcipher.h>
  27. #include <crypto/twofish.h>
  28. #include <linux/crypto.h>
  29. #include <linux/init.h>
  30. #include <linux/module.h>
  31. #include <linux/types.h>
  /*
   * Export the raw 3-way block primitives to other GPL modules.
   *
   * Neither symbol is defined in this file.  The file header describes this
   * code as glue for an assembler implementation, so they presumably come
   * from the accompanying assembly source -- confirm there.
   */
  EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
  EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
  34. static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
  35. const u8 *key, unsigned int keylen)
  36. {
  37. return twofish_setkey(&tfm->base, key, keylen);
  38. }
  39. static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
  40. const u8 *src)
  41. {
  42. __twofish_enc_blk_3way(ctx, dst, src, false);
  43. }
  44. static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
  45. const u8 *src)
  46. {
  47. __twofish_enc_blk_3way(ctx, dst, src, true);
  48. }
  49. void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
  50. {
  51. u128 ivs[2];
  52. ivs[0] = src[0];
  53. ivs[1] = src[1];
  54. twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
  55. u128_xor(&dst[1], &dst[1], &ivs[0]);
  56. u128_xor(&dst[2], &dst[2], &ivs[1]);
  57. }
  58. EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
  59. void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
  60. {
  61. be128 ctrblk;
  62. if (dst != src)
  63. *dst = *src;
  64. le128_to_be128(&ctrblk, iv);
  65. le128_inc(iv);
  66. twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
  67. u128_xor(dst, dst, (u128 *)&ctrblk);
  68. }
  69. EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
  70. void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
  71. le128 *iv)
  72. {
  73. be128 ctrblks[3];
  74. if (dst != src) {
  75. dst[0] = src[0];
  76. dst[1] = src[1];
  77. dst[2] = src[2];
  78. }
  79. le128_to_be128(&ctrblks[0], iv);
  80. le128_inc(iv);
  81. le128_to_be128(&ctrblks[1], iv);
  82. le128_inc(iv);
  83. le128_to_be128(&ctrblks[2], iv);
  84. le128_inc(iv);
  85. twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
  86. }
  87. EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
  88. static const struct common_glue_ctx twofish_enc = {
  89. .num_funcs = 2,
  90. .fpu_blocks_limit = -1,
  91. .funcs = { {
  92. .num_blocks = 3,
  93. .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
  94. }, {
  95. .num_blocks = 1,
  96. .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
  97. } }
  98. };
  99. static const struct common_glue_ctx twofish_ctr = {
  100. .num_funcs = 2,
  101. .fpu_blocks_limit = -1,
  102. .funcs = { {
  103. .num_blocks = 3,
  104. .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
  105. }, {
  106. .num_blocks = 1,
  107. .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
  108. } }
  109. };
  110. static const struct common_glue_ctx twofish_dec = {
  111. .num_funcs = 2,
  112. .fpu_blocks_limit = -1,
  113. .funcs = { {
  114. .num_blocks = 3,
  115. .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
  116. }, {
  117. .num_blocks = 1,
  118. .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
  119. } }
  120. };
  121. static const struct common_glue_ctx twofish_dec_cbc = {
  122. .num_funcs = 2,
  123. .fpu_blocks_limit = -1,
  124. .funcs = { {
  125. .num_blocks = 3,
  126. .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
  127. }, {
  128. .num_blocks = 1,
  129. .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
  130. } }
  131. };
  132. static int ecb_encrypt(struct skcipher_request *req)
  133. {
  134. return glue_ecb_req_128bit(&twofish_enc, req);
  135. }
  136. static int ecb_decrypt(struct skcipher_request *req)
  137. {
  138. return glue_ecb_req_128bit(&twofish_dec, req);
  139. }
  140. static int cbc_encrypt(struct skcipher_request *req)
  141. {
  142. return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
  143. req);
  144. }
  145. static int cbc_decrypt(struct skcipher_request *req)
  146. {
  147. return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
  148. }
  149. static int ctr_crypt(struct skcipher_request *req)
  150. {
  151. return glue_ctr_req_128bit(&twofish_ctr, req);
  152. }
  153. static struct skcipher_alg tf_skciphers[] = {
  154. {
  155. .base.cra_name = "ecb(twofish)",
  156. .base.cra_driver_name = "ecb-twofish-3way",
  157. .base.cra_priority = 300,
  158. .base.cra_blocksize = TF_BLOCK_SIZE,
  159. .base.cra_ctxsize = sizeof(struct twofish_ctx),
  160. .base.cra_module = THIS_MODULE,
  161. .min_keysize = TF_MIN_KEY_SIZE,
  162. .max_keysize = TF_MAX_KEY_SIZE,
  163. .setkey = twofish_setkey_skcipher,
  164. .encrypt = ecb_encrypt,
  165. .decrypt = ecb_decrypt,
  166. }, {
  167. .base.cra_name = "cbc(twofish)",
  168. .base.cra_driver_name = "cbc-twofish-3way",
  169. .base.cra_priority = 300,
  170. .base.cra_blocksize = TF_BLOCK_SIZE,
  171. .base.cra_ctxsize = sizeof(struct twofish_ctx),
  172. .base.cra_module = THIS_MODULE,
  173. .min_keysize = TF_MIN_KEY_SIZE,
  174. .max_keysize = TF_MAX_KEY_SIZE,
  175. .ivsize = TF_BLOCK_SIZE,
  176. .setkey = twofish_setkey_skcipher,
  177. .encrypt = cbc_encrypt,
  178. .decrypt = cbc_decrypt,
  179. }, {
  180. .base.cra_name = "ctr(twofish)",
  181. .base.cra_driver_name = "ctr-twofish-3way",
  182. .base.cra_priority = 300,
  183. .base.cra_blocksize = 1,
  184. .base.cra_ctxsize = sizeof(struct twofish_ctx),
  185. .base.cra_module = THIS_MODULE,
  186. .min_keysize = TF_MIN_KEY_SIZE,
  187. .max_keysize = TF_MAX_KEY_SIZE,
  188. .ivsize = TF_BLOCK_SIZE,
  189. .chunksize = TF_BLOCK_SIZE,
  190. .setkey = twofish_setkey_skcipher,
  191. .encrypt = ctr_crypt,
  192. .decrypt = ctr_crypt,
  193. },
  194. };
  195. static bool is_blacklisted_cpu(void)
  196. {
  197. if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
  198. return false;
  199. if (boot_cpu_data.x86 == 0x06 &&
  200. (boot_cpu_data.x86_model == 0x1c ||
  201. boot_cpu_data.x86_model == 0x26 ||
  202. boot_cpu_data.x86_model == 0x36)) {
  203. /*
  204. * On Atom, twofish-3way is slower than original assembler
  205. * implementation. Twofish-3way trades off some performance in
  206. * storing blocks in 64bit registers to allow three blocks to
  207. * be processed parallel. Parallel operation then allows gaining
  208. * more performance than was trade off, on out-of-order CPUs.
  209. * However Atom does not benefit from this parallellism and
  210. * should be blacklisted.
  211. */
  212. return true;
  213. }
  214. if (boot_cpu_data.x86 == 0x0f) {
  215. /*
  216. * On Pentium 4, twofish-3way is slower than original assembler
  217. * implementation because excessive uses of 64bit rotate and
  218. * left-shifts (which are really slow on P4) needed to store and
  219. * handle 128bit block in two 64bit registers.
  220. */
  221. return true;
  222. }
  223. return false;
  224. }
  225. static int force;
  226. module_param(force, int, 0);
  227. MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
  228. static int __init init(void)
  229. {
  230. if (!force && is_blacklisted_cpu()) {
  231. printk(KERN_INFO
  232. "twofish-x86_64-3way: performance on this CPU "
  233. "would be suboptimal: disabling "
  234. "twofish-x86_64-3way.\n");
  235. return -ENODEV;
  236. }
  237. return crypto_register_skciphers(tf_skciphers,
  238. ARRAY_SIZE(tf_skciphers));
  239. }
  240. static void __exit fini(void)
  241. {
  242. crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
  243. }
  244. module_init(init);
  245. module_exit(fini);
  246. MODULE_LICENSE("GPL");
  247. MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
  248. MODULE_ALIAS_CRYPTO("twofish");
  249. MODULE_ALIAS_CRYPTO("twofish-asm");