  /* sha256_ni_asm.S */
  /*
   * Intel SHA Extensions optimized implementation of a SHA-256 update function
   *
   * This file is provided under a dual BSD/GPLv2 license. When using or
   * redistributing this file, you may do so under either license.
   *
   * GPL LICENSE SUMMARY
   *
   * Copyright(c) 2015 Intel Corporation.
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of version 2 of the GNU General Public License as
   * published by the Free Software Foundation.
   *
   * This program is distributed in the hope that it will be useful, but
   * WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   * General Public License for more details.
   *
   * Contact Information:
   *     Sean Gulley <sean.m.gulley@intel.com>
   *     Tim Chen <tim.c.chen@linux.intel.com>
   *
   * BSD LICENSE
   *
   * Copyright(c) 2015 Intel Corporation.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   *     * Redistributions of source code must retain the above copyright
   *       notice, this list of conditions and the following disclaimer.
   *     * Redistributions in binary form must reproduce the above copyright
   *       notice, this list of conditions and the following disclaimer in
   *       the documentation and/or other materials provided with the
   *       distribution.
   *     * Neither the name of Intel Corporation nor the names of its
   *       contributors may be used to endorse or promote products derived
   *       from this software without specific prior written permission.
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   *
   */
  #include <linux/linkage.h>

  /*
   * Register aliases for sha256_ni_transform. They are C preprocessor
   * macros, so the code below reads in terms of roles instead of raw
   * register names.
   */

  /* Arguments, in the order the routine receives them */
  #define DIGEST_PTR        %rdi    /* 1st arg */
  #define DATA_PTR          %rsi    /* 2nd arg */
  #define NUM_BLKS          %rdx    /* 3rd arg */

  /* Base address of the K256 round-constant table */
  #define SHA256CONSTANTS   %rax

  /*
   * MSG carries "message words + round constants" into sha256rnds2.
   * It has to be %xmm0: sha256rnds2 reads that register implicitly.
   */
  #define MSG               %xmm0

  /* Working hash state, kept as two four-word halves */
  #define STATE0            %xmm1
  #define STATE1            %xmm2

  /*
   * MSGTMP0-MSGTMP3 hold message-schedule vectors (four words each).
   * MSGTMP4 is scratch for palignr results and for state reordering.
   */
  #define MSGTMP0           %xmm3
  #define MSGTMP1           %xmm4
  #define MSGTMP2           %xmm5
  #define MSGTMP3           %xmm6
  #define MSGTMP4           %xmm7

  /* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
  #define SHUF_MASK         %xmm8

  /* Copies of the state taken at the top of each block iteration */
  #define ABEF_SAVE         %xmm9
  #define CDGH_SAVE         %xmm10
  /*
   * Intel SHA Extensions optimized implementation of a SHA-256 update function
   *
   * The function takes a pointer to the current hash values, a pointer to the
   * input data, and the number of 64-byte blocks to process. Once all blocks
   * have been processed, the buffer that the digest pointer refers to is
   * overwritten with the resulting hash value. The function only processes
   * complete blocks; there is no functionality to store partial blocks. All
   * message padding and hash value initialization must be done outside the
   * update function.
   *
   * The indented lines in the loop are instructions related to rounds processing.
   * The non-indented lines are instructions related to the message schedule.
   *
   * void sha256_ni_transform(uint32_t *digest, const void *data,
   *                          uint32_t numBlocks);
   * digest:    pointer to digest
   * data:      pointer to input data
   * numBlocks: number of blocks to process. Note that the code reads the
   *            count from the full 64-bit %rdx register (NUM_BLKS), so a
   *            caller whose count is a 32-bit value must pass it
   *            zero-extended.
   */
  /*
   * sha256_ni_transform: run the SHA-256 compression function over a
   * sequence of 64-byte blocks using the SHA extension instructions
   * (sha256rnds2, sha256msg1, sha256msg2).
   *
   * Inputs (names are the preprocessor aliases for the registers):
   *   DIGEST_PTR  eight 32-bit state words; read with two unaligned 16-byte
   *               loads on entry and overwritten with two unaligned 16-byte
   *               stores on exit.
   *   DATA_PTR    message data; 64 bytes are consumed per loop iteration
   *               using unaligned loads.
   *   NUM_BLKS    number of 64-byte blocks. The whole 64-bit register is
   *               used. A count of zero returns immediately without
   *               reading or writing the digest.
   *
   * Modifies: DATA_PTR (advanced to the end of the data), NUM_BLKS (turned
   * into the end-of-data address), SHA256CONSTANTS, %xmm0-%xmm10 and the
   * flags. Nothing is saved or restored and the stack is not used.
   *
   * Layout convention inside the loop: instructions that perform rounds are
   * indented one extra level; instructions that build the message schedule
   * are not. The two streams are interleaved on purpose, so the order of the
   * instructions must not be changed casually.
   */
  .text
  .align 32
  ENTRY(sha256_ni_transform)

          shl             $6, NUM_BLKS                    /* convert to bytes */
          jz              .Ldone_hash                     /* zero blocks: nothing to do */
          add             DATA_PTR, NUM_BLKS              /* pointer to end of data */

          /*
           * load initial hash values
           * Need to reorder these appropriately
           * DCBA, HGFE -> ABEF, CDGH
           */
          movdqu          0*16(DIGEST_PTR), STATE0
          movdqu          1*16(DIGEST_PTR), STATE1

          pshufd          $0xB1, STATE0, STATE0           /* CDAB */
          pshufd          $0x1B, STATE1, STATE1           /* EFGH */
          movdqa          STATE0, MSGTMP4
          palignr         $8, STATE1, STATE0              /* ABEF */
          pblendw         $0xF0, MSGTMP4, STATE1          /* CDGH */

          /* Loop invariants: byte-swap mask and round-constant table base */
          movdqa          PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
          lea             K256(%rip), SHA256CONSTANTS

  .Lloop0:
          /* Save hash values for addition after rounds */
          movdqa          STATE0, ABEF_SAVE
          movdqa          STATE1, CDGH_SAVE

          /*
           * Rounds 0-15 take their message words straight from the input
           * block. Each 16-byte load is byte-swapped per 32-bit word with
           * pshufb and kept in MSGTMPn for the message schedule.
           *
           * Each group of four rounds is two sha256rnds2 instructions. The
           * first consumes the low two words of MSG; pshufd $0x0E then moves
           * the high two words down for the second.
           */

          /* Rounds 0-3 */
          movdqu          0*16(DATA_PTR), MSG
          pshufb          SHUF_MASK, MSG
          movdqa          MSG, MSGTMP0
                  paddd           0*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0

          /* Rounds 4-7 */
          movdqu          1*16(DATA_PTR), MSG
          pshufb          SHUF_MASK, MSG
          movdqa          MSG, MSGTMP1
                  paddd           1*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP1, MSGTMP0

          /* Rounds 8-11 */
          movdqu          2*16(DATA_PTR), MSG
          pshufb          SHUF_MASK, MSG
          movdqa          MSG, MSGTMP2
                  paddd           2*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP2, MSGTMP1

          /*
           * From here on each group also finishes the next schedule vector:
           * palignr $4 picks the four words that straddle the previous and
           * current vectors, paddd adds them to the vector already started
           * by sha256msg1, and sha256msg2 completes it.
           */

          /* Rounds 12-15 */
          movdqu          3*16(DATA_PTR), MSG
          pshufb          SHUF_MASK, MSG
          movdqa          MSG, MSGTMP3
                  paddd           3*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP3, MSGTMP4
          palignr         $4, MSGTMP2, MSGTMP4
          paddd           MSGTMP4, MSGTMP0
          sha256msg2      MSGTMP3, MSGTMP0
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP3, MSGTMP2

          /*
           * Rounds 16-51 repeat one pattern; only the roles of
           * MSGTMP0-MSGTMP3 rotate from group to group.
           */

          /* Rounds 16-19 */
          movdqa          MSGTMP0, MSG
                  paddd           4*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP0, MSGTMP4
          palignr         $4, MSGTMP3, MSGTMP4
          paddd           MSGTMP4, MSGTMP1
          sha256msg2      MSGTMP0, MSGTMP1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP0, MSGTMP3

          /* Rounds 20-23 */
          movdqa          MSGTMP1, MSG
                  paddd           5*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP1, MSGTMP4
          palignr         $4, MSGTMP0, MSGTMP4
          paddd           MSGTMP4, MSGTMP2
          sha256msg2      MSGTMP1, MSGTMP2
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP1, MSGTMP0

          /* Rounds 24-27 */
          movdqa          MSGTMP2, MSG
                  paddd           6*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP2, MSGTMP4
          palignr         $4, MSGTMP1, MSGTMP4
          paddd           MSGTMP4, MSGTMP3
          sha256msg2      MSGTMP2, MSGTMP3
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP2, MSGTMP1

          /* Rounds 28-31 */
          movdqa          MSGTMP3, MSG
                  paddd           7*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP3, MSGTMP4
          palignr         $4, MSGTMP2, MSGTMP4
          paddd           MSGTMP4, MSGTMP0
          sha256msg2      MSGTMP3, MSGTMP0
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP3, MSGTMP2

          /* Rounds 32-35 */
          movdqa          MSGTMP0, MSG
                  paddd           8*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP0, MSGTMP4
          palignr         $4, MSGTMP3, MSGTMP4
          paddd           MSGTMP4, MSGTMP1
          sha256msg2      MSGTMP0, MSGTMP1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP0, MSGTMP3

          /* Rounds 36-39 */
          movdqa          MSGTMP1, MSG
                  paddd           9*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP1, MSGTMP4
          palignr         $4, MSGTMP0, MSGTMP4
          paddd           MSGTMP4, MSGTMP2
          sha256msg2      MSGTMP1, MSGTMP2
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP1, MSGTMP0

          /* Rounds 40-43 */
          movdqa          MSGTMP2, MSG
                  paddd           10*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP2, MSGTMP4
          palignr         $4, MSGTMP1, MSGTMP4
          paddd           MSGTMP4, MSGTMP3
          sha256msg2      MSGTMP2, MSGTMP3
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP2, MSGTMP1

          /* Rounds 44-47 */
          movdqa          MSGTMP3, MSG
                  paddd           11*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP3, MSGTMP4
          palignr         $4, MSGTMP2, MSGTMP4
          paddd           MSGTMP4, MSGTMP0
          sha256msg2      MSGTMP3, MSGTMP0
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP3, MSGTMP2

          /* Rounds 48-51 */
          movdqa          MSGTMP0, MSG
                  paddd           12*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP0, MSGTMP4
          palignr         $4, MSGTMP3, MSGTMP4
          paddd           MSGTMP4, MSGTMP1
          sha256msg2      MSGTMP0, MSGTMP1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0
          sha256msg1      MSGTMP0, MSGTMP3

          /*
           * Rounds 52-59 still complete the vectors needed up to round 63,
           * but no new vector is started, so sha256msg1 is no longer issued.
           */

          /* Rounds 52-55 */
          movdqa          MSGTMP1, MSG
                  paddd           13*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP1, MSGTMP4
          palignr         $4, MSGTMP0, MSGTMP4
          paddd           MSGTMP4, MSGTMP2
          sha256msg2      MSGTMP1, MSGTMP2
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0

          /* Rounds 56-59 */
          movdqa          MSGTMP2, MSG
                  paddd           14*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
          movdqa          MSGTMP2, MSGTMP4
          palignr         $4, MSGTMP1, MSGTMP4
          paddd           MSGTMP4, MSGTMP3
          sha256msg2      MSGTMP2, MSGTMP3
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0

          /* Rounds 60-63: last schedule vector, rounds only */
          movdqa          MSGTMP3, MSG
                  paddd           15*16(SHA256CONSTANTS), MSG
                  sha256rnds2     STATE0, STATE1
                  pshufd          $0x0E, MSG, MSG
                  sha256rnds2     STATE1, STATE0

          /* Add current hash values with previously saved */
          paddd           ABEF_SAVE, STATE0
          paddd           CDGH_SAVE, STATE1

          /*
           * Increment data pointer and loop if more to process.
           * NUM_BLKS holds the end address, which is the start address plus
           * a multiple of 64, so the equality test is hit exactly.
           */
          add             $64, DATA_PTR
          cmp             NUM_BLKS, DATA_PTR
          jne             .Lloop0

          /* Write hash values back in the correct order */
          pshufd          $0x1B, STATE0, STATE0           /* FEBA */
          pshufd          $0xB1, STATE1, STATE1           /* DCHG */
          movdqa          STATE0, MSGTMP4
          pblendw         $0xF0, STATE1, STATE0           /* DCBA */
          palignr         $8, MSGTMP4, STATE1             /* HGFE */

          movdqu          STATE0, 0*16(DIGEST_PTR)
          movdqu          STATE1, 1*16(DIGEST_PTR)

  .Ldone_hash:

          ret
  ENDPROC(sha256_ni_transform)
  /*
   * K256: the 64 SHA-256 round constants as 32-bit words, 256 bytes in all.
   * Each row below is one 16-byte group of four constants; the round code
   * addresses group i as i*16(SHA256CONSTANTS) and uses it as a paddd memory
   * operand, which is why the table must be at least 16-byte aligned.
   */
  .section        .rodata.cst256.K256, "aM", @progbits, 256
  .align 64
  K256:
          .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
          .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
          .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
          .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
          .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
          .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
          .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
          .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
          .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
          .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
          .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
          .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
          .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
          .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
          .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
          .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  /*
   * PSHUFFLE_BYTE_FLIP_MASK: pshufb control that reverses the byte order
   * inside each of the four 32-bit words of a 16-byte vector while leaving
   * the words in place. It is applied to every 16 bytes of message data
   * right after they are loaded. The mask is fetched with movdqa, so the
   * 16-byte alignment below is required.
   */
  .section        .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
  .align 16
  PSHUFFLE_BYTE_FLIP_MASK:
          .octa   0x0c0d0e0f08090a0b0405060700010203