/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
  /*
   * ENTRY/ENDPROC, frame_push/frame_pop, ldr_l, CPU_LE and the
   * *_yield_neon macros used below are not defined in this file;
   * presumably they are supplied by the two headers included here.
   */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	/* allow the sha1c/sha1p/sha1m/sha1h/sha1su0/sha1su1 mnemonics */
	.arch		armv8-a+crypto
  /*
   * Register aliases.
   *
   *   k0-k3             v0-v3   round constants, one per vector (see loadrc)
   *   t0, t1            v4, v5  scratch: schedule word + round constant sums
   *   dga, dgav         q6, v6  16 bytes of state loaded from offset 0
   *   dgb, dgbv         s7, v7  32-bit state word loaded from offset 16
   *   dg0q, dg0s, dg0v  q12/s12/v12  working copy updated by every step
   *   dg1s, dg1v        s13/v13      sha1h result written by odd steps
   *   dg2s              s14          sha1h result written by even steps
   */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14
  /*
   * add_only - emit one sha1c/sha1p/sha1m step without a schedule update.
   *
   *   op   suffix appended to "sha1" to pick the instruction (c, p or m)
   *   ev   the literal "ev" selects the even variant; anything else
   *        (callers pass "od") selects the odd variant
   *   rc   vector alias holding the round constant used by the add
   *   s0   number N of the schedule register vN that is added to rc.
   *        May be left blank in the odd variant only, which then emits
   *        no add at all.
   *   dg1  even variant only: optional scalar register used instead of
   *        dg1s as the second operand of the sha1 instruction
   *
   * The two variants ping-pong between registers:
   *   even: reads t0 and dg1s (or dg1), writes t1 and dg2s
   *   odd:  reads t1 and dg2s,          writes t0 and dg1s
   * so invocations have to alternate ev, od, ev, od, ...  In both
   * variants sha1h reads dg0s before the sha1 instruction overwrites dg0q.
   */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s	// input of the next odd step
	sha1h		dg2s, dg0s			// input of the next odd step
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s	// input of the next even step
	.endif
	sha1h		dg1s, dg0s			// input of the next even step
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm
  /*
   * add_update - one add_only step wrapped in a message schedule update.
   *
   *   op, ev, rc, dg1  passed through unchanged to add_only
   *   s0               number of the schedule register that is rewritten
   *                    in place by sha1su0 followed by sha1su1
   *   s1, s2           numbers of the two source registers of sha1su0;
   *                    s1 is also handed to add_only as the register to
   *                    which the round constant is added
   *   s3               number of the source register of sha1su1
   */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm
  /*
   * loadrc - replicate a 32-bit constant into every lane of a vector.
   *
   *   k    destination vector including arrangement, e.g. k0.4s
   *   val  the 32-bit constant
   *   tmp  32-bit general purpose scratch register (clobbered)
   */
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val	// bits 15:0, remaining bits cleared
	movk		\tmp, :abs_g1:\val	// insert bits 31:16
	dup		\k, \tmp		// copy tmp into each lane of k
	.endm
  /*
   * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
   *                        int blocks)
   *
   * Processes 64-byte input blocks from src and folds them into the
   * state stored at sst (16 bytes at offset 0 plus a 32-bit word at
   * offset 16).  If the finalize field of *sst is non-zero, one extra
   * block consisting of the padding and the total bit count is processed
   * after the input has been consumed.
   *
   * Register usage:
   *   x19  sst
   *   x20  src, advanced by 64 for every block loaded
   *   w21  blocks still to be processed
   *   w4   finalize flag; reused as x4 for the byte count and then
   *        cleared once the padding block has been built
   *   w6   scratch for loadrc
   *   x7, x8  scratch while building the padding block
   *   v8-v11  the current block (message schedule)
   *
   * The arguments are moved out of x0-x2 right away.
   * NOTE(review): presumably because the yield macros may clobber
   * caller-saved registers - confirm against asm/assembler.h.
   *
   * A block count of zero is not special-cased: w21 is decremented
   * before the first test, so the block at label 1 is always processed.
   */
ENTRY(sha1_ce_transform)
	frame_push	3

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2

	/* load round constants (also the re-entry point after a yield) */
0:	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state */
	ld1		{dgav.4s}, [x19]
	ldr		dgb, [x19, #16]

	/* load sha1_ce_state::finalize */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x19, x4]

	/* load input */
1:	ld1		{v8.4s-v11.4s}, [x20], #64
	sub		w21, w21, #1

	/*
	 * Byte-swap each 32-bit word of the block.
	 * NOTE(review): CPU_LE presumably emits its argument on
	 * little-endian builds only - confirm against asm/assembler.h.
	 */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/*
	 * Entry point shared with the padding block built at label 3.
	 * t0 is primed for the first (even) step, and that step takes
	 * dgb instead of dg1s because no odd step has written dg1s yet.
	 */
2:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/*
	 * Twenty steps: five sha1c, five sha1p, five sha1m, five sha1p.
	 * The rc and s1 operands of each step feed the add that prepares
	 * the FOLLOWING step, which is why the constant changes one line
	 * before the instruction suffix does.
	 */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od			// last step: no further add needed

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbz		w21, 3f

	/*
	 * More input remains.  On the yield path the state is written
	 * back first and execution restarts at label 0, which reloads
	 * the round constants, the state and the finalize flag.
	 */
	if_will_cond_yield_neon
	st1		{dgav.4s}, [x19]
	str		dgb, [x19, #16]
	do_cond_yield_neon
	b		0b
	endif_yield_neon

	b		1b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
3:	cbz		x4, 4f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x19, x4]
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8			// v8 = { 0x80000000, 0, 0, 0 }
	mov		x4, #0			// next pass through 3: goes to 4
	mov		v11.d[0], xzr
	mov		v11.d[1], x7		// bit count in the last two words
	b		2b

	/* store new state */
4:	st1		{dgav.4s}, [x19]
	str		dgb, [x19, #16]
	frame_pop
	ret
ENDPROC(sha1_ce_transform)