/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
  #include <linux/linkage.h>
  #include <asm/assembler.h>

  /*
   * Define an assembler symbol ".Lv<n>.4s" with value <n> for each of
   * v0..v12.  The hand-encoding macros in this file prefix a register
   * operand written as "vN.4s" with ".L" to look up its register number.
   * Only v0-v12 are defined here; any other register passed to those
   * macros leaves an undefined symbol in the .inst expression.
   */
  .irp        b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
    .set      .Lv\b\().4s, \b
  .endr
  /*
   * sm3partw1 rd, rn, rm - emit SM3PARTW1 as a raw opcode word.
   *
   * Each operand must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * The numbers are OR-ed into the base opcode 0xce60c000:
   * rd at bit 0, rn at bit 5, rm at bit 16.
   * NOTE(review): presumably hand-encoded so the file still builds with
   * assemblers that lack the SM3 mnemonics - confirm.
   */
  .macro      sm3partw1, rd, rn, rm
    .inst     0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
  .endm
  /*
   * sm3partw2 rd, rn, rm - emit SM3PARTW2 as a raw opcode word.
   *
   * Each operand must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * The numbers are OR-ed into the base opcode 0xce60c400:
   * rd at bit 0, rn at bit 5, rm at bit 16.
   */
  .macro      sm3partw2, rd, rn, rm
    .inst     0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
  .endm
  /*
   * sm3ss1 rd, rn, rm, ra - emit SM3SS1 as a raw opcode word.
   *
   * Each operand must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * The numbers are OR-ed into the base opcode 0xce400000:
   * rd at bit 0, rn at bit 5, ra at bit 10, rm at bit 16.
   * Note the macro argument order is rd, rn, rm, ra, while ra sits below
   * rm in the encoded word.
   */
  .macro      sm3ss1, rd, rn, rm, ra
    .inst     0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
  .endm
  /*
   * sm3tt1a rd, rn, rm, imm2 - emit SM3TT1A as a raw opcode word.
   *
   * rd, rn and rm must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * imm2 is a plain integer expression; it is parenthesised before being
   * shifted so that compound expressions are safe.
   * Fields OR-ed into the base opcode 0xce408000:
   * rd at bit 0, rn at bit 5, imm2 at bit 12, rm at bit 16.
   */
  .macro      sm3tt1a, rd, rn, rm, imm2
    .inst     0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
  .endm
  /*
   * sm3tt1b rd, rn, rm, imm2 - emit SM3TT1B as a raw opcode word.
   *
   * rd, rn and rm must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * imm2 is a plain integer expression.
   * Fields OR-ed into the base opcode 0xce408400:
   * rd at bit 0, rn at bit 5, imm2 at bit 12, rm at bit 16.
   */
  .macro      sm3tt1b, rd, rn, rm, imm2
    .inst     0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
  .endm
  /*
   * sm3tt2a rd, rn, rm, imm2 - emit SM3TT2A as a raw opcode word.
   *
   * rd, rn and rm must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * imm2 is a plain integer expression.
   * Fields OR-ed into the base opcode 0xce408800:
   * rd at bit 0, rn at bit 5, imm2 at bit 12, rm at bit 16.
   */
  .macro      sm3tt2a, rd, rn, rm, imm2
    .inst     0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
  .endm
  /*
   * sm3tt2b rd, rn, rm, imm2 - emit SM3TT2B as a raw opcode word.
   *
   * rd, rn and rm must be written as vN.4s with N in 0..12, so that
   * ".L<operand>" names one of the .Lv<N>.4s register-number symbols.
   * imm2 is a plain integer expression.
   * Fields OR-ed into the base opcode 0xce408c00:
   * rd at bit 0, rn at bit 5, imm2 at bit 12, rm at bit 16.
   */
  .macro      sm3tt2b, rd, rn, rm, imm2
    .inst     0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
  .endm
  /*
   * round ab, s0, t0, t1, i - one round of the SM3 compression loop.
   *
   *   ab  "a" or "b"; pasted onto sm3tt1/sm3tt2 to select the A or B form
   *   s0  register name (no arrangement suffix) passed as rm to sm3tt2
   *   t0  register name holding the constant operand for sm3ss1
   *   t1  register name that receives t0 with every 32-bit lane rotated
   *       left by one bit, ready to serve as t0 of the following round
   *   i   value for the imm2 field (0..3) of sm3tt1/sm3tt2
   *
   * Fixed registers: v8 and v9 are both read and updated, v5 receives the
   * sm3ss1 result, v10 is read as rm of sm3tt1.
   */
  .macro      round, ab, s0, t0, t1, i
    sm3ss1      v5.4s, v8.4s, \t0\().4s, v9.4s
    shl         \t1\().4s, \t0\().4s, #1        /* t1  = t0 << 1          */
    sri         \t1\().4s, \t0\().4s, #31       /* t1 |= t0 >> 31  (rol 1) */
    sm3tt1\ab   v8.4s, v5.4s, v10.4s, \i
    sm3tt2\ab   v9.4s, v5.4s, \s0\().4s, \i
  .endm
  /*
   * qround ab, s0, s1, s2, s3[, s4] - four consecutive rounds.
   *
   *   ab      "a" or "b", forwarded to the round macro
   *   s0..s3  register names (no suffix) holding message-schedule words;
   *           s0 and s1 are always read, s2 and s3 only when s4 is given
   *   s4      optional register name; when present it is overwritten with
   *           the next four schedule words.  The ext/sm3partw1 part runs
   *           before the rounds and sm3partw2 after them, using v6 and v7
   *           as temporaries.  When omitted, only the rounds are emitted.
   *
   * v11 must hold the round constant on entry.  The four rounds ping-pong
   * between v11 and v12, rotating by one bit each time, so on exit v11
   * holds the entry value rotated left by four bits per lane and is ready
   * for the next qround.
   *
   * Clobbers: v10 (s0 ^ s1), v11, v12; v5, v8, v9 through the round macro;
   * v6, v7 and \s4 when s4 is given.
   */
  .macro      qround, ab, s0, s1, s2, s3, s4
    .ifnb     \s4
    ext         \s4\().16b, \s1\().16b, \s2\().16b, #12
    ext         v6.16b, \s0\().16b, \s1\().16b, #12
    ext         v7.16b, \s2\().16b, \s3\().16b, #8
    sm3partw1   \s4\().4s, \s0\().4s, \s3\().4s
    .endif

    eor         v10.16b, \s0\().16b, \s1\().16b

    round       \ab, \s0, v11, v12, 0
    round       \ab, \s0, v12, v11, 1
    round       \ab, \s0, v11, v12, 2
    round       \ab, \s0, v12, v11, 3

    .ifnb     \s4
    sm3partw2   \s4\().4s, v7.4s, v6.4s
    .endif
  .endm
  /*
   * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
   *                       int blocks)
   *
   *   x0  sst     hash state: eight 32-bit words, read on entry and
   *               written back on exit
   *   x1  src     input data, consumed 64 bytes per block (post-incremented)
   *   w2  blocks  number of 64-byte blocks to process
   *
   * A block count of zero returns immediately with the state untouched.
   * Without that check the decrement-then-test loop below would process
   * one block and then wrap the counter.
   *
   * Register use:
   *   v8, v9     working state
   *   v15, v16   copy of the state taken at the start of each block
   *   v0-v4      message-schedule words
   *   v5-v7, v10-v12  scratch for the round/qround macros
   *   v13, v14   the two constants loaded from .Lt
   *   x8         address of .Lt
   * None of these are saved or restored here, v8-v15 included.
   * NOTE(review): the caller is presumably responsible for preserving the
   * NEON register state around this call - confirm.
   */
  .text
  ENTRY(sm3_ce_transform)
    /* nothing to do for an empty request */
    cbz         w2, 1f

    /*
     * load state; rev64 followed by ext #8 reverses the order of the
     * four 32-bit words in each vector
     */
    ld1         {v8.4s-v9.4s}, [x0]
    rev64       v8.4s, v8.4s
    rev64       v9.4s, v9.4s
    ext         v8.16b, v8.16b, v8.16b, #8
    ext         v9.16b, v9.16b, v9.16b, #8

    /* s13 = first constant, s14 = second; upper lanes are zeroed */
    adr_l       x8, .Lt
    ldp         s13, s14, [x8]

    /* load input */
  0:
    ld1         {v0.16b-v3.16b}, [x1], #64
    sub         w2, w2, #1

    /* keep the incoming state for the final XOR */
    mov         v15.16b, v8.16b
    mov         v16.16b, v9.16b

    /*
     * byte-swap each 32-bit input word.
     * NOTE(review): CPU_LE() comes from asm/assembler.h and presumably
     * emits its argument on little-endian builds only - confirm.
     */
    CPU_LE( rev32       v0.16b, v0.16b )
    CPU_LE( rev32       v1.16b, v1.16b )
    CPU_LE( rev32       v2.16b, v2.16b )
    CPU_LE( rev32       v3.16b, v3.16b )

    /* move the first constant into the top 32-bit lane of v11 */
    ext         v11.16b, v13.16b, v13.16b, #4

    /* 4 x 4 rounds using the "a" forms */
    qround      a, v0, v1, v2, v3, v4
    qround      a, v1, v2, v3, v4, v0
    qround      a, v2, v3, v4, v0, v1
    qround      a, v3, v4, v0, v1, v2

    /* switch to the second constant for the remaining rounds */
    ext         v11.16b, v14.16b, v14.16b, #4

    /* 12 x 4 rounds using the "b" forms */
    qround      b, v4, v0, v1, v2, v3
    qround      b, v0, v1, v2, v3, v4
    qround      b, v1, v2, v3, v4, v0
    qround      b, v2, v3, v4, v0, v1
    qround      b, v3, v4, v0, v1, v2
    qround      b, v4, v0, v1, v2, v3
    qround      b, v0, v1, v2, v3, v4
    qround      b, v1, v2, v3, v4, v0
    qround      b, v2, v3, v4, v0, v1
    /* last three groups: no further schedule words are needed */
    qround      b, v3, v4
    qround      b, v4, v0
    qround      b, v0, v1

    /* fold the saved state back in */
    eor         v8.16b, v8.16b, v15.16b
    eor         v9.16b, v9.16b, v16.16b

    /* handled all input blocks? */
    cbnz        w2, 0b

    /* save state, undoing the word reversal applied on load */
    rev64       v8.4s, v8.4s
    rev64       v9.4s, v9.4s
    ext         v8.16b, v8.16b, v8.16b, #8
    ext         v9.16b, v9.16b, v9.16b, #8
    st1         {v8.4s-v9.4s}, [x0]
  1:
    ret
  ENDPROC(sm3_ce_transform)
  /*
   * Two 32-bit constants read as a pair into s13/s14 by sm3_ce_transform.
   * The first seeds v11 for the "a" rounds, the second for the "b" rounds.
   * NOTE(review): presumably the SM3 constant T for rounds 0-15 and the
   * rounds 16-63 constant 0x7a879d8a pre-rotated left by 16 bits -
   * confirm against the SM3 specification.
   */
  .section    ".rodata", "a"
  .align      3
  .Lt:
    .word     0x79cc4519, 0x9d8a7a87