/*
 *  linux/arch/arm/lib/sha1.S
 *
 *  SHA transform optimized for ARM
 *
 *  Copyright:  (C) 2005 by Nicolas Pitre <nico@fluxnic.net>
 *  Created:    September 17, 2005
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  The reference implementation for this code is linux/lib/sha1.c
 */

#include <linux/linkage.h>

        .text

/*
 * void sha_transform(__u32 *digest, const char *in, __u32 *W)
 *
 * Note: the "in" ptr may be unaligned.
 */
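
/*
 * The workspace W must provide room for 80 32-bit words: the first 16
 * are filled from "in" below, and the expansion loop then writes
 * W[16..79].  digest[] holds the five 32-bit state words A..E.
 */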

ENTRY(sha_transform)

        stmfd   sp!, {r4 - r8, lr}

        @ for (i = 0; i < 16; i++)
        @         W[i] = be32_to_cpu(in[i]);

#ifdef __ARMEB__
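        @ big-endian: the input already has the right byte order, so just
        @ memcpy the 64-byte block into W.  r4 preserves the digest pointer
        @ across the call; W is recovered from memcpy's return value.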
        mov     r4, r0
        mov     r0, r2
        mov     r2, #64
        bl      memcpy
        mov     r2, r0
        mov     r0, r4
#else
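        @ little-endian: assemble each big-endian word from four byte
        @ loads (this also copes with an unaligned "in" pointer)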
        mov     r3, r2
        mov     lr, #16
1:      ldrb    r4, [r1], #1
        ldrb    r5, [r1], #1
        ldrb    r6, [r1], #1
        ldrb    r7, [r1], #1
        subs    lr, lr, #1
        orr     r5, r5, r4, lsl #8
        orr     r6, r6, r5, lsl #8
        orr     r7, r7, r6, lsl #8
        str     r7, [r3], #4
        bne     1b
#endif

        @ for (i = 0; i < 64; i++)
        @         W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
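        @ r3 walks W[] with pre-indexed write-back: after loading W[i],
        @ offsets #8, #32 and #52 reach W[i+2], W[i+8] and W[i+13], and
        @ the result is stored at W[i+16] (offset #64)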
        sub     r3, r2, #4
        mov     lr, #64
2:      ldr     r4, [r3, #4]!
        subs    lr, lr, #1
        ldr     r5, [r3, #8]
        ldr     r6, [r3, #32]
        ldr     r7, [r3, #52]
        eor     r4, r4, r5
        eor     r4, r4, r6
        eor     r4, r4, r7
        mov     r4, r4, ror #31
        str     r4, [r3, #64]
        bne     2b

        /*
         * The SHA functions are:
         *
         * f1(B,C,D) = (D ^ (B & (C ^ D)))
         * f2(B,C,D) = (B ^ C ^ D)
         * f3(B,C,D) = ((B & C) | (D & (B | C)))
         *
         * Then the sub-blocks are processed as follows:
         *
         * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
         * B' = A
         * C' = ror(B, 2)
         * D' = C
         * E' = D
         *
         * We therefore unroll each loop 5 times to avoid register shuffling.
         * Also the ror for C (and also D and E which are successively derived
         * from it) is applied in place to save an additional mov insn for
         * each round.
         */
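
        /*
         * Because that ror is folded into the later uses of C, D and E,
         * those three registers hold their values pre-rotated by 30 bits
         * between rounds.  This is why the initial values get a ror #30
         * adjustment below, and why the final accumulation adds C, D and
         * E back with a ror #2 correction.
         */

        @ sha_f1: rounds 0..19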
        .macro  sha_f1, A, B, C, D, E
        ldr     r3, [r2], #4
        eor     ip, \C, \D
        add     \E, r1, \E, ror #2
        and     ip, \B, ip, ror #2
        add     \E, \E, \A, ror #27
        eor     ip, ip, \D, ror #2
        add     \E, \E, r3
        add     \E, \E, ip
        .endm
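
        @ sha_f2: rounds 20..39 and 60..79 (parity)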
        .macro  sha_f2, A, B, C, D, E
        ldr     r3, [r2], #4
        add     \E, r1, \E, ror #2
        eor     ip, \B, \C, ror #2
        add     \E, \E, \A, ror #27
        eor     ip, ip, \D, ror #2
        add     \E, \E, r3
        add     \E, \E, ip
        .endm
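
        @ sha_f3: rounds 40..59 (majority)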
        .macro  sha_f3, A, B, C, D, E
        ldr     r3, [r2], #4
        add     \E, r1, \E, ror #2
        orr     ip, \B, \C, ror #2
        add     \E, \E, \A, ror #27
        and     ip, ip, \D, ror #2
        add     \E, \E, r3
        and     r3, \B, \C, ror #2
        orr     ip, ip, r3
        add     \E, \E, ip
        .endm
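
        @ load the current digest: A..E into r4..r8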
        ldmia   r0, {r4 - r8}

        mov     lr, #4
        ldr     r1, .L_sha_K + 0

        /* adjust initial values */
        mov     r6, r6, ror #30
        mov     r7, r7, ror #30
        mov     r8, r8, ror #30
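
        @ rounds 0..19, K = 0x5a827999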
3:      subs    lr, lr, #1
        sha_f1  r4, r5, r6, r7, r8
        sha_f1  r8, r4, r5, r6, r7
        sha_f1  r7, r8, r4, r5, r6
        sha_f1  r6, r7, r8, r4, r5
        sha_f1  r5, r6, r7, r8, r4
        bne     3b
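
        @ rounds 20..39, K = 0x6ed9eba1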
        ldr     r1, .L_sha_K + 4
        mov     lr, #4

4:      subs    lr, lr, #1
        sha_f2  r4, r5, r6, r7, r8
        sha_f2  r8, r4, r5, r6, r7
        sha_f2  r7, r8, r4, r5, r6
        sha_f2  r6, r7, r8, r4, r5
        sha_f2  r5, r6, r7, r8, r4
        bne     4b
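
        @ rounds 40..59, K = 0x8f1bbcdc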
        ldr     r1, .L_sha_K + 8
        mov     lr, #4

5:      subs    lr, lr, #1
        sha_f3  r4, r5, r6, r7, r8
        sha_f3  r8, r4, r5, r6, r7
        sha_f3  r7, r8, r4, r5, r6
        sha_f3  r6, r7, r8, r4, r5
        sha_f3  r5, r6, r7, r8, r4
        bne     5b
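
        @ rounds 60..79, K = 0xca62c1d6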
        ldr     r1, .L_sha_K + 12
        mov     lr, #4

6:      subs    lr, lr, #1
        sha_f2  r4, r5, r6, r7, r8
        sha_f2  r8, r4, r5, r6, r7
        sha_f2  r7, r8, r4, r5, r6
        sha_f2  r6, r7, r8, r4, r5
        sha_f2  r5, r6, r7, r8, r4
        bne     6b
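
        @ fold this block's result back into the digest; C, D and E still
        @ carry the in-place rotation, hence the ror #2 correction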
        ldmia   r0, {r1, r2, r3, ip, lr}
        add     r4, r1, r4
        add     r5, r2, r5
        add     r6, r3, r6, ror #2
        add     r7, ip, r7, ror #2
        add     r8, lr, r8, ror #2
        stmia   r0, {r4 - r8}

        ldmfd   sp!, {r4 - r8, pc}

ENDPROC(sha_transform)

        .align  2
.L_sha_K:
        .word   0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6

/*
 * void sha_init(__u32 *buf)
 */
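
/*
 * .L_sha_initial_digest below holds the five standard SHA-1 initial
 * hash values H0..H4; sha_init() simply copies them into buf[0..4].
 */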

        .align  2
.L_sha_initial_digest:
        .word   0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0

ENTRY(sha_init)

        str     lr, [sp, #-4]!
        adr     r1, .L_sha_initial_digest
        ldmia   r1, {r1, r2, r3, ip, lr}
        stmia   r0, {r1, r2, r3, ip, lr}
        ldr     pc, [sp], #4

ENDPROC(sha_init)