/* sha512-arm.S - ARM assembly implementation of SHA-512 transform
 *
 * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS

.text

.syntax unified
.arm

/* structure of SHA512_CONTEXT */
#define hd_a 0
#define hd_b ((hd_a) + 8)
#define hd_c ((hd_b) + 8)
#define hd_d ((hd_c) + 8)
#define hd_e ((hd_d) + 8)
#define hd_f ((hd_e) + 8)
#define hd_g ((hd_f) + 8)
#define hd_h ((hd_g) + 8)
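
/* These offsets assume the eight 64-bit chaining values a..h sit back to back
 * at the start of SHA512_CONTEXT; they are the only context fields this code
 * touches (copied to the stack per block and added back afterwards). */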

/* register macros */
#define RK r2
#define RElo r0
#define REhi r1
#define RT1lo r3
#define RT1hi r4
#define RT2lo r5
#define RT2hi r6
#define RWlo r7
#define RWhi r8
#define RT3lo r9
#define RT3hi r10
#define RT4lo r11
#define RT4hi ip
#define RRND lr

/* variable offsets in stack */
#define ctx (0)
#define data ((ctx) + 4)
#define nblks ((data) + 4)
#define _a ((nblks) + 4)
#define _b ((_a) + 8)
#define _c ((_b) + 8)
#define _d ((_c) + 8)
#define _e ((_d) + 8)
#define _f ((_e) + 8)
#define _g ((_f) + 8)
#define _h ((_g) + 8)
#define w(i) ((_h) + 8 + ((i) % 16) * 8)
#define STACK_MAX (w(15) + 8)
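
/* Note: w(i) indexes a 16-entry circular buffer on the stack ((i) % 16), so
 * only the 16 most recent 64-bit schedule words are kept instead of the full
 * 80-entry W[] array; w(16) aliases w(0), w(17) aliases w(1), and so on.
 * STACK_MAX is the resulting frame size: ctx/data/nblks plus the eight
 * working variables plus the 16 schedule words. */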

/* helper macros */
#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
  ldrb rout, [rsrc, #((offs) + 3)]; \
  ldrb rtmp, [rsrc, #((offs) + 2)]; \
  orr rout, rout, rtmp, lsl #8; \
  ldrb rtmp, [rsrc, #((offs) + 1)]; \
  orr rout, rout, rtmp, lsl #16; \
  ldrb rtmp, [rsrc, #((offs) + 0)]; \
  orr rout, rout, rtmp, lsl #24;
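
/* ldr_unaligned_be assembles one big-endian 32-bit word from four single-byte
 * loads (byte 3 first, ending with byte 0 shifted into bits 31:24), so input
 * buffers that are not 4-byte aligned never trigger an alignment fault; the
 * result is already in host order, and rtmp is clobbered as scratch. */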

#ifdef __ARMEL__
/* bswap on little-endian */
#ifdef HAVE_ARM_ARCH_V6
#define be_to_host(reg, rtmp) \
  rev reg, reg;
#else
#define be_to_host(reg, rtmp) \
  eor rtmp, reg, reg, ror #16; \
  mov rtmp, rtmp, lsr #8; \
  bic rtmp, rtmp, #65280; \
  eor reg, rtmp, reg, ror #8;
#endif
#else
/* nop on big-endian */
#define be_to_host(reg, rtmp) /*_*/
#endif
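
/* The pre-ARMv6 fallback above is the classic four-instruction byte swap:
 * with reg = [A,B,C,D] (A = bits 31:24), eor with (reg ror #16) gives
 * [A^C, B^D, C^A, D^B]; after lsr #8 and clearing bits 15:8 (bic #65280)
 * only the A^C and C^A bytes remain, and xoring that with (reg ror #8),
 * i.e. [D,A,B,C], yields [D,C,B,A].  ARMv6+ uses the single rev instruction. */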

#define host_to_host(x, y) /*_*/

#define read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, convert, rtmp) \
  ldr lo0, [rin, #((offs) + 0 * 8 + 4)]; \
  ldr hi0, [rin, #((offs) + 0 * 8 + 0)]; \
  ldr lo1, [rin, #((offs) + 1 * 8 + 4)]; \
  ldr hi1, [rin, #((offs) + 1 * 8 + 0)]; \
  ldr lo2, [rin, #((offs) + 2 * 8 + 4)]; \
  convert(lo0, rtmp); \
  ldr hi2, [rin, #((offs) + 2 * 8 + 0)]; \
  convert(hi0, rtmp); \
  ldr lo3, [rin, #((offs) + 3 * 8 + 4)]; \
  convert(lo1, rtmp); \
  ldr hi3, [rin, #((offs) + 3 * 8 + 0)]; \
  convert(hi1, rtmp); \
  convert(lo2, rtmp); \
  convert(hi2, rtmp); \
  convert(lo3, rtmp); \
  convert(hi3, rtmp);

#define read_be64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, rtmp0) \
  read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, be_to_host, rtmp0)

/* need to handle unaligned reads by byte reads */
#define read_be64_unaligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, rtmp0) \
  ldr_unaligned_be(lo0, rin, (offs) + 0 * 8 + 4, rtmp0); \
  ldr_unaligned_be(hi0, rin, (offs) + 0 * 8 + 0, rtmp0); \
  ldr_unaligned_be(lo1, rin, (offs) + 1 * 8 + 4, rtmp0); \
  ldr_unaligned_be(hi1, rin, (offs) + 1 * 8 + 0, rtmp0); \
  ldr_unaligned_be(lo2, rin, (offs) + 2 * 8 + 4, rtmp0); \
  ldr_unaligned_be(hi2, rin, (offs) + 2 * 8 + 0, rtmp0); \
  ldr_unaligned_be(lo3, rin, (offs) + 3 * 8 + 4, rtmp0); \
  ldr_unaligned_be(hi3, rin, (offs) + 3 * 8 + 0, rtmp0);
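
/* Both 4-word readers load each big-endian 64-bit message word as two 32-bit
 * halves: the most significant half sits at byte offset +0 and ends up in
 * hiN, the least significant half at +4 ends up in loN.  In the aligned
 * variant the be_to_host conversions are interleaved with the remaining
 * loads to hide load-use latency. */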

/***********************************************************************
 * ARM assembly implementation of sha512 transform
 ***********************************************************************/

/* Round function */

#define R(_a,_b,_c,_d,_e,_f,_g,_h,W,wi) \
  /* Message expansion, t1 = _h + w[i] */ \
  W(_a,_h,wi); \
  \
  /* w = Sum1(_e) */ \
  mov RWlo, RElo, lsr#14; \
  ldm RK!, {RT2lo-RT2hi}; \
  mov RWhi, REhi, lsr#14; \
  eor RWlo, RWlo, RElo, lsr#18; \
  eor RWhi, RWhi, REhi, lsr#18; \
  ldr RT3lo, [sp, #(_f)]; \
  adds RT1lo, RT2lo; /* t1 += K */ \
  ldr RT3hi, [sp, #(_f) + 4]; \
  adc RT1hi, RT2hi; \
  ldr RT4lo, [sp, #(_g)]; \
  eor RWlo, RWlo, RElo, lsl#23; \
  ldr RT4hi, [sp, #(_g) + 4]; \
  eor RWhi, RWhi, REhi, lsl#23; \
  eor RWlo, RWlo, REhi, lsl#18; \
  eor RWhi, RWhi, RElo, lsl#18; \
  eor RWlo, RWlo, REhi, lsl#14; \
  eor RWhi, RWhi, RElo, lsl#14; \
  eor RWlo, RWlo, REhi, lsr#9; \
  eor RWhi, RWhi, RElo, lsr#9; \
  \
  /* Cho(_e,_f,_g) => (_e & _f) ^ (~_e & _g) */ \
  adds RT1lo, RWlo; /* t1 += Sum1(_e) */ \
  and RT3lo, RT3lo, RElo; \
  adc RT1hi, RWhi; \
  and RT3hi, RT3hi, REhi; \
  bic RT4lo, RT4lo, RElo; \
  bic RT4hi, RT4hi, REhi; \
  eor RT3lo, RT3lo, RT4lo; \
  eor RT3hi, RT3hi, RT4hi; \
  \
  /* Load D */ \
  /* t1 += Cho(_e,_f,_g) */ \
  ldr RElo, [sp, #(_d)]; \
  adds RT1lo, RT3lo; \
  ldr REhi, [sp, #(_d) + 4]; \
  adc RT1hi, RT3hi; \
  \
  /* Load A */ \
  ldr RT3lo, [sp, #(_a)]; \
  \
  /* _d += t1 */ \
  adds RElo, RT1lo; \
  ldr RT3hi, [sp, #(_a) + 4]; \
  adc REhi, RT1hi; \
  \
  /* Store D */ \
  str RElo, [sp, #(_d)]; \
  \
  /* t2 = Sum0(_a) */ \
  mov RT2lo, RT3lo, lsr#28; \
  str REhi, [sp, #(_d) + 4]; \
  mov RT2hi, RT3hi, lsr#28; \
  ldr RWlo, [sp, #(_b)]; \
  eor RT2lo, RT2lo, RT3lo, lsl#30; \
  ldr RWhi, [sp, #(_b) + 4]; \
  eor RT2hi, RT2hi, RT3hi, lsl#30; \
  eor RT2lo, RT2lo, RT3lo, lsl#25; \
  eor RT2hi, RT2hi, RT3hi, lsl#25; \
  eor RT2lo, RT2lo, RT3hi, lsl#4; \
  eor RT2hi, RT2hi, RT3lo, lsl#4; \
  eor RT2lo, RT2lo, RT3hi, lsr#2; \
  eor RT2hi, RT2hi, RT3lo, lsr#2; \
  eor RT2lo, RT2lo, RT3hi, lsr#7; \
  eor RT2hi, RT2hi, RT3lo, lsr#7; \
  \
  /* t2 += t1 */ \
  adds RT2lo, RT1lo; \
  ldr RT1lo, [sp, #(_c)]; \
  adc RT2hi, RT1hi; \
  \
  /* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \
  ldr RT1hi, [sp, #(_c) + 4]; \
  and RT4lo, RWlo, RT3lo; \
  and RT4hi, RWhi, RT3hi; \
  eor RWlo, RWlo, RT3lo; \
  eor RWhi, RWhi, RT3hi; \
  and RWlo, RWlo, RT1lo; \
  and RWhi, RWhi, RT1hi; \
  eor RWlo, RWlo, RT4lo; \
  eor RWhi, RWhi, RT4hi;
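
/* Per FIPS 180-4, each round computes
 *   t1 = h + Sum1(e) + Ch(e,f,g) + K[i] + w[i]
 *   t2 = Sum0(a) + Maj(a,b,c)
 *   d += t1;  new a = t1 + t2
 * where Sum1 rotates e right by 14, 18 and 41 bits and Sum0 rotates a right
 * by 28, 34 and 39 bits.  The 64-bit rotations are built from paired 32-bit
 * shifts on the lo/hi register halves, e.g. ror64(x,41) has
 * lo = (xhi >> 9) | (xlo << 23) and hi = (xlo >> 9) | (xhi << 23).
 * The final "t2 + Maj" add and the store of the new a-value are deferred
 * into the next round's W macro (and into the block epilogue after the last
 * round) so they can overlap the following loads. */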

/* Message expansion */

#define W_0_63(_a,_h,i) \
  ldr RT3lo, [sp, #(w(i-2))]; \
  adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
  ldr RT3hi, [sp, #(w(i-2)) + 4]; \
  adc RT2hi, RWhi; \
  /* nw = S1(w[i-2]) */ \
  ldr RT1lo, [sp, #(_h)]; /* Load H */ \
  mov RWlo, RT3lo, lsr#19; \
  str RT2lo, [sp, #(_a)]; \
  eor RWlo, RWlo, RT3lo, lsl#3; \
  ldr RT1hi, [sp, #(_h) + 4]; \
  mov RWhi, RT3hi, lsr#19; \
  ldr RT2lo, [sp, #(w(i-7))]; \
  eor RWhi, RWhi, RT3hi, lsl#3; \
  str RT2hi, [sp, #(_a) + 4]; \
  eor RWlo, RWlo, RT3lo, lsr#6; \
  ldr RT2hi, [sp, #(w(i-7)) + 4]; \
  eor RWhi, RWhi, RT3hi, lsr#6; \
  eor RWlo, RWlo, RT3hi, lsl#13; \
  eor RWhi, RWhi, RT3lo, lsl#13; \
  eor RWlo, RWlo, RT3hi, lsr#29; \
  eor RWhi, RWhi, RT3lo, lsr#29; \
  ldr RT3lo, [sp, #(w(i-15))]; \
  eor RWlo, RWlo, RT3hi, lsl#26; \
  ldr RT3hi, [sp, #(w(i-15)) + 4]; \
  \
  adds RT2lo, RWlo; /* nw += w[i-7] */ \
  ldr RWlo, [sp, #(w(i-16))]; \
  adc RT2hi, RWhi; \
  mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \
  ldr RWhi, [sp, #(w(i-16)) + 4]; \
  mov RT4hi, RT3hi, lsr#1; \
  adds RT2lo, RWlo; /* nw += w[i-16] */ \
  eor RT4lo, RT4lo, RT3lo, lsr#8; \
  eor RT4hi, RT4hi, RT3hi, lsr#8; \
  eor RT4lo, RT4lo, RT3lo, lsr#7; \
  eor RT4hi, RT4hi, RT3hi, lsr#7; \
  eor RT4lo, RT4lo, RT3hi, lsl#31; \
  eor RT4hi, RT4hi, RT3lo, lsl#31; \
  eor RT4lo, RT4lo, RT3hi, lsl#24; \
  eor RT4hi, RT4hi, RT3lo, lsl#24; \
  eor RT4lo, RT4lo, RT3hi, lsl#25; \
  adc RT2hi, RWhi; \
  \
  /* nw += S0(w[i-15]) */ \
  adds RT2lo, RT4lo; \
  adc RT2hi, RT4hi; \
  \
  /* w[0] = nw */ \
  str RT2lo, [sp, #(w(i))]; \
  adds RT1lo, RWlo; \
  str RT2hi, [sp, #(w(i)) + 4]; \
  adc RT1hi, RWhi;
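
/* W_0_63 does double duty for rounds 0..63: it finishes the previous round
 * (adds Maj into t2 and stores the new a-value), feeds the current round's
 * schedule word (the old contents of the circular slot, read here as
 * w[i-16]) into t1 = h + w, and in parallel expands the word needed 16
 * rounds later,
 *   nw = s1(w[i-2]) + w[i-7] + s0(w[i-15]) + w[i-16]
 * with s0(x) = ror64(x,1) ^ ror64(x,8) ^ (x >> 7) and
 *      s1(x) = ror64(x,19) ^ ror64(x,61) ^ (x >> 6),
 * then writes nw back into the same slot. */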

#define W_64_79(_a,_h,i) \
  adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
  ldr RWlo, [sp, #(w(i-16))]; \
  adc RT2hi, RWhi; \
  ldr RWhi, [sp, #(w(i-16)) + 4]; \
  ldr RT1lo, [sp, #(_h)]; /* Load H */ \
  ldr RT1hi, [sp, #(_h) + 4]; \
  str RT2lo, [sp, #(_a)]; \
  str RT2hi, [sp, #(_a) + 4]; \
  adds RT1lo, RWlo; \
  adc RT1hi, RWhi;
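
/* The last 16 rounds need no new schedule words, so W_64_79 only completes
 * the previous round and adds the already-expanded word from the circular
 * buffer to h; the final 16 R() invocations below therefore use this lighter
 * macro instead of W_0_63. */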

.align 3
.globl _gcry_sha512_transform_arm
.type _gcry_sha512_transform_arm,%function;

_gcry_sha512_transform_arm:
  /* Input:
   *   r0: SHA512_CONTEXT
   *   r1: data
   *   r2: u64 k[] constants
   *   r3: nblks
   */
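  /* A rough C-level view of this entry point (illustrative only; the
   * authoritative prototype lives in libgcrypt's C sources and the names
   * below are assumptions):
   *
   *   unsigned int _gcry_sha512_transform_arm(void *hd, const void *data,
   *                                           const u64 k[], size_t nblks);
   *
   * On return r0 holds #STACK_MAX (see .Ldone), which the caller can use as
   * the amount of stack to burn. */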

  push {r4-r11, ip, lr};
  sub sp, sp, #STACK_MAX;
  movs RWlo, r3;
  str r0, [sp, #(ctx)];
  beq .Ldone;

.Loop_blocks:
  str RWlo, [sp, #nblks];

  /* Load context to stack */
  add RWhi, sp, #(_a);
  ldm r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  ldm r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}

  /* Load input to w[16] */
  /* test if data is unaligned */
  tst r1, #3;
  beq 1f;

  /* unaligned load */
  add RWhi, sp, #(w(0));
  read_be64_unaligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  read_be64_unaligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  read_be64_unaligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  read_be64_unaligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  b 2f;

1:
  /* aligned load */
  add RWhi, sp, #(w(0));
  read_be64_aligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  read_be64_aligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  read_be64_aligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
  stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  read_be64_aligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);

2:
  add r1, #(16 * 8);
  stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
  str r1, [sp, #(data)];

  /* preload E & A */
  ldr RElo, [sp, #(_e)];
  ldr REhi, [sp, #(_e) + 4];
  mov RWlo, #0;
  ldr RT2lo, [sp, #(_a)];
  mov RRND, #(80-16);
  ldr RT2hi, [sp, #(_a) + 4];
  mov RWhi, #0;

.Loop_rounds:
  R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 16);
  R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 17);
  R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 18);
  R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 19);
  R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 20);
  R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 21);
  R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 22);
  R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 23);
  R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 24);
  R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 25);
  R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 26);
  R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 27);
  R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 28);
  R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 29);
  R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 30);
  R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 31);

  subs RRND, #16;
  bne .Loop_rounds;
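
  /* Each pass through .Loop_rounds performs 16 rounds with message
   * expansion; RRND starts at 80-16 = 64, so the loop runs four times
   * (rounds 0..63).  The remaining 16 rounds follow unrolled with W_64_79. */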
  R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 16);
  R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 17);
  R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 18);
  R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 19);
  R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 20);
  R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 21);
  R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 22);
  R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 23);
  R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 24);
  R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 25);
  R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 26);
  R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 27);
  R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 28);
  R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 29);
  R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30);
  R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31);
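
  /* Block epilogue: the first adds below completes the final round's
   * a-value, then the eight updated working variables are added into the
   * chaining values in the context in two ldm/stm halves (a..d, then e..h),
   * interleaved with loads of the stacked working variables.  Afterwards RK
   * is rewound by 80*8 bytes so the same K[] table serves the next block. */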
  ldr r0, [sp, #(ctx)];
  adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */
  ldr r1, [sp, #(data)];
  adc RT2hi, RWhi;

  ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
  adds RT1lo, RT2lo;
  ldr RT2lo, [sp, #(_b + 0)];
  adc RT1hi, RT2hi;
  ldr RT2hi, [sp, #(_b + 4)];
  adds RWlo, RT2lo;
  ldr RT2lo, [sp, #(_c + 0)];
  adc RWhi, RT2hi;
  ldr RT2hi, [sp, #(_c + 4)];
  adds RT3lo, RT2lo;
  ldr RT2lo, [sp, #(_d + 0)];
  adc RT3hi, RT2hi;
  ldr RT2hi, [sp, #(_d + 4)];
  adds RT4lo, RT2lo;
  ldr RT2lo, [sp, #(_e + 0)];
  adc RT4hi, RT2hi;
  stm r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
  ldr RT2hi, [sp, #(_e + 4)];
  ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
  adds RT1lo, RT2lo;
  ldr RT2lo, [sp, #(_f + 0)];
  adc RT1hi, RT2hi;
  ldr RT2hi, [sp, #(_f + 4)];
  adds RWlo, RT2lo;
  ldr RT2lo, [sp, #(_g + 0)];
  adc RWhi, RT2hi;
  ldr RT2hi, [sp, #(_g + 4)];
  adds RT3lo, RT2lo;
  ldr RT2lo, [sp, #(_h + 0)];
  adc RT3hi, RT2hi;
  ldr RT2hi, [sp, #(_h + 4)];
  adds RT4lo, RT2lo;
  adc RT4hi, RT2hi;
  stm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}

  sub r0, r0, #(4 * 8);
  ldr RWlo, [sp, #nblks];
  sub RK, #(80 * 8);
  subs RWlo, #1;
  bne .Loop_blocks;

.Ldone:
  mov r0, #STACK_MAX;
__out:
  add sp, sp, #STACK_MAX;
  pop {r4-r11, ip, pc};
.size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm;

#endif
#endif
  417. #endif