csumpartialcopygeneric.S 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. /*
  2. * linux/arch/arm/lib/csumpartialcopygeneric.S
  3. *
  4. * Copyright (C) 1995-2001 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. #include <asm/assembler.h>
  11. /*
  12. * unsigned int
  13. * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
  14. * r0 = src, r1 = dst, r2 = len, r3 = sum
  15. * Returns : r0 = checksum
  16. *
  17. * Note that 'tst' and 'teq' preserve the carry flag.
  18. */
  /*
   * Symbolic names for the four argument registers: src and dst are
   * the copy pointers, len is the byte count and sum is the running
   * checksum (r0..r3 respectively).
   */
  19. src .req r0
  20. dst .req r1
  21. len .req r2
  22. sum .req r3
  /*
   * Zero-length exit, reached from .Lless8 when len == 0: the incoming
   * sum is handed back unchanged in r0.
   * NOTE(review): load_regs is a macro not defined in the visible
   * source; presumably it undoes save_regs and returns to the
   * caller -- confirm in the including file.
   */
  23. .Lzero: mov r0, sum
  24. load_regs
  25. /*
  26. * Align an unaligned destination pointer. We know that
  27. * we have >= 8 bytes here, so we don't need to check
  28. * the length. Note that the source pointer hasn't been
  29. * aligned yet.
  30. */
  /*
   * Called with 'bl' from the main entry path; returns through lr.
   * Copies at most three bytes (one single byte and/or one byte pair).
   * The carry flag is threaded through the adcs instructions and is
   * still live on return: sub (no S suffix), strb and tst do not
   * modify it.
   * NOTE(review): load1b/load2b and put_byte_0/put_byte_1 are macros
   * not defined in the visible source. They presumably fetch one/two
   * bytes from src (advancing it) and place a byte in its lane of the
   * checksum word -- confirm against their definitions.
   */
  31. .Ldst_unaligned:
  32. tst dst, #1 /* odd destination address? */
  33. beq .Ldst_16bit /* no: dst is already 16-bit aligned */
  34. load1b ip
  35. sub len, len, #1 /* account for the single byte */
  36. adcs sum, sum, ip, put_byte_1 @ update checksum
  37. strb ip, [dst], #1
  38. tst dst, #2
  39. reteq lr @ dst is now 32bit aligned
  40. .Ldst_16bit: load2b r8, ip
  41. sub len, len, #2 /* account for the byte pair */
  42. adcs sum, sum, r8, put_byte_0
  43. strb r8, [dst], #1
  44. adcs sum, sum, ip, put_byte_1
  45. strb ip, [dst], #1
  46. ret lr @ dst is now 32bit aligned
  47. /*
  48. * Handle 0 to 7 bytes, with any alignment of source and
  49. * destination pointers. Note that when we get here, C = 0
  50. */
  /*
   * Entered from the 'cmp len, #8 / blo' at the function entry; an
   * unsigned "lower" result of that cmp is what leaves C clear.
   * Everything is copied bytewise: an optional leading byte to make
   * dst 16-bit aligned, then byte pairs while (len & 6) != 0, then an
   * optional trailing byte.  All paths finish at .Ldone or .Lzero.
   * NOTE(review): load1b/load2b and put_byte_N are macros not defined
   * in the visible source -- confirm their exact behaviour there.
   */
  51. .Lless8: teq len, #0 @ check for zero count
  52. beq .Lzero
  53. /* we must have at least one byte. */
  54. tst dst, #1 @ dst 16-bit aligned
  55. beq .Lless8_aligned
  56. /* Align dst */
  57. load1b ip
  58. sub len, len, #1
  59. adcs sum, sum, ip, put_byte_1 @ update checksum
  60. strb ip, [dst], #1
  61. tst len, #6 /* any byte pairs left? */
  62. beq .Lless8_byteonly /* no: at most one byte remains */
  63. 1: load2b r8, ip
  64. sub len, len, #2 /* no S suffix: carry chain is preserved */
  65. adcs sum, sum, r8, put_byte_0
  66. strb r8, [dst], #1
  67. adcs sum, sum, ip, put_byte_1
  68. strb ip, [dst], #1
  69. .Lless8_aligned:
  70. tst len, #6 /* loop while bits 1..2 of len are set */
  71. bne 1b
  72. .Lless8_byteonly:
  73. tst len, #1 /* odd trailing byte? */
  74. beq .Ldone
  75. load1b r8
  76. adcs sum, sum, r8, put_byte_0 @ update checksum
  77. strb r8, [dst], #1
  78. b .Ldone
  /*
   * Function entry and the fast path for a word-aligned source.
   *
   * Flow: lengths below 8 go to .Lless8; otherwise dst is brought to
   * 32-bit alignment (via .Ldst_unaligned), then the copy proceeds in
   * 16-byte chunks, an optional 8-byte and 4-byte step, and a 1..3
   * byte tail.  The checksum carry lives in the C flag between adcs
   * instructions, which is why the loop counter is updated with
   * 'sub' + 'teq' (carry preserving) rather than 'subs'.
   *
   * NOTE(review): FN_ENTRY, save_regs, load_regs, load1l/load2l/load4l,
   * get_byte_N, put_byte_N and lspush are macros not defined in the
   * visible source.  The loadNl macros presumably read N words from
   * src with post-increment; lspush is presumably an endian-dependent
   * shift -- confirm against their definitions.
   */
  79. FN_ENTRY
  80. save_regs
  81. cmp len, #8 @ Ensure that we have at least
  82. blo .Lless8 @ 8 bytes to copy.
  83. adds sum, sum, #0 @ C = 0
  84. tst dst, #3 @ Test destination alignment
  85. blne .Ldst_unaligned @ align destination, return here
  86. /*
  87. * Ok, the dst pointer is now 32bit aligned, and we know
  88. * that we must have more than 4 bytes to copy. Note
  89. * that C contains the carry from the dst alignment above.
  90. */
  91. tst src, #3 @ Test source alignment
  92. bne .Lsrc_not_aligned
  93. /* Routine for src & dst aligned */
  94. bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */
  95. beq 2f /* fewer than 16 bytes: skip the block loop */
  96. 1: load4l r4, r5, r6, r7
  97. stmia dst!, {r4, r5, r6, r7}
  98. adcs sum, sum, r4
  99. adcs sum, sum, r5
  100. adcs sum, sum, r6
  101. adcs sum, sum, r7
  102. sub ip, ip, #16 /* 16 bytes done; flags untouched */
  103. teq ip, #0 /* teq preserves the carry flag */
  104. bne 1b
  105. 2: ands ip, len, #12 /* whole words left: 8 and/or 4 bytes */
  106. beq 4f
  107. tst ip, #8
  108. beq 3f /* no 8-byte step, so bit 2 must be set */
  109. load2l r4, r5
  110. stmia dst!, {r4, r5}
  111. adcs sum, sum, r4
  112. adcs sum, sum, r5
  113. tst ip, #4
  114. beq 4f
  115. 3: load1l r4
  116. str r4, [dst], #4
  117. adcs sum, sum, r4
  118. 4: ands len, len, #3 /* 1..3 trailing bytes? */
  119. beq .Ldone
  120. load1l r4 /* tail bytes are picked out of this word */
  121. tst len, #2
  122. mov r5, r4, get_byte_0 /* mov leaves the tst result in the flags */
  123. beq .Lexit /* single byte only: r5 already holds it */
  124. adcs sum, sum, r4, lspush #16
  125. strb r5, [dst], #1
  126. mov r5, r4, get_byte_1
  127. strb r5, [dst], #1
  128. mov r5, r4, get_byte_2 /* candidate third byte for .Lexit */
  /*
   * Common tail, also branched to from the unaligned-source paths:
   * if len is odd, store the byte held in r5 and add it to the sum.
   */
  129. .Lexit: tst len, #1
  130. strneb r5, [dst], #1
  131. andne r5, r5, #255
  132. adcnes sum, sum, r5, put_byte_0
  133. /*
  134. * If the dst pointer was not 16-bit aligned, we
  135. * need to rotate the checksum here to get around
  136. * the inefficient byte manipulations in the
  137. * architecture independent code.
  138. */
  139. .Ldone: adc r0, sum, #0 /* fold the outstanding carry into the result */
  140. ldr sum, [sp, #0] @ dst
  141. tst sum, #1 /* was the original dst address odd? */
  142. movne r0, r0, ror #8
  143. load_regs
  /*
   * Source not word aligned (dst already is).  src is rounded down to
   * a word boundary and its first word loaded; the byte offset
   * (src & 3) then selects one of three variants.  This block holds
   * the dispatch and the offset-1 variant (fall-through: the offset
   * is never 0 on this path, so "neither equal to nor higher than 2"
   * means 1).
   *
   * In each variant r4 carries the not-yet-stored remainder of the
   * previously loaded source word; every output word is that
   * remainder ORed with the leading part of the next source word.
   * NOTE(review): lspull/lspush, load1l/load2l/load4l and get_byte_N
   * are macros not defined in the visible source; lspull/lspush are
   * presumably endian-dependent shifts -- confirm their definitions.
   */
  144. .Lsrc_not_aligned:
  145. adc sum, sum, #0 @ include C from dst alignment
  146. and ip, src, #3 /* ip = byte offset of src within its word */
  147. bic src, src, #3 /* round src down to a word boundary */
  148. load1l r5
  149. cmp ip, #2
  150. beq .Lsrc2_aligned
  151. bhi .Lsrc3_aligned
  /* Offset 1: the cmp above (1 - 2) left C clear, and mov keeps it so. */
  152. mov r4, r5, lspull #8 @ C = 0
  153. bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */
  154. beq 2f
  155. 1: load4l r5, r6, r7, r8
  156. orr r4, r4, r5, lspush #24
  157. mov r5, r5, lspull #8
  158. orr r5, r5, r6, lspush #24
  159. mov r6, r6, lspull #8
  160. orr r6, r6, r7, lspush #24
  161. mov r7, r7, lspull #8
  162. orr r7, r7, r8, lspush #24
  163. stmia dst!, {r4, r5, r6, r7}
  164. adcs sum, sum, r4
  165. adcs sum, sum, r5
  166. adcs sum, sum, r6
  167. adcs sum, sum, r7
  168. mov r4, r8, lspull #8 /* remainder of r8 seeds the next word */
  169. sub ip, ip, #16 /* no S suffix: carry chain is preserved */
  170. teq ip, #0
  171. bne 1b
  172. 2: ands ip, len, #12 /* whole words left: 8 and/or 4 bytes */
  173. beq 4f
  174. tst ip, #8
  175. beq 3f
  176. load2l r5, r6
  177. orr r4, r4, r5, lspush #24
  178. mov r5, r5, lspull #8
  179. orr r5, r5, r6, lspush #24
  180. stmia dst!, {r4, r5}
  181. adcs sum, sum, r4
  182. adcs sum, sum, r5
  183. mov r4, r6, lspull #8
  184. tst ip, #4
  185. beq 4f
  186. 3: load1l r5
  187. orr r4, r4, r5, lspush #24
  188. str r4, [dst], #4
  189. adcs sum, sum, r4
  190. mov r4, r5, lspull #8
  /* Tail: all 1..3 remaining bytes are taken from r4; no further load. */
  191. 4: ands len, len, #3
  192. beq .Ldone
  193. mov r5, r4, get_byte_0 /* mov does not disturb the flags */
  194. tst len, #2
  195. beq .Lexit /* single byte only: r5 already holds it */
  196. adcs sum, sum, r4, lspush #16
  197. strb r5, [dst], #1
  198. mov r5, r4, get_byte_1
  199. strb r5, [dst], #1
  200. mov r5, r4, get_byte_2 /* candidate third byte for .Lexit */
  201. b .Lexit
  /*
   * Source offset 2 variant, reached from .Lsrc_not_aligned with the
   * first source word in r5.  Same structure as the offset-1 code but
   * with 16/16 shifts.  The 'adds sum, sum, #0' clears C, which the
   * preceding 'cmp ip, #2' left set on this path.
   * NOTE(review): lspull/lspush, load1b, load1l/load2l/load4l and
   * get_byte_N are macros not defined in the visible source --
   * confirm their definitions.
   */
  202. .Lsrc2_aligned: mov r4, r5, lspull #16
  203. adds sum, sum, #0
  204. bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */
  205. beq 2f
  206. 1: load4l r5, r6, r7, r8
  207. orr r4, r4, r5, lspush #16
  208. mov r5, r5, lspull #16
  209. orr r5, r5, r6, lspush #16
  210. mov r6, r6, lspull #16
  211. orr r6, r6, r7, lspush #16
  212. mov r7, r7, lspull #16
  213. orr r7, r7, r8, lspush #16
  214. stmia dst!, {r4, r5, r6, r7}
  215. adcs sum, sum, r4
  216. adcs sum, sum, r5
  217. adcs sum, sum, r6
  218. adcs sum, sum, r7
  219. mov r4, r8, lspull #16 /* remainder of r8 seeds the next word */
  220. sub ip, ip, #16 /* no S suffix: carry chain is preserved */
  221. teq ip, #0
  222. bne 1b
  223. 2: ands ip, len, #12 /* whole words left: 8 and/or 4 bytes */
  224. beq 4f
  225. tst ip, #8
  226. beq 3f
  227. load2l r5, r6
  228. orr r4, r4, r5, lspush #16
  229. mov r5, r5, lspull #16
  230. orr r5, r5, r6, lspush #16
  231. stmia dst!, {r4, r5}
  232. adcs sum, sum, r4
  233. adcs sum, sum, r5
  234. mov r4, r6, lspull #16
  235. tst ip, #4
  236. beq 4f
  237. 3: load1l r5
  238. orr r4, r4, r5, lspush #16
  239. str r4, [dst], #4
  240. adcs sum, sum, r4
  241. mov r4, r5, lspull #16
  /*
   * Tail: r4 supplies the first two bytes; a third byte, if needed,
   * is fetched separately with load1b before joining .Lexit.
   */
  242. 4: ands len, len, #3
  243. beq .Ldone
  244. mov r5, r4, get_byte_0 /* mov does not disturb the flags */
  245. tst len, #2
  246. beq .Lexit /* single byte only: r5 already holds it */
  247. adcs sum, sum, r4
  248. strb r5, [dst], #1
  249. mov r5, r4, get_byte_1
  250. strb r5, [dst], #1
  251. tst len, #1
  252. beq .Ldone /* exactly two bytes: finished */
  253. load1b r5
  254. b .Lexit
  /*
   * Source offset 3 variant, reached from .Lsrc_not_aligned with the
   * first source word in r5.  Same structure as the other variants
   * but with 24/8 shifts.  The 'adds sum, sum, #0' clears C, which the
   * preceding 'cmp ip, #2' left set on this path.
   * NOTE(review): lspull/lspush, load1l/load2l/load4l, get_byte_N and
   * FN_EXIT are macros not defined in the visible source -- confirm
   * their definitions.
   */
  255. .Lsrc3_aligned: mov r4, r5, lspull #24
  256. adds sum, sum, #0
  257. bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */
  258. beq 2f
  259. 1: load4l r5, r6, r7, r8
  260. orr r4, r4, r5, lspush #8
  261. mov r5, r5, lspull #24
  262. orr r5, r5, r6, lspush #8
  263. mov r6, r6, lspull #24
  264. orr r6, r6, r7, lspush #8
  265. mov r7, r7, lspull #24
  266. orr r7, r7, r8, lspush #8
  267. stmia dst!, {r4, r5, r6, r7}
  268. adcs sum, sum, r4
  269. adcs sum, sum, r5
  270. adcs sum, sum, r6
  271. adcs sum, sum, r7
  272. mov r4, r8, lspull #24 /* remainder of r8 seeds the next word */
  273. sub ip, ip, #16 /* no S suffix: carry chain is preserved */
  274. teq ip, #0
  275. bne 1b
  276. 2: ands ip, len, #12 /* whole words left: 8 and/or 4 bytes */
  277. beq 4f
  278. tst ip, #8
  279. beq 3f
  280. load2l r5, r6
  281. orr r4, r4, r5, lspush #8
  282. mov r5, r5, lspull #24
  283. orr r5, r5, r6, lspush #8
  284. stmia dst!, {r4, r5}
  285. adcs sum, sum, r4
  286. adcs sum, sum, r5
  287. mov r4, r6, lspull #24
  288. tst ip, #4
  289. beq 4f
  290. 3: load1l r5
  291. orr r4, r4, r5, lspush #8
  292. str r4, [dst], #4
  293. adcs sum, sum, r4
  294. mov r4, r5, lspull #24
  /*
   * Tail: r4 supplies only the first byte; when two or three bytes
   * remain, a further source word is loaded to provide the rest.
   */
  295. 4: ands len, len, #3
  296. beq .Ldone
  297. mov r5, r4, get_byte_0 /* mov does not disturb the flags */
  298. tst len, #2
  299. beq .Lexit /* single byte only: r5 already holds it */
  300. strb r5, [dst], #1
  301. adcs sum, sum, r4
  302. load1l r4 /* next source word for the remaining bytes */
  303. mov r5, r4, get_byte_0
  304. strb r5, [dst], #1
  305. adcs sum, sum, r4, lspush #24
  306. mov r5, r4, get_byte_1 /* candidate third byte for .Lexit */
  307. b .Lexit
  308. FN_EXIT