csumpartialcopygeneric.S 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /*
  2. * linux/arch/arm/lib/csumpartialcopygeneric.S
  3. *
  4. * Copyright (C) 1995-2001 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. /*
  11. * unsigned int
  12. * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
  13. * r0 = src, r1 = dst, r2 = len, r3 = sum
  14. * Returns : r0 = checksum
  15. *
  16. * Note that 'tst' and 'teq' preserve the carry flag.
  17. */
  /*
   * Symbolic names for the four argument registers, following the
   * "r0 = src, r1 = dst, r2 = len, r3 = sum" convention stated in
   * the header comment.  Note that r0 doubles as the return value,
   * so writing the result to r0 overwrites "src".
   */
  18. src .req r0
  19. dst .req r1
  20. len .req r2
  21. sum .req r3
  /*
   * Zero-length exit, reached from .Lless8 when len == 0.  Nothing
   * is copied; the incoming sum is handed back unchanged in r0.
   * load_regs is a macro defined outside this chunk -- presumably it
   * undoes save_regs and returns to the caller; confirm against its
   * definition.
   */
  22. .Lzero: mov r0, sum
  23. load_regs
  24. /*
  25. * Align an unaligned destination pointer. We know that
  26. * we have >= 8 bytes here, so we don't need to check
  27. * the length. Note that the source pointer hasn't been
  28. * aligned yet.
  29. */
  /*
   * Subroutine: called with "blne .Ldst_unaligned" when dst & 3 is
   * non-zero, and left again by writing lr to pc.  It copies 1, 2
   * or 3 bytes so that dst ends up 32-bit aligned and decrements len
   * by the same amount.  The running carry lives in C: "sub", "tst"
   * and "strb" do not disturb it, only the "adcs" instructions
   * update it.
   *
   * load1b/load2b and the put_byte_0/put_byte_1 shifter operands are
   * macros defined outside this chunk.  Presumably the loads fetch
   * one or two bytes from src and advance it, and put_byte_N places
   * the byte in the even/odd byte lane of the sum -- confirm against
   * their definitions.
   */
  30. .Ldst_unaligned:
  /* Bit 0 of dst clear: dst is already 16-bit aligned. */
  31. tst dst, #1
  32. beq .Ldst_16bit
  /* dst is odd: move one byte, accumulating it via put_byte_1. */
  33. load1b ip
  34. sub len, len, #1
  35. adcs sum, sum, ip, put_byte_1 @ update checksum
  36. strb ip, [dst], #1
  /* dst is now even; if bit 1 is also clear we are done. */
  37. tst dst, #2
  38. moveq pc, lr @ dst is now 32bit aligned
  /* Move two more bytes to go from 16-bit to 32-bit alignment. */
  39. .Ldst_16bit: load2b r8, ip
  40. sub len, len, #2
  41. adcs sum, sum, r8, put_byte_0
  42. strb r8, [dst], #1
  43. adcs sum, sum, ip, put_byte_1
  44. strb ip, [dst], #1
  45. mov pc, lr @ dst is now 32bit aligned
  46. /*
  47. * Handle 0 to 7 bytes, with any alignment of source and
  48. * destination pointers. Note that when we get here, C = 0
  49. */
  /*
   * Reached from the entry path through "cmp len, #8" / "blo", which
   * is why C is clear on arrival (the branch is only taken when the
   * compare leaves carry clear).  Everything here is copied with
   * byte stores; load1b/load2b and put_byte_N are macros defined
   * outside this chunk (presumably 1- and 2-byte loads from src with
   * post-increment, and even/odd byte-lane placement -- confirm).
   */
  50. .Lless8: teq len, #0 @ check for zero count
  51. beq .Lzero
  52. /* we must have at least one byte. */
  53. tst dst, #1 @ dst 16-bit aligned
  54. beq .Lless8_aligned
  55. /* Align dst */
  56. load1b ip
  57. sub len, len, #1
  58. adcs sum, sum, ip, put_byte_1 @ update checksum
  59. strb ip, [dst], #1
  /*
   * len < 8 here, so (len & 6) == 0 means fewer than two bytes are
   * left and the pair loop can be skipped.
   */
  60. tst len, #6
  61. beq .Lless8_byteonly
  /* Pair loop: two bytes per pass while len >= 2. */
  62. 1: load2b r8, ip
  63. sub len, len, #2
  64. adcs sum, sum, r8, put_byte_0
  65. strb r8, [dst], #1
  66. adcs sum, sum, ip, put_byte_1
  67. strb ip, [dst], #1
  68. .Lless8_aligned:
  69. tst len, #6
  70. bne 1b
  /*
   * At most one byte remains.  len is not decremented for it; the
   * code at .Ldone does not read len.
   */
  71. .Lless8_byteonly:
  72. tst len, #1
  73. beq .Ldone
  74. load1b r8
  75. adcs sum, sum, r8, put_byte_0 @ update checksum
  76. strb r8, [dst], #1
  77. b .Ldone
  /*
   * Function entry and the fast path for a word-aligned source.
   *
   * FN_ENTRY, save_regs, load_regs, the loadNl macros, get_byte_N,
   * put_byte_N and push are all defined outside this chunk (this
   * file looks like a template specialised by whatever includes it);
   * comments about them below are assumptions to confirm.
   *
   * Flow: lengths below 8 go to .Lless8; otherwise dst is brought to
   * 32-bit alignment, then the copy is done 16 bytes at a time,
   * followed by 8/4 bytes and finally 1-3 trailing bytes.  The carry
   * flag links all the "adcs" additions together, so loop counters
   * are updated with flag-neutral "sub" and tested with "teq"/"tst",
   * which keep C intact as noted in the header comment.
   */
  78. FN_ENTRY
  79. save_regs
  80. cmp len, #8 @ Ensure that we have at least
  81. blo .Lless8 @ 8 bytes to copy.
  82. adds sum, sum, #0 @ C = 0
  83. tst dst, #3 @ Test destination alignment
  84. blne .Ldst_unaligned @ align destination, return here
  85. /*
  86. * Ok, the dst pointer is now 32bit aligned, and we know
  87. * that we must have more than 4 bytes to copy. Note
  88. * that C contains the carry from the dst alignment above.
  89. */
  90. tst src, #3 @ Test source alignment
  91. bne .Lsrc_not_aligned
  92. /* Routine for src & dst aligned */
  /* ip = len rounded down to a multiple of 16; skip loop if zero. */
  93. bics ip, len, #15
  94. beq 2f
  /* Main loop: four words per pass, stored with one stmia. */
  95. 1: load4l r4, r5, r6, r7
  96. stmia dst!, {r4, r5, r6, r7}
  97. adcs sum, sum, r4
  98. adcs sum, sum, r5
  99. adcs sum, sum, r6
  100. adcs sum, sum, r7
  101. sub ip, ip, #16
  102. teq ip, #0
  103. bne 1b
  /* ip = len & 12: bit 3 selects an 8-byte step, bit 2 a 4-byte step. */
  104. 2: ands ip, len, #12
  105. beq 4f
  106. tst ip, #8
  107. beq 3f
  108. load2l r4, r5
  109. stmia dst!, {r4, r5}
  110. adcs sum, sum, r4
  111. adcs sum, sum, r5
  112. tst ip, #4
  113. beq 4f
  114. 3: load1l r4
  115. str r4, [dst], #4
  116. adcs sum, sum, r4
  /*
   * Trailing 1-3 bytes (len & 3).  A whole word is fetched with
   * load1l and then picked apart byte by byte.
   * NOTE(review): this appears to read up to 3 bytes beyond the
   * requested length; src is word aligned on this path -- confirm
   * that the over-read is acceptable for every user of this file.
   */
  117. 4: ands len, len, #3
  118. beq .Ldone
  119. load1l r4
  120. tst len, #2
  121. mov r5, r4, get_byte_0
  122. beq .Lexit
  /*
   * Two or three bytes: add the first two in one go (presumably
   * "push #16" discards the two bytes that must not be summed yet),
   * store them, and leave the third byte in r5 for .Lexit.
   */
  123. adcs sum, sum, r4, push #16
  124. strb r5, [dst], #1
  125. mov r5, r4, get_byte_1
  126. strb r5, [dst], #1
  127. mov r5, r4, get_byte_2
  /*
   * Shared tail, also branched to from the unaligned-source paths:
   * if bit 0 of len is set, store the byte held in r5, mask r5 down
   * to that byte and add it.  All three instructions are conditional
   * on the "tst", so nothing happens for an even remainder.
   */
  128. .Lexit: tst len, #1
  129. strneb r5, [dst], #1
  130. andne r5, r5, #255
  131. adcnes sum, sum, r5, put_byte_0
  132. /*
  133. * If the dst pointer was not 16-bit aligned, we
  134. * need to rotate the checksum here to get around
  135. * the inefficient byte manipulations in the
  136. * architecture independent code.
  137. */
  /*
   * Common exit: fold the outstanding carry into the result, then
   * reload a word from the top of the stack into "sum" (now used as
   * scratch).  Per the existing "@ dst" note this is the original
   * dst, presumably stored there by save_regs -- confirm.  If that
   * address was odd the result is rotated right by 8 bits.
   */
  138. .Ldone: adc r0, sum, #0
  139. ldr sum, [sp, #0] @ dst
  140. tst sum, #1
  141. movne r0, r0, ror #8
  142. load_regs
  /*
   * Source is not word aligned (dst already is).  The carry from the
   * dst alignment step is folded into sum, ip receives the byte
   * offset of src within its word (1..3, since this is only reached
   * when src & 3 != 0), src is rounded down to the word boundary and
   * the first word is loaded into r5.  Offsets 2 and 3 are handed to
   * their own routines; the code that follows handles offset 1.
   *
   * load1l/load2l/load4l, get_byte_N, push and pull are macros
   * defined outside this chunk.  push/pull are presumably
   * complementary shifts chosen by endianness -- confirm.
   *
   * Scheme: r4 always holds the bytes left over from the previously
   * loaded word ("pull #8"); each output word is r4 merged with the
   * next source word shifted the other way ("push #24").
   */
  143. .Lsrc_not_aligned:
  144. adc sum, sum, #0 @ include C from dst alignment
  145. and ip, src, #3
  146. bic src, src, #3
  147. load1l r5
  148. cmp ip, #2
  149. beq .Lsrc2_aligned
  150. bhi .Lsrc3_aligned
  /*
   * Only ip == 1 falls through, and for that value the "cmp" above
   * left carry clear; "mov" does not change it.
   */
  151. mov r4, r5, pull #8 @ C = 0
  /* ip = len rounded down to a multiple of 16; skip loop if zero. */
  152. bics ip, len, #15
  153. beq 2f
  /* Main loop: load four words, emit four realigned words. */
  154. 1: load4l r5, r6, r7, r8
  155. orr r4, r4, r5, push #24
  156. mov r5, r5, pull #8
  157. orr r5, r5, r6, push #24
  158. mov r6, r6, pull #8
  159. orr r6, r6, r7, push #24
  160. mov r7, r7, pull #8
  161. orr r7, r7, r8, push #24
  162. stmia dst!, {r4, r5, r6, r7}
  163. adcs sum, sum, r4
  164. adcs sum, sum, r5
  165. adcs sum, sum, r6
  166. adcs sum, sum, r7
  /* Carry the leftover of the last loaded word into the next pass. */
  167. mov r4, r8, pull #8
  168. sub ip, ip, #16
  169. teq ip, #0
  170. bne 1b
  /* ip = len & 12: bit 3 selects an 8-byte step, bit 2 a 4-byte step. */
  171. 2: ands ip, len, #12
  172. beq 4f
  173. tst ip, #8
  174. beq 3f
  175. load2l r5, r6
  176. orr r4, r4, r5, push #24
  177. mov r5, r5, pull #8
  178. orr r5, r5, r6, push #24
  179. stmia dst!, {r4, r5}
  180. adcs sum, sum, r4
  181. adcs sum, sum, r5
  182. mov r4, r6, pull #8
  183. tst ip, #4
  184. beq 4f
  185. 3: load1l r5
  186. orr r4, r4, r5, push #24
  187. str r4, [dst], #4
  188. adcs sum, sum, r4
  189. mov r4, r5, pull #8
  /*
   * Trailing 1-3 bytes (len & 3).  No further load is issued here:
   * the bytes are taken from the leftover already held in r4.  One
   * byte goes straight to .Lexit in r5; for two or three, the first
   * two are summed and stored here and a third is left in r5.
   */
  190. 4: ands len, len, #3
  191. beq .Ldone
  192. mov r5, r4, get_byte_0
  193. tst len, #2
  194. beq .Lexit
  195. adcs sum, sum, r4, push #16
  196. strb r5, [dst], #1
  197. mov r5, r4, get_byte_1
  198. strb r5, [dst], #1
  199. mov r5, r4, get_byte_2
  200. b .Lexit
  /*
   * Source is 2 bytes past a word boundary (branched to when
   * src & 3 == 2, with the containing word already in r5 and src
   * rounded down).  r4 carries the leftover half of the previous
   * word ("pull #16"); each output word is r4 merged with the next
   * source word shifted the other way ("push #16").
   *
   * push, pull, get_byte_N and the load macros are defined outside
   * this chunk; push/pull are presumably complementary shifts chosen
   * by endianness -- confirm.
   */
  201. .Lsrc2_aligned: mov r4, r5, pull #16
  /* Adding zero cannot carry out, so this clears C before the adcs chain. */
  202. adds sum, sum, #0
  /* ip = len rounded down to a multiple of 16; skip loop if zero. */
  203. bics ip, len, #15
  204. beq 2f
  /* Main loop: load four words, emit four realigned words. */
  205. 1: load4l r5, r6, r7, r8
  206. orr r4, r4, r5, push #16
  207. mov r5, r5, pull #16
  208. orr r5, r5, r6, push #16
  209. mov r6, r6, pull #16
  210. orr r6, r6, r7, push #16
  211. mov r7, r7, pull #16
  212. orr r7, r7, r8, push #16
  213. stmia dst!, {r4, r5, r6, r7}
  214. adcs sum, sum, r4
  215. adcs sum, sum, r5
  216. adcs sum, sum, r6
  217. adcs sum, sum, r7
  /* Carry the leftover of the last loaded word into the next pass. */
  218. mov r4, r8, pull #16
  219. sub ip, ip, #16
  220. teq ip, #0
  221. bne 1b
  /* ip = len & 12: bit 3 selects an 8-byte step, bit 2 a 4-byte step. */
  222. 2: ands ip, len, #12
  223. beq 4f
  224. tst ip, #8
  225. beq 3f
  226. load2l r5, r6
  227. orr r4, r4, r5, push #16
  228. mov r5, r5, pull #16
  229. orr r5, r5, r6, push #16
  230. stmia dst!, {r4, r5}
  231. adcs sum, sum, r4
  232. adcs sum, sum, r5
  233. mov r4, r6, pull #16
  234. tst ip, #4
  235. beq 4f
  236. 3: load1l r5
  237. orr r4, r4, r5, push #16
  238. str r4, [dst], #4
  239. adcs sum, sum, r4
  240. mov r4, r5, pull #16
  /*
   * Trailing 1-3 bytes (len & 3).  A single byte is taken from r4
   * and handed to .Lexit.  For two or three bytes, r4 is added as a
   * whole and its first two bytes are stored; a third byte, if
   * wanted, is fetched with load1b (presumably a one-byte load from
   * src -- confirm) and handed to .Lexit in r5.
   */
  241. 4: ands len, len, #3
  242. beq .Ldone
  243. mov r5, r4, get_byte_0
  244. tst len, #2
  245. beq .Lexit
  246. adcs sum, sum, r4
  247. strb r5, [dst], #1
  248. mov r5, r4, get_byte_1
  249. strb r5, [dst], #1
  250. tst len, #1
  251. beq .Ldone
  252. load1b r5
  253. b .Lexit
  /*
   * Source is 3 bytes past a word boundary (branched to when
   * src & 3 == 3, with the containing word already in r5 and src
   * rounded down).  r4 carries the single leftover byte of the
   * previous word ("pull #24"); each output word is r4 merged with
   * the next source word shifted the other way ("push #8").
   *
   * push, pull, get_byte_N, the load macros and FN_EXIT are defined
   * outside this chunk; push/pull are presumably complementary
   * shifts chosen by endianness -- confirm.
   */
  254. .Lsrc3_aligned: mov r4, r5, pull #24
  /* Adding zero cannot carry out, so this clears C before the adcs chain. */
  255. adds sum, sum, #0
  /* ip = len rounded down to a multiple of 16; skip loop if zero. */
  256. bics ip, len, #15
  257. beq 2f
  /* Main loop: load four words, emit four realigned words. */
  258. 1: load4l r5, r6, r7, r8
  259. orr r4, r4, r5, push #8
  260. mov r5, r5, pull #24
  261. orr r5, r5, r6, push #8
  262. mov r6, r6, pull #24
  263. orr r6, r6, r7, push #8
  264. mov r7, r7, pull #24
  265. orr r7, r7, r8, push #8
  266. stmia dst!, {r4, r5, r6, r7}
  267. adcs sum, sum, r4
  268. adcs sum, sum, r5
  269. adcs sum, sum, r6
  270. adcs sum, sum, r7
  /* Carry the leftover of the last loaded word into the next pass. */
  271. mov r4, r8, pull #24
  272. sub ip, ip, #16
  273. teq ip, #0
  274. bne 1b
  /* ip = len & 12: bit 3 selects an 8-byte step, bit 2 a 4-byte step. */
  275. 2: ands ip, len, #12
  276. beq 4f
  277. tst ip, #8
  278. beq 3f
  279. load2l r5, r6
  280. orr r4, r4, r5, push #8
  281. mov r5, r5, pull #24
  282. orr r5, r5, r6, push #8
  283. stmia dst!, {r4, r5}
  284. adcs sum, sum, r4
  285. adcs sum, sum, r5
  286. mov r4, r6, pull #24
  287. tst ip, #4
  288. beq 4f
  289. 3: load1l r5
  290. orr r4, r4, r5, push #8
  291. str r4, [dst], #4
  292. adcs sum, sum, r4
  293. mov r4, r5, pull #24
  /*
   * Trailing 1-3 bytes (len & 3).  A single byte is taken from r4
   * and handed to .Lexit.  For two or three bytes, the byte in r4 is
   * stored and added, then a fresh word is loaded: its first byte is
   * stored and added ("push #8 * 3" keeps the rest out of the sum,
   * presumably -- confirm), and its second byte is left in r5 for
   * .Lexit to store if len is odd.
   */
  294. 4: ands len, len, #3
  295. beq .Ldone
  296. mov r5, r4, get_byte_0
  297. tst len, #2
  298. beq .Lexit
  299. strb r5, [dst], #1
  300. adcs sum, sum, r4
  301. load1l r4
  302. mov r5, r4, get_byte_0
  303. strb r5, [dst], #1
  304. adcs sum, sum, r4, push #24
  305. mov r5, r4, get_byte_1
  306. b .Lexit
  307. FN_EXIT