/* checksum.S */
  1. /*
  2. * INET An implementation of the TCP/IP protocol suite for the LINUX
  3. * operating system. INET is implemented using the BSD Socket
  4. * interface as the means of communication with the user level.
  5. *
  6. * IP/TCP/UDP checksumming routines
  7. *
  8. * Authors: Jorge Cwik, <jorge@laser.satlink.net>
  9. * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  10. * Tom May, <ftom@netcom.com>
  11. * Pentium Pro/II routines:
  12. * Alexander Kjeldaas <astor@guardian.no>
  13. * Finn Arne Gangstad <finnag@guardian.no>
  14. * Lots of code moved from tcp.c and ip.c; see those files
  15. * for more names.
  16. *
  17. * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  18. * handling.
  19. * Andi Kleen, add zeroing on error
  20. * converted to pure assembler
  21. * Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
  22. *
  23. * This program is free software; you can redistribute it and/or
  24. * modify it under the terms of the GNU General Public License
  25. * as published by the Free Software Foundation; either version
  26. * 2 of the License, or (at your option) any later version.
  27. */
  28. #include <linux/linkage.h>
  29. #include <asm/assembler.h>
  30. #include <asm/errno.h>
  31. /*
  32. * computes a partial checksum, e.g. for TCP/UDP fragments
  33. */
  34. /*
  35. unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
  36. */
  37. #ifdef CONFIG_ISA_DUAL_ISSUE
  38. /*
  39. * Experiments with Ethernet and SLIP connections show that buff
  40. * is aligned on either a 2-byte or 4-byte boundary. We get at
  41. * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  42. * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  43. * alignment for the unrolled loop.
  44. */
  45. .text
  46. ENTRY(csum_partial)
  47. ; Function args
  48. ; r0: unsigned char *buff
  49. ; r1: int len
  50. ; r2: unsigned int sum
; Returns r0 = the caller's initial sum (r2) combined with the one's-complement
; sum of buff[0..len-1], folded to 16 bits with end-around carry.
; r7 != 0 records that buff started on an odd address; the folded result is
; byte-swapped at the end in that case.
; '||' pairs two instructions for parallel issue (CONFIG_ISA_DUAL_ISSUE).
; 'addx' is add-with-carry; 'cmp r0, r0' is used throughout to clear the c-bit.
; NOTE(review): the exact instruction order matters — the carry chain (c-bit)
; must not be disturbed between the addx instructions.
  51. push r2 || ldi r2, #0
  52. and3 r7, r0, #1 ; Check alignment.
  53. beqz r7, 1f ; Jump if alignment is ok.
  54. ; buff is 1-byte misaligned: fold in one byte to reach 2-byte alignment
  55. ldub r4, @r0 || addi r0, #1
  56. ; clear c-bit || Alignment uses up one byte.
  57. cmp r0, r0 || addi r1, #-1
  58. ldi r3, #0 || addx r2, r4
  59. addx r2, r3 ; r3 == 0: folds in the carry only
  60. .fillinsn
  61. 1:
  62. and3 r4, r0, #2 ; Check alignment.
  63. beqz r4, 2f ; Jump if alignment is ok.
  64. ; clear c-bit || Alignment uses up two bytes.
  65. cmp r0, r0 || addi r1, #-2
  66. bgtz r1, 1f ; Jump if we had at least two bytes.
  67. bra 4f || addi r1, #2 ; undo the subtraction; handle trailing bytes only
  68. .fillinsn ; len(r1) was < 2. Deal with it.
  69. 1:
  70. ; 2-byte aligned: fold in one halfword to reach 4-byte alignment
  71. lduh r4, @r0 || ldi r3, #0
  72. addx r2, r4 || addi r0, #2
  73. addx r2, r3 ; r3 == 0: folds in the carry only
  74. .fillinsn
  75. 2:
  76. ; 4-byte aligned: main loop, 32 bytes per iteration (r6 = len / 32)
  77. cmp r0, r0 ; clear c-bit
  78. srl3 r6, r1, #5
  79. beqz r6, 2f
  80. .fillinsn
  81. 1: ld r3, @r0+
  82. ld r4, @r0+ ; +4
  83. ld r5, @r0+ ; +8
  84. ld r3, @r0+ || addx r2, r3 ; +12
  85. ld r4, @r0+ || addx r2, r4 ; +16
  86. ld r5, @r0+ || addx r2, r5 ; +20
  87. ld r3, @r0+ || addx r2, r3 ; +24
  88. ld r4, @r0+ || addx r2, r4 ; +28
  89. addx r2, r5 || addi r6, #-1
  90. addx r2, r3
  91. addx r2, r4
  92. bnez r6, 1b ; carry in the c-bit survives into the next iteration
  93. addx r2, r6 ; r6=0, so this only folds in the last carry
  94. cmp r0, r0 ; This clears c-bit
  95. .fillinsn
  96. 2: and3 r6, r1, #0x1c ; remaining whole words: len & 0x1c bytes...
  97. beqz r6, 4f
  98. srli r6, #2 ; ...as a word count
  99. .fillinsn
  100. 3: ld r4, @r0+ || addi r6, #-1
  101. addx r2, r4
  102. bnez r6, 3b
  103. addx r2, r6 ; r6=0: folds in the last carry
  104. cmp r0, r0 ; This clears c-bit
  105. .fillinsn
  106. 4: and3 r1, r1, #3 ; trailing 0..3 bytes
  107. beqz r1, 7f ; if len == 0 goto end
  108. and3 r6, r1, #2
  109. beqz r6, 5f ; if len < 2 goto 5f(1byte)
  110. lduh r4, @r0 || addi r0, #2
  111. addi r1, #-2 || slli r4, #16 ; halfword goes in the high half of the word
  112. addx r2, r4
  113. beqz r1, 6f
  114. .fillinsn
  115. 5: ldub r4, @r0 || ldi r1, #0 ; last single byte
  116. #ifndef __LITTLE_ENDIAN__
  117. slli r4, #8
  118. #endif
  119. addx r2, r4
  120. .fillinsn
  121. 6: addx r2, r1 ; r1 == 0 here: folds in the final carry
  122. .fillinsn
  123. 7:
; Fold the 32-bit sum to 16 bits: r0 = (sum & 0xffff) + (sum >> 16),
; then add back any carry out of the low halfword.
  124. and3 r0, r2, #0xffff
  125. srli r2, #16
  126. add r0, r2
  127. srl3 r2, r0, #16
  128. beqz r2, 1f
  129. addi r0, #1
  130. and3 r0, r0, #0xffff
  131. .fillinsn
  132. 1:
  133. beqz r7, 1f ; buff started odd: swap the upper byte for the lower
  134. and3 r2, r0, #0xff
  135. srl3 r0, r0, #8
  136. slli r2, #8
  137. or r0, r2
  138. .fillinsn
  139. 1:
; Re-add the caller's initial sum (saved on entry) with end-around carry.
  140. pop r2 || cmp r0, r0
  141. addx r0, r2 || ldi r2, #0
  142. addx r0, r2
  143. jmp r14
  144. #else /* not CONFIG_ISA_DUAL_ISSUE */
  145. /*
  146. * Experiments with Ethernet and SLIP connections show that buff
  147. * is aligned on either a 2-byte or 4-byte boundary. We get at
  148. * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  149. * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  150. * alignment for the unrolled loop.
  151. */
  152. .text
  153. ENTRY(csum_partial)
  154. ; Function args
  155. ; r0: unsigned char *buff
  156. ; r1: int len
  157. ; r2: unsigned int sum
; Sequential (single-issue) build of csum_partial — no '||' dual-issue pairing.
; Returns r0 = the caller's initial sum (r2) combined with the one's-complement
; sum of buff[0..len-1], folded to 16 bits with end-around carry.
; r7 != 0 records that buff started on an odd address; the folded result is
; byte-swapped at the end in that case.
; 'addx' is add-with-carry; 'cmp r0, r0' is used throughout to clear the c-bit.
; NOTE(review): the exact instruction order matters — the carry chain (c-bit)
; must not be disturbed between the addx instructions.
  158. push r2
  159. ldi r2, #0
  160. and3 r7, r0, #1 ; Check alignment.
  161. beqz r7, 1f ; Jump if alignment is ok.
  162. ; buff is 1-byte misaligned: fold in one byte to reach 2-byte alignment
  163. ldub r4, @r0
  164. addi r0, #1
  165. addi r1, #-1 ; Alignment uses up one byte.
  166. cmp r0, r0 ; clear c-bit
  167. ldi r3, #0
  168. addx r2, r4
  169. addx r2, r3 ; r3 == 0: folds in the carry only
  170. .fillinsn
  171. 1:
  172. and3 r4, r0, #2 ; Check alignment.
  173. beqz r4, 2f ; Jump if alignment is ok.
  174. addi r1, #-2 ; Alignment uses up two bytes.
  175. cmp r0, r0 ; clear c-bit
  176. bgtz r1, 1f ; Jump if we had at least two bytes.
  177. addi r1, #2 ; len(r1) was < 2. Deal with it.
  178. bra 4f ; handle the trailing bytes only
  179. .fillinsn
  180. 1:
  181. ; 2-byte aligned: fold in one halfword to reach 4-byte alignment
  182. lduh r4, @r0
  183. addi r0, #2
  184. ldi r3, #0
  185. addx r2, r4
  186. addx r2, r3 ; r3 == 0: folds in the carry only
  187. .fillinsn
  188. 2:
  189. ; 4-byte aligned: main loop, 32 bytes per iteration (r6 = len / 32)
  190. cmp r0, r0 ; clear c-bit
  191. srl3 r6, r1, #5
  192. beqz r6, 2f
  193. .fillinsn
  194. 1: ld r3, @r0+
  195. ld r4, @r0+ ; +4
  196. ld r5, @r0+ ; +8
  197. addx r2, r3
  198. addx r2, r4
  199. addx r2, r5
  200. ld r3, @r0+ ; +12
  201. ld r4, @r0+ ; +16
  202. ld r5, @r0+ ; +20
  203. addx r2, r3
  204. addx r2, r4
  205. addx r2, r5
  206. ld r3, @r0+ ; +24
  207. ld r4, @r0+ ; +28
  208. addi r6, #-1
  209. addx r2, r3
  210. addx r2, r4
  211. bnez r6, 1b ; carry in the c-bit survives into the next iteration
  212. addx r2, r6 ; r6=0, so this only folds in the last carry
  213. cmp r0, r0 ; This clears c-bit
  214. .fillinsn
  215. 2: and3 r6, r1, #0x1c ; remaining whole words: len & 0x1c bytes...
  216. beqz r6, 4f
  217. srli r6, #2 ; ...as a word count
  218. .fillinsn
  219. 3: ld r4, @r0+
  220. addi r6, #-1
  221. addx r2, r4
  222. bnez r6, 3b
  223. addx r2, r6 ; r6=0: folds in the last carry
  224. cmp r0, r0 ; This clears c-bit
  225. .fillinsn
  226. 4: and3 r1, r1, #3 ; trailing 0..3 bytes
  227. beqz r1, 7f ; if len == 0 goto end
  228. and3 r6, r1, #2
  229. beqz r6, 5f ; if len < 2 goto 5f(1byte)
  230. lduh r4, @r0
  231. addi r0, #2
  232. addi r1, #-2
  233. slli r4, #16 ; halfword goes in the high half of the word
  234. addx r2, r4
  235. beqz r1, 6f
  236. .fillinsn
  237. 5: ldub r4, @r0 ; last single byte
  238. #ifndef __LITTLE_ENDIAN__
  239. slli r4, #8
  240. #endif
  241. addx r2, r4
  242. .fillinsn
  243. 6: ldi r5, #0
  244. addx r2, r5 ; r5 == 0: folds in the final carry
  245. .fillinsn
  246. 7:
; Fold the 32-bit sum to 16 bits: r0 = (sum & 0xffff) + (sum >> 16),
; then add back any carry out of the low halfword.
  247. and3 r0, r2, #0xffff
  248. srli r2, #16
  249. add r0, r2
  250. srl3 r2, r0, #16
  251. beqz r2, 1f
  252. addi r0, #1
  253. and3 r0, r0, #0xffff
  254. .fillinsn
  255. 1:
  256. beqz r7, 1f ; buff started odd: swap the upper byte for the lower
  257. mv r2, r0
  258. srl3 r0, r2, #8
  259. and3 r2, r2, #0xff
  260. slli r2, #8
  261. or r0, r2
  262. .fillinsn
  263. 1:
; Re-add the caller's initial sum (saved on entry) with end-around carry.
  264. pop r2
  265. cmp r0, r0 ; clear c-bit
  266. addx r0, r2
  267. ldi r2, #0
  268. addx r0, r2
  269. jmp r14
  270. #endif /* not CONFIG_ISA_DUAL_ISSUE */
  271. /*
  272. unsigned int csum_partial_copy_generic (const char *src, char *dst,
  273. int len, int sum, int *src_err_ptr, int *dst_err_ptr)
  274. */
  275. /*
  276. * Copy from ds while checksumming, otherwise like csum_partial
  277. *
  278. * The macros SRC and DST specify the type of access for the instruction.
  279. * thus we can call a custom exception handler for all access types.
  280. *
  281. * FIXME: could someone double-check whether I haven't mixed up some SRC and
  282. * DST definitions? It's damn hard to trigger all cases. I hope I got
  283. * them all but there's no guarantee.
  284. */
  285. ENTRY(csum_partial_copy_generic)
; NOTE(review): unimplemented stub — no copy and no checksum is performed.
; It returns immediately via r14; the nops are only padding. Registers are
; left untouched, so the caller presumably gets r0 (the src argument, per
; the prototype comment above) back as the "sum". TODO: confirm no caller
; depends on a real csum_partial_copy_generic in this configuration.
  286. nop
  287. nop
  288. nop
  289. nop
  290. jmp r14
  291. nop
  292. nop
  293. nop
  294. .end