csum_64plus.S 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. ;
  2. ; linux/arch/c6x/lib/csum_64plus.S
  3. ;
  4. ; Port on Texas Instruments TMS320C6x architecture
  5. ;
  6. ; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
  7. ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
  8. ;
  9. ; This program is free software; you can redistribute it and/or modify
  10. ; it under the terms of the GNU General Public License version 2 as
  11. ; published by the Free Software Foundation.
  12. ;
  13. #include <linux/linkage.h>
  14. ;
  15. ;unsigned int csum_partial_copy(const char *src, char * dst,
  16. ; int len, int sum)
  17. ;
  18. ; A4: src
  19. ; B4: dst
  20. ; A6: len
  21. ; B6: sum
  22. ; return csum in A4
  23. ;
  24. .text
  ; csum_partial_copy: copy len (A6) bytes from src (A4) to dst (B4) while
  ; summing the data as 16-bit half-words in A9. Whole words are handled by
  ; one of two SPLOOPs (aligned / non-aligned), then an optional half-word
  ; and an optional last byte. The sum is folded to 16 bits, the given
  ; csum (B6) is added, and the result is returned in A4.
  25. ENTRY(csum_partial_copy)
  26. MVC .S2 ILC,B30 ; save ILC in B30 (restored before returning)
  27. MV .D1X B6,A31 ; given csum
  28. ZERO .D1 A9 ; csum (a side)
  29. || ZERO .D2 B9 ; csum (b side)
  30. || SHRU .S2X A6,2,B5 ; len / 4
  31. ;; Check alignment and size
  32. AND .S1 3,A4,A1 ; A1 = src & 3
  33. || AND .S2 3,B4,B0 ; B0 = dst & 3
  34. OR .L2X B0,A1,B0 ; non aligned condition
  35. || MVC .S2 B5,ILC ; loop count = number of whole words
  36. || MVK .D2 1,B2 ; B2 = 1, multiplier for the MPYU in the aligned loop
  37. || MV .D1X B5,A1 ; words condition
  38. [!A1] B .S1 L8 ; no whole word: go to the half-word/byte tail
  39. [B0] BNOP .S1 L6,5 ; src or dst not word-aligned: use the non-aligned loop
  40. SPLOOP 1
  41. ;; Main loop for aligned words
  42. LDW .D1T1 *A4++,A7 ; A7 = next source word
  43. NOP 4
  44. MV .S2X A7,B7 ; B7 = copy of the word for the b side
  45. || EXTU .S1 A7,0,16,A16 ; A16 = upper half-word (A7 >> 16)
  46. STW .D2T2 B7,*B4++ ; store the word to dst
  47. || MPYU .M2 B7,B2,B8 ; B8 = lower half-word (unsigned low 16 bits * 1)
  48. || ADD .L1 A16,A9,A9 ; a side accumulates the upper half-words
  49. NOP
  50. SPKERNEL 8,0
  51. || ADD .L2 B8,B9,B9 ; b side accumulates the lower half-words
  52. ZERO .D1 A1 ; A1 = 0 so the test at L6 skips the non-aligned loop
  53. || ADD .L1X A9,B9,A9 ; add csum from a and b sides
  54. L6:
  55. [!A1] BNOP .S1 L8,5 ; A1 == 0: skip the non-aligned loop
  56. ;; Main loop for non-aligned words
  57. SPLOOP 2
  58. || MVK .L1 1,A2 ; A2 = 1, multiplier for the MPYU below
  59. LDNW .D1T1 *A4++,A7 ; A7 = next source word (non-aligned load)
  60. NOP 3
  61. NOP
  62. MV .S2X A7,B7 ; B7 = copy of the word to store
  63. || EXTU .S1 A7,0,16,A16 ; A16 = upper half-word (A7 >> 16)
  64. || MPYU .M1 A7,A2,A8 ; A8 = lower half-word (unsigned low 16 bits * 1)
  65. ADD .L1 A16,A9,A9 ; add the upper half-word
  66. SPKERNEL 6,0
  67. || STNW .D2T2 B7,*B4++ ; store the word to dst (non-aligned store)
  68. || ADD .L1 A8,A9,A9 ; add the lower half-word
  69. L8: AND .S2X 2,A6,B5 ; B5 = len & 2
  70. CMPGT .L2 B5,0,B0 ; B0 = 1 if a half-word remains
  71. [!B0] BNOP .S1 L82,4 ; no half-word: go to the last-byte check
  72. ;; Manage half-word
  73. ZERO .L1 A7
  74. || ZERO .D1 A8
  75. #ifdef CONFIG_CPU_BIG_ENDIAN
  76. LDBU .D1T1 *A4++,A7 ; A7 = first byte
  77. LDBU .D1T1 *A4++,A8 ; A8 = second byte
  78. NOP 3
  79. SHL .S1 A7,8,A0 ; first byte is the high byte of the half-word
  80. ADD .S1 A8,A9,A9 ; add the low byte
  81. STB .D2T1 A7,*B4++ ; copy the first byte
  82. || ADD .S1 A0,A9,A9 ; add the high byte
  83. STB .D2T1 A8,*B4++ ; copy the second byte
  84. #else
  85. LDBU .D1T1 *A4++,A7 ; A7 = first byte
  86. LDBU .D1T1 *A4++,A8 ; A8 = second byte
  87. NOP 3
  88. ADD .S1 A7,A9,A9 ; first byte is the low byte of the half-word
  89. SHL .S1 A8,8,A0 ; second byte is the high byte
  90. STB .D2T1 A7,*B4++ ; copy the first byte
  91. || ADD .S1 A0,A9,A9 ; add the high byte
  92. STB .D2T1 A8,*B4++ ; copy the second byte
  93. #endif
  94. ;; Handle the last byte, if any
  95. L82: AND .S2X 1,A6,B0 ; B0 = len & 1
  96. [!B0] BNOP .S1 L9,5 ; even length: go fold the sum
  97. || ZERO .L1 A7
  98. L83: LDBU .D1T1 *A4++,A7 ; A7 = last byte
  99. NOP 4
  100. MV .L2X A7,B7 ; B7 = unshifted copy of the byte to store
  101. #ifdef CONFIG_CPU_BIG_ENDIAN
  102. STB .D2T2 B7,*B4++ ; copy the last byte
  103. || SHL .S1 A7,8,A7 ; the byte counts as the high byte of a half-word
  104. ADD .S1 A7,A9,A9
  105. #else
  106. STB .D2T2 B7,*B4++ ; copy the last byte
  107. || ADD .S1 A7,A9,A9 ; the byte counts as the low byte of a half-word
  108. #endif
  109. ;; Fold the csum
  110. L9: SHRU .S2X A9,16,B0 ; B0 = sum >> 16
  111. [!B0] BNOP .S1 L10,5 ; nothing above bit 15: no folding needed
  112. L91: SHRU .S2X A9,16,B4 ; B4 = high half of the sum
  113. || EXTU .S1 A9,16,16,A3 ; A3 = low half of the sum
  114. ADD .D1X A3,B4,A9 ; sum = low + high
  115. SHRU .S1 A9,16,A0
  116. [A0] BNOP .S1 L91,5 ; repeat while bits above 15 are still set
  117. L10: ADD .D1 A31,A9,A9 ; add the given csum to the folded sum
  118. MV .D1 A9,A4 ; return value in A4
  119. BNOP .S2 B3,4 ; return: branch to the address in B3
  120. MVC .S2 B30,ILC ; restore the ILC saved at entry
  121. ENDPROC(csum_partial_copy)
  122. ;
  123. ;unsigned short
  124. ;ip_fast_csum(unsigned char *iph, unsigned int ihl)
  125. ;{
  126. ; unsigned int checksum = 0;
  127. ; unsigned short *tosum = (unsigned short *) iph;
  128. ; int len;
  129. ;
  130. ; len = ihl*4;
  131. ;
  132. ; if (len <= 0)
  133. ; return 0;
  134. ;
  135. ; while(len) {
  136. ; len -= 2;
  137. ; checksum += *tosum++;
  138. ; }
  139. ; if (len & 1)
  140. ; checksum += *(unsigned char*) tosum;
  141. ;
  142. ; while(checksum >> 16)
  143. ; checksum = (checksum & 0xffff) + (checksum >> 16);
  144. ;
  145. ; return ~checksum;
  146. ;}
  147. ;
  148. ; A4: iph
  149. ; B4: ihl
  150. ; return checksum in A4
  151. ;
  152. .text
  ; ip_fast_csum: sum ihl*4 bytes at iph (A4) as unsigned 16-bit half-words
  ; in A5, fold the sum to 16 bits and return its complement, masked to
  ; 16 bits, in A4. Returns 0 when ihl*4 <= 0.
  153. ENTRY(ip_fast_csum)
  154. ZERO .D1 A5 ; A5 = checksum accumulator
  155. || MVC .S2 ILC,B30 ; save ILC in B30 (restored before returning)
  156. SHL .S2 B4,2,B0 ; B0 = len = ihl * 4
  157. CMPGT .L2 B0,0,B1 ; B1 = (len > 0)
  158. [!B1] BNOP .S1 L15,4 ; len <= 0: go straight to the return sequence
  159. [!B1] ZERO .D1 A3 ; ... with a result of 0
  160. [!B0] B .S1 L12 ; len == 0: branch to the fold code
  161. SHRU .S2 B0,1,B0 ; B0 = number of half-words
  162. MVC .S2 B0,ILC ; loop count = number of half-words
  163. NOP 3
  164. SPLOOP 1
  165. LDHU .D1T1 *A4++,A3 ; A3 = next half-word, zero-extended
  166. NOP 3
  167. NOP
  168. SPKERNEL 5,0
  169. || ADD .L1 A3,A5,A5 ; checksum += half-word
  170. L12: SHRU .S1 A5,16,A0 ; A0 = checksum >> 16
  171. [!A0] BNOP .S1 L14,5 ; nothing above bit 15: no folding needed
  172. L13: SHRU .S2X A5,16,B4 ; B4 = high half
  173. EXTU .S1 A5,16,16,A3 ; A3 = low half
  174. ADD .D1X A3,B4,A5 ; checksum = low + high
  175. SHRU .S1 A5,16,A0
  176. [A0] BNOP .S1 L13,5 ; repeat while bits above 15 are still set
  177. L14: NOT .D1 A5,A3 ; A3 = ~checksum
  178. EXTU .S1 A3,16,16,A3 ; keep only the low 16 bits
  179. L15: BNOP .S2 B3,3 ; return: branch to the address in B3
  180. MVC .S2 B30,ILC ; restore the ILC saved at entry
  181. MV .D1 A3,A4 ; return value in A4
  182. ENDPROC(ip_fast_csum)
  183. ;
  184. ;unsigned short
  185. ;do_csum(unsigned char *buff, unsigned int len)
  186. ;{
  187. ; int odd, count;
  188. ; unsigned int result = 0;
  189. ;
  190. ; if (len <= 0)
  191. ; goto out;
  192. ; odd = 1 & (unsigned long) buff;
  193. ; if (odd) {
  194. ;#ifdef __LITTLE_ENDIAN
  195. ; result += (*buff << 8);
  196. ;#else
  197. ; result = *buff;
  198. ;#endif
  199. ; len--;
  200. ; buff++;
  201. ; }
  202. ; count = len >> 1; /* nr of 16-bit words.. */
  203. ; if (count) {
  204. ; if (2 & (unsigned long) buff) {
  205. ; result += *(unsigned short *) buff;
  206. ; count--;
  207. ; len -= 2;
  208. ; buff += 2;
  209. ; }
  210. ; count >>= 1; /* nr of 32-bit words.. */
  211. ; if (count) {
  212. ; unsigned int carry = 0;
  213. ; do {
  214. ; unsigned int w = *(unsigned int *) buff;
  215. ; count--;
  216. ; buff += 4;
  217. ; result += carry;
  218. ; result += w;
  219. ; carry = (w > result);
  220. ; } while (count);
  221. ; result += carry;
  222. ; result = (result & 0xffff) + (result >> 16);
  223. ; }
  224. ; if (len & 2) {
  225. ; result += *(unsigned short *) buff;
  226. ; buff += 2;
  227. ; }
  228. ; }
  229. ; if (len & 1)
  230. ;#ifdef __LITTLE_ENDIAN
  231. ; result += *buff;
  232. ;#else
  233. ; result += (*buff << 8);
  234. ;#endif
  235. ; result = (result & 0xffff) + (result >> 16);
  236. ; /* add up carry.. */
  237. ; result = (result & 0xffff) + (result >> 16);
  238. ; if (odd)
  239. ; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
  240. ;out:
  241. ; return result;
  242. ;}
  243. ;
  244. ; A4: buff
  245. ; B4: len
  246. ; return checksum in A4
  247. ;
  ; do_csum: implementation of the C routine shown in the comment above.
  ; Register use: A4 = buff pointer, B3 = remaining len, A1 = result,
  ; A3 = odd flag, A5 = saved return address, B0 = scratch count/condition.
  ; NOTE(review): ILC is written below but, unlike csum_partial_copy and
  ; ip_fast_csum in this file, it is not saved/restored here - confirm
  ; that this is intended.
  248. ENTRY(do_csum)
  249. CMPGT .L2 B4,0,B0 ; B0 = (len > 0)
  250. [!B0] BNOP .S1 L26,3 ; len <= 0: go to the return sequence
  251. EXTU .S1 A4,31,31,A0 ; A0 = buff & 1 (odd start address)
  ; NOTE(review): the EXTU above and the packet below appear to sit in the
  ; delay slots of the branch to L26, so A5 and A1 are also set on the
  ; len <= 0 path - confirm against the branch timing.
  252. MV .L1 A0,A3 ; A3 = odd, kept for the final byte swap
  253. || MV .S1X B3,A5 ; A5 = return address (B3 is reused for len)
  254. || MV .L2 B4,B3 ; B3 = len
  255. || ZERO .D1 A1 ; result = 0
  256. #ifdef CONFIG_CPU_BIG_ENDIAN
  257. [A0] SUB .L2 B3,1,B3 ; odd: len--
  258. || [A0] LDBU .D1T1 *A4++,A1 ; odd: result = first byte
  259. #else
  260. [!A0] BNOP .S1 L21,5 ; even address: nothing to do
  261. || [A0] LDBU .D1T1 *A4++,A0 ; odd: A0 = first byte
  262. SUB .L2 B3,1,B3 ; len--
  263. || SHL .S1 A0,8,A1 ; result = first byte << 8
  264. L21:
  265. #endif
  266. SHR .S2 B3,1,B0 ; B0 = count = number of 16-bit words
  267. [!B0] BNOP .S1 L24,3 ; count == 0: only the last byte may remain
  268. MVK .L1 2,A0
  269. AND .L1 A4,A0,A0 ; A0 = buff & 2
  270. [!A0] BNOP .S1 L22,5 ; bit 1 of the address clear: no leading half-word
  271. || [A0] LDHU .D1T1 *A4++,A0 ; otherwise load the leading half-word
  272. SUB .L2 B0,1,B0 ; count--
  273. || SUB .S2 B3,2,B3 ; len -= 2
  274. || ADD .L1 A0,A1,A1 ; result += half-word
  275. L22:
  276. SHR .S2 B0,1,B0 ; B0 = number of 32-bit words
  277. || ZERO .L1 A0 ; carry = 0
  278. [!B0] BNOP .S1 L23,5 ; no 32-bit words: skip the loop
  279. || [B0] MVC .S2 B0,ILC ; loop count = number of 32-bit words
  280. SPLOOP 3
  281. SPMASK L1 ; NOTE(review): presumably keeps the MV below out of the loop buffer so it runs once - confirm
  282. || MV .L1 A1,A2 ; A2 = result so far
  283. || LDW .D1T1 *A4++,A1 ; A1 = w, the next 32-bit word
  284. NOP 4
  285. ADD .L1 A0,A1,A0 ; A0 = carry + w
  286. ADD .L1 A2,A0,A2 ; result += carry + w
  287. SPKERNEL 1,2
  288. || CMPGTU .L1 A1,A2,A0 ; carry = (w > result), unsigned
  289. ADD .L1 A0,A2,A6 ; result += final carry
  290. EXTU .S1 A6,16,16,A7 ; A7 = low half of the result
  291. SHRU .S2X A6,16,B0 ; B0 = high half of the result
  292. NOP 1
  293. ADD .L1X A7,B0,A1 ; result = low + high
  294. L23:
  295. MVK .L2 2,B0
  296. AND .L2 B3,B0,B0 ; B0 = len & 2
  297. [B0] LDHU .D1T1 *A4++,A0 ; trailing half-word, if any
  298. NOP 4
  299. [B0] ADD .L1 A0,A1,A1 ; result += trailing half-word
  300. L24:
  301. EXTU .S2 B3,31,31,B0 ; B0 = len & 1
  302. #ifdef CONFIG_CPU_BIG_ENDIAN
  303. [!B0] BNOP .S1 L25,4 ; no trailing byte
  304. || [B0] LDBU .D1T1 *A4,A0 ; A0 = trailing byte
  305. SHL .S1 A0,8,A0 ; the byte counts as the high byte
  306. ADD .L1 A0,A1,A1 ; result += byte << 8
  307. L25:
  308. #else
  309. [B0] LDBU .D1T1 *A4,A0 ; A0 = trailing byte, if any
  310. NOP 4
  311. [B0] ADD .L1 A0,A1,A1 ; result += byte
  312. #endif
  313. EXTU .S1 A1,16,16,A0 ; A0 = low half of the result
  314. SHRU .S2X A1,16,B0 ; B0 = high half of the result
  315. NOP 1
  316. ADD .L1X A0,B0,A0 ; first fold: low + high
  317. SHRU .S1 A0,16,A1 ; A1 = carry out of the first fold
  318. ADD .L1 A0,A1,A0 ; add that carry back in
  319. EXTU .S1 A0,16,16,A1 ; A1 = result masked to 16 bits
  320. EXTU .S1 A1,16,24,A2 ; A2 = (result >> 8) & 0xff
  321. EXTU .S1 A1,24,16,A0 ; A0 = (result & 0xff) << 8
  322. || MV .L2X A3,B0 ; B0 = odd flag saved at entry
  323. [B0] OR .L1 A0,A2,A1 ; odd start address: swap the two result bytes
  324. L26:
  325. NOP 1
  326. BNOP .S2X A5,4 ; return through the address saved in A5
  327. MV .L1 A1,A4 ; return value in A4
  328. ENDPROC(do_csum)
  329. ;__wsum csum_partial(const void *buff, int len, __wsum wsum)
  330. ;{
  331. ; unsigned int sum = (__force unsigned int)wsum;
  332. ; unsigned int result = do_csum(buff, len);
  333. ;
  334. ; /* add in old sum, and carry.. */
  335. ; result += sum;
  336. ; if (sum > result)
  337. ; result += 1;
  338. ; return (__force __wsum)result;
  339. ;}
  340. ;
  ; csum_partial: call do_csum on (A4, B4), then add the value passed in A6
  ; (wsum in the C prototype above) with an end-around carry; result in A4.
  ; Relies on A8 and A9 surviving the call: do_csum in this file does not
  ; write either register.
  341. ENTRY(csum_partial)
  342. MV .L1X B3,A9 ; keep our return address; B3 is overwritten by the call
  343. || CALLP .S2 do_csum,B3 ; result = do_csum(buff, len)
  344. || MV .S1 A6,A8 ; A8 = sum
  345. BNOP .S2X A9,2 ; return through the saved address
  346. ADD .L1 A8,A4,A1 ; result += sum
  347. CMPGTU .L1 A8,A1,A0 ; A0 = (sum > result), i.e. the addition wrapped
  348. ADD .L1 A1,A0,A4 ; add the carry back; return value in A4
  349. ENDPROC(csum_partial)
  350. ;unsigned short
  351. ;ip_compute_csum(unsigned char *buff, unsigned int len)
  352. ;
  353. ; A4: buff
  354. ; B4: len
  355. ; return checksum in A4
  ; ip_compute_csum: call do_csum on (A4, B4) and return the bitwise
  ; complement of its result, limited to the low 16 bits, in A4.
  ; Relies on A9 surviving the call: do_csum in this file does not write it.
  356. ENTRY(ip_compute_csum)
  357. MV .L1X B3,A9 ; keep our return address; B3 is overwritten by the call
  358. || CALLP .S2 do_csum,B3 ; A4 = do_csum(buff, len)
  359. BNOP .S2X A9,3 ; return through the saved address
  360. NOT .S1 A4,A4 ; complement the sum
  361. CLR .S1 A4,16,31,A4 ; clear bits 16..31, keeping a 16-bit result
  362. ENDPROC(ip_compute_csum)