// SPDX-License-Identifier: GPL-2.0
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */

#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
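
/*
 * Fold a 32-bit value into 16 bits by adding the upper and lower
 * halves with end-around carry, as ones'-complement arithmetic
 * requires.
 */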
static inline unsigned short from32to16(unsigned a)
{
	unsigned short b = a >> 16;
	asm("addw %w2,%w0\n\t"
	    "adcw $0,%w0\n"
	    : "=r" (b)
	    : "0" (b), "r" (a));
	return b;
}

/*
 * Do a 64-bit checksum on an arbitrary memory area.
 * Returns a 32-bit checksum.
 *
 * This isn't as time critical as it used to be because many NICs
 * do hardware checksumming these days.
 *
 * Things tried and found to not make it faster:
 *	Manual prefetching.
 *	Unrolling to a 128-byte inner loop.
 *	Using interleaving with more registers to break the carry chains.
 */
static unsigned do_csum(const unsigned char *buff, unsigned len)
{
	unsigned odd, count;
	unsigned long result = 0;

	if (unlikely(len == 0))
		return result;
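	/*
	 * If buff starts at an odd address, checksum the first byte
	 * into the high half of a 16-bit word; the final result is
	 * byte-swapped at the end of this function to compensate.
	 */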
	odd = 1 & (unsigned long) buff;
	if (unlikely(odd)) {
		result = *buff << 8;
		len--;
		buff++;
	}
	count = len >> 1;		/* nr of 16-bit words.. */
	if (count) {
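		/* align buff to a 4-byte boundary */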
		if (2 & (unsigned long) buff) {
			result += *(unsigned short *)buff;
			count--;
			len -= 2;
			buff += 2;
		}
		count >>= 1;		/* nr of 32-bit words.. */
		if (count) {
			unsigned long zero;
			unsigned count64;
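
			/* align buff to an 8-byte boundary */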
			if (4 & (unsigned long) buff) {
				result += *(unsigned int *) buff;
				count--;
				len -= 4;
				buff += 4;
			}
			count >>= 1;	/* nr of 64-bit words.. */

			/* main loop using 64byte blocks */
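			/*
			 * Each iteration consumes eight 64-bit words in
			 * a single add/adc carry chain; the trailing
			 * adcq %[zero] folds the last carry back into
			 * the running sum.
			 */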
			zero = 0;
			count64 = count >> 3;
			while (count64) {
				asm("addq 0*8(%[src]),%[res]\n\t"
				    "adcq 1*8(%[src]),%[res]\n\t"
				    "adcq 2*8(%[src]),%[res]\n\t"
				    "adcq 3*8(%[src]),%[res]\n\t"
				    "adcq 4*8(%[src]),%[res]\n\t"
				    "adcq 5*8(%[src]),%[res]\n\t"
				    "adcq 6*8(%[src]),%[res]\n\t"
				    "adcq 7*8(%[src]),%[res]\n\t"
				    "adcq %[zero],%[res]"
				    : [res] "=r" (result)
				    : [src] "r" (buff), [zero] "r" (zero),
				      "[res]" (result));
				buff += 64;
				count64--;
			}

			/* last up to 7 8byte blocks */
			count %= 8;
			while (count) {
				asm("addq %1,%0\n\t"
				    "adcq %2,%0\n"
				    : "=r" (result)
				    : "m" (*(unsigned long *)buff),
				      "r" (zero), "0" (result));
				--count;
				buff += 8;
			}
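
			/*
			 * Fold the 64-bit sum down to 32 bits with
			 * end-around carry before adding the tail bytes.
			 */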
			result = add32_with_carry(result >> 32,
						  result & 0xffffffff);
			if (len & 4) {
				result += *(unsigned int *) buff;
				buff += 4;
			}
		}
		if (len & 2) {
			result += *(unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		result += *buff;
	result = add32_with_carry(result >> 32, result & 0xffffffff);
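	/* undo the byte shift applied above for an odd start address */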
	if (unlikely(odd)) {
		result = from32to16(result);
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
	}
	return result;
}

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 64-bit boundary
 */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
	return (__force __wsum)add32_with_carry(do_csum(buff, len),
						(__force u32)sum);
}
EXPORT_SYMBOL(csum_partial);
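
/*
 * Usage sketch (hypothetical caller, not part of this file):
 * checksumming a buffer in two fragments, feeding the first result
 * back in as "sum", as the comment above describes:
 *
 *	__wsum sum = csum_partial(frag1, frag1_len, 0);
 *	sum = csum_partial(frag2, frag2_len, sum);
 */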

/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
__sum16 ip_compute_csum(const void *buff, int len)
{
	return csum_fold(csum_partial(buff, len, 0));
}
EXPORT_SYMBOL(ip_compute_csum);