csum-partial_64.c 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. /*
  2. * arch/x86_64/lib/csum-partial.c
  3. *
  4. * This file contains network checksum routines that are better done
  5. * in an architecture-specific manner due to speed.
  6. */
  7. #include <linux/compiler.h>
  8. #include <linux/module.h>
  9. #include <asm/checksum.h>
  /*
   * Fold a 32-bit value into 16 bits.
   *
   * The upper half of @a is added to its lower half with a 16-bit add, and
   * the carry out of that add is then added back into the 16-bit result.
   */
  static inline unsigned short from32to16(unsigned a)
  {
      unsigned short folded = a >> 16;    /* start from the upper half */

      /* folded += low 16 bits of a; then add the resulting carry back in */
      __asm__("addw %w[low],%w[sum]\n\t"
              "adcw $0,%w[sum]\n"
              : [sum] "+r" (folded)
              : [low] "r" (a));

      return folded;
  }
  /*
   * Do a 64-bit checksum on an arbitrary memory area.
   * Returns a 32bit checksum.
   *
   * @buff: first byte of the area; any alignment is accepted
   * @len:  number of bytes to sum; 0 yields 0
   *
   * Leading 1-, 2- and 4-byte pieces are consumed whenever the matching
   * address bit of buff is set and enough data remains, so the quadword
   * loops below always run on an 8-byte aligned pointer.  The tail is then
   * picked up in 4-, 2- and 1-byte pieces according to the low bits of the
   * remaining length.
   *
   * If buff starts on an odd address the first byte is added shifted left
   * by 8 and, at the end, the sum is folded to 16 bits and its two bytes
   * are swapped.
   *
   * add32_with_carry() is declared outside this file; it is presumably a
   * 32-bit add with end-around carry -- confirm against <asm/checksum.h>.
   *
   * This isn't as time critical as it used to be because many NICs
   * do hardware checksumming these days.
   *
   * Things tried and found to not make it faster:
   * Manual Prefetching
   * Unrolling to a 128-byte inner loop.
   * Using interleaving with more registers to break the carry chains.
   */
  static unsigned do_csum(const unsigned char *buff, unsigned len)
  {
      unsigned odd, count;
      unsigned long result = 0;

      if (unlikely(len == 0))
          return result;

      /* Odd start address: take one byte so that buff becomes 2-byte aligned. */
      odd = 1 & (unsigned long) buff;
      if (unlikely(odd)) {
          result = *buff << 8;
          len--;
          buff++;
      }

      count = len >> 1;       /* nr of 16-bit words.. */
      if (count) {
          if (2 & (unsigned long) buff) {
              result += *(const unsigned short *) buff;
              count--;
              len -= 2;
              buff += 2;
          }

          count >>= 1;        /* nr of 32-bit words.. */
          if (count) {
              unsigned long zero;
              unsigned count64;

              if (4 & (unsigned long) buff) {
                  result += *(const unsigned int *) buff;
                  count--;
                  len -= 4;
                  buff += 4;
              }

              count >>= 1;    /* nr of 64-bit words.. */

              /* main loop using 64byte blocks */
              zero = 0;
              count64 = count >> 3;
              while (count64) {
                  /*
                   * The asm reads 64 bytes through [src], which is
                   * passed only as a register.  The "memory" clobber
                   * tells the compiler that memory is accessed, so
                   * earlier stores to the buffer cannot be moved past
                   * or dropped around this statement.
                   */
                  __asm__("addq 0*8(%[src]),%[res]\n\t"
                          "adcq 1*8(%[src]),%[res]\n\t"
                          "adcq 2*8(%[src]),%[res]\n\t"
                          "adcq 3*8(%[src]),%[res]\n\t"
                          "adcq 4*8(%[src]),%[res]\n\t"
                          "adcq 5*8(%[src]),%[res]\n\t"
                          "adcq 6*8(%[src]),%[res]\n\t"
                          "adcq 7*8(%[src]),%[res]\n\t"
                          "adcq %[zero],%[res]"
                          : [res] "=r" (result)
                          : [src] "r" (buff), [zero] "r" (zero),
                            "[res]" (result)
                          : "memory");
                  buff += 64;
                  count64--;
              }

              /* last up to 7 8byte blocks */
              count %= 8;
              while (count) {
                  /* The quadword is a real "m" operand, so no clobber is needed. */
                  __asm__("addq %1,%0\n\t"
                          "adcq %2,%0\n"
                          : "=r" (result)
                          : "m" (*(const unsigned long *) buff),
                            "r" (zero), "0" (result));
                  --count;
                  buff += 8;
              }

              /* Combine the two 32-bit halves before adding narrower pieces. */
              result = add32_with_carry(result >> 32,
                                        result & 0xffffffff);

              if (len & 4) {
                  result += *(const unsigned int *) buff;
                  buff += 4;
              }
          }

          if (len & 2) {
              result += *(const unsigned short *) buff;
              buff += 2;
          }
      }

      if (len & 1)
          result += *buff;

      result = add32_with_carry(result >> 32, result & 0xffffffff);

      /* Odd start: fold to 16 bits and swap the two bytes of the sum. */
      if (unlikely(odd)) {
          result = from32to16(result);
          result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
      }
      return result;
  }
  113. /*
  114. * computes the checksum of a memory block at buff, length len,
  115. * and adds in "sum" (32-bit)
  116. *
  117. * returns a 32-bit number suitable for feeding into itself
  118. * or csum_tcpudp_magic
  119. *
  120. * this function must be called with even lengths, except
  121. * for the last fragment, which may be odd
  122. *
  123. * it's best to have buff aligned on a 64-bit boundary
  124. */
  125. __wsum csum_partial(const void *buff, int len, __wsum sum)
  126. {
  127. return (__force __wsum)add32_with_carry(do_csum(buff, len),
  128. (__force u32)sum);
  129. }
  130. /*
  131. * this routine is used for miscellaneous IP-like checksums, mainly
  132. * in icmp.c
  133. */
  134. __sum16 ip_compute_csum(const void *buff, int len)
  135. {
  136. return csum_fold(csum_partial(buff,len,0));
  137. }
  138. EXPORT_SYMBOL(ip_compute_csum);