in6_rss.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. /*-
  2. * Copyright (c) 2010-2011 Juniper Networks, Inc.
  3. * All rights reserved.
  4. *
  5. * This software was developed by Robert N. M. Watson under contract
  6. * to Juniper Networks, Inc.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
  29. #include <sys/cdefs.h>
  30. __FBSDID("$FreeBSD$");
  31. #include "opt_inet6.h"
  32. #include "opt_pcbgroup.h"
  33. #ifndef PCBGROUP
  34. #error "options RSS depends on options PCBGROUP"
  35. #endif
  36. #include <sys/param.h>
  37. #include <sys/mbuf.h>
  38. #include <sys/socket.h>
  39. #include <sys/priv.h>
  40. #include <sys/kernel.h>
  41. #include <sys/smp.h>
  42. #include <sys/sysctl.h>
  43. #include <sys/sbuf.h>
  44. #include <net/if.h>
  45. #include <net/if_var.h>
  46. #include <net/netisr.h>
  47. #include <net/rss_config.h>
  48. #include <netinet/in.h>
  49. #include <netinet/in_pcb.h>
  50. #include <netinet6/in6_rss.h>
  51. #include <netinet/in_var.h>
  52. /* for software rss hash support */
  53. #include <netinet/ip6.h>
  54. #include <netinet6/ip6_var.h>
  55. #include <netinet/tcp.h>
  56. #include <netinet/udp.h>
  57. /*
  58. * Hash an IPv6 2-tuple.
  59. */
  60. uint32_t
  61. rss_hash_ip6_2tuple(const struct in6_addr *src, const struct in6_addr *dst)
  62. {
  63. uint8_t data[sizeof(*src) + sizeof(*dst)];
  64. u_int datalen;
  65. datalen = 0;
  66. bcopy(src, &data[datalen], sizeof(*src));
  67. datalen += sizeof(*src);
  68. bcopy(dst, &data[datalen], sizeof(*dst));
  69. datalen += sizeof(*dst);
  70. return (rss_hash(datalen, data));
  71. }
  72. /*
  73. * Hash an IPv6 4-tuple.
  74. */
  75. uint32_t
  76. rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
  77. const struct in6_addr *dst, u_short dstport)
  78. {
  79. uint8_t data[sizeof(*src) + sizeof(*dst) + sizeof(srcport) +
  80. sizeof(dstport)];
  81. u_int datalen;
  82. datalen = 0;
  83. bcopy(src, &data[datalen], sizeof(*src));
  84. datalen += sizeof(*src);
  85. bcopy(dst, &data[datalen], sizeof(*dst));
  86. datalen += sizeof(*dst);
  87. bcopy(&srcport, &data[datalen], sizeof(srcport));
  88. datalen += sizeof(srcport);
  89. bcopy(&dstport, &data[datalen], sizeof(dstport));
  90. datalen += sizeof(dstport);
  91. return (rss_hash(datalen, data));
  92. }
  93. /*
  94. * Calculate an appropriate ipv6 2-tuple or 4-tuple given the given
  95. * IPv6 source/destination address, UDP or TCP source/destination ports
  96. * and the protocol type.
  97. *
  98. * The protocol code may wish to do a software hash of the given
  99. * tuple. This depends upon the currently configured RSS hash types.
  100. *
  101. * This assumes that the packet in question isn't a fragment.
  102. *
  103. * It also assumes the packet source/destination address
  104. * are in "incoming" packet order (ie, source is "far" address.)
  105. */
  106. int
  107. rss_proto_software_hash_v6(const struct in6_addr *s, const struct in6_addr *d,
  108. u_short sp, u_short dp, int proto,
  109. uint32_t *hashval, uint32_t *hashtype)
  110. {
  111. uint32_t hash;
  112. /*
  113. * Next, choose the hash type depending upon the protocol
  114. * identifier.
  115. */
  116. if ((proto == IPPROTO_TCP) &&
  117. (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
  118. hash = rss_hash_ip6_4tuple(s, sp, d, dp);
  119. *hashval = hash;
  120. *hashtype = M_HASHTYPE_RSS_TCP_IPV6;
  121. return (0);
  122. } else if ((proto == IPPROTO_UDP) &&
  123. (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
  124. hash = rss_hash_ip6_4tuple(s, sp, d, dp);
  125. *hashval = hash;
  126. *hashtype = M_HASHTYPE_RSS_UDP_IPV6;
  127. return (0);
  128. } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) {
  129. /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */
  130. hash = rss_hash_ip6_2tuple(s, d);
  131. *hashval = hash;
  132. *hashtype = M_HASHTYPE_RSS_IPV6;
  133. return (0);
  134. }
  135. /* No configured available hashtypes! */
  136. RSS_DEBUG("no available hashtypes!\n");
  137. return (-1);
  138. }
  139. /*
  140. * Calculate an appropriate ipv6 2-tuple or 4-tuple given the given
  141. * IPv6 source/destination address, UDP or TCP source/destination ports
  142. * and the protocol type.
  143. *
  144. * The protocol code may wish to do a software hash of the given
  145. * tuple. This depends upon the currently configured RSS hash types.
  146. *
  147. * It assumes the packet source/destination address
  148. * are in "outgoin" packet order (ie, destination is "far" address.)
  149. */
  150. uint32_t
  151. xps_proto_software_hash_v6(const struct in6_addr *s, const struct in6_addr *d,
  152. u_short sp, u_short dp, int proto, uint32_t *hashtype)
  153. {
  154. uint32_t hash;
  155. /*
  156. * Next, choose the hash type depending upon the protocol
  157. * identifier.
  158. */
  159. if ((proto == IPPROTO_TCP) &&
  160. (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
  161. hash = rss_hash_ip6_4tuple(d, dp, s, sp);
  162. *hashtype = M_HASHTYPE_RSS_TCP_IPV6;
  163. return (hash);
  164. } else if ((proto == IPPROTO_UDP) &&
  165. (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
  166. hash = rss_hash_ip6_4tuple(d, dp, s, sp);
  167. *hashtype = M_HASHTYPE_RSS_UDP_IPV6;
  168. return (hash);
  169. } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) {
  170. /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */
  171. hash = rss_hash_ip6_2tuple(d, s);
  172. *hashtype = M_HASHTYPE_RSS_IPV6;
  173. return (hash);
  174. }
  175. *hashtype = M_HASHTYPE_NONE;
  176. return (0);
  177. }
  178. /*
  179. * Do a software calculation of the RSS for the given mbuf.
  180. *
  181. * This is typically used by the input path to recalculate the RSS after
  182. * some form of packet processing (eg de-capsulation, IP fragment reassembly.)
  183. *
  184. * dir is the packet direction - RSS_HASH_PKT_INGRESS for incoming and
  185. * RSS_HASH_PKT_EGRESS for outgoing.
  186. *
  187. * Returns 0 if a hash was done, -1 if no hash was done, +1 if
  188. * the mbuf already had a valid RSS flowid.
  189. *
  190. * This function doesn't modify the mbuf. It's up to the caller to
  191. * assign flowid/flowtype as appropriate.
  192. */
  193. int
  194. rss_mbuf_software_hash_v6(const struct mbuf *m, int dir, uint32_t *hashval,
  195. uint32_t *hashtype)
  196. {
  197. const struct ip6_hdr *ip6;
  198. const struct ip6_frag *ip6f;
  199. const struct tcphdr *th;
  200. const struct udphdr *uh;
  201. uint32_t flowtype;
  202. uint8_t proto;
  203. int off, newoff;
  204. int nxt;
  205. /*
  206. * XXX For now this only handles hashing on incoming mbufs.
  207. */
  208. if (dir != RSS_HASH_PKT_INGRESS) {
  209. RSS_DEBUG("called on EGRESS packet!\n");
  210. return (-1);
  211. }
  212. off = sizeof(struct ip6_hdr);
  213. /*
  214. * First, validate that the mbuf we have is long enough
  215. * to have an IPv6 header in it.
  216. */
  217. if (m->m_pkthdr.len < off) {
  218. RSS_DEBUG("short mbuf pkthdr\n");
  219. return (-1);
  220. }
  221. if (m->m_len < off) {
  222. RSS_DEBUG("short mbuf len\n");
  223. return (-1);
  224. }
  225. /* Ok, let's dereference that */
  226. ip6 = mtod(m, struct ip6_hdr *);
  227. proto = ip6->ip6_nxt;
  228. /*
  229. * Find the beginning of the TCP/UDP header.
  230. *
  231. * If this is a fragment then it shouldn't be four-tuple
  232. * hashed just yet. Once it's reassembled into a full
  233. * frame it should be re-hashed.
  234. */
  235. while (proto != IPPROTO_FRAGMENT) {
  236. newoff = ip6_nexthdr(m, off, proto, &nxt);
  237. if (newoff < 0)
  238. break;
  239. off = newoff;
  240. proto = nxt;
  241. }
  242. /*
  243. * Ignore the fragment header if this is an "atomic" fragment
  244. * (offset and m bit set to 0)
  245. */
  246. if (proto == IPPROTO_FRAGMENT) {
  247. if (m->m_len < off + sizeof(struct ip6_frag)) {
  248. RSS_DEBUG("short fragment frame?\n");
  249. return (-1);
  250. }
  251. ip6f = (const struct ip6_frag *)((c_caddr_t)ip6 + off);
  252. if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
  253. off = ip6_lasthdr(m, off, proto, &nxt);
  254. if (off < 0) {
  255. RSS_DEBUG("invalid extension header\n");
  256. return (-1);
  257. }
  258. proto = nxt;
  259. }
  260. }
  261. /*
  262. * If the mbuf flowid/flowtype matches the packet type,
  263. * and we don't support the 4-tuple version of the given protocol,
  264. * then signal to the owner that it can trust the flowid/flowtype
  265. * details.
  266. *
  267. * This is a little picky - eg, if TCPv6 / UDPv6 hashing
  268. * is supported but we got a TCP/UDP frame only 2-tuple hashed,
  269. * then we shouldn't just "trust" the 2-tuple hash. We need
  270. * a 4-tuple hash.
  271. */
  272. flowtype = M_HASHTYPE_GET(m);
  273. if (flowtype != M_HASHTYPE_NONE) {
  274. switch (proto) {
  275. case IPPROTO_UDP:
  276. if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6) &&
  277. (flowtype == M_HASHTYPE_RSS_UDP_IPV6)) {
  278. return (1);
  279. }
  280. /*
  281. * Only allow 2-tuple for UDP frames if we don't also
  282. * support 4-tuple for UDP.
  283. */
  284. if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) &&
  285. ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6) == 0) &&
  286. flowtype == M_HASHTYPE_RSS_IPV6) {
  287. return (1);
  288. }
  289. break;
  290. case IPPROTO_TCP:
  291. if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6) &&
  292. (flowtype == M_HASHTYPE_RSS_TCP_IPV6)) {
  293. return (1);
  294. }
  295. /*
  296. * Only allow 2-tuple for TCP frames if we don't also
  297. * support 4-tuple for TCP.
  298. */
  299. if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) &&
  300. ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6) == 0) &&
  301. flowtype == M_HASHTYPE_RSS_IPV6) {
  302. return (1);
  303. }
  304. break;
  305. default:
  306. if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) &&
  307. flowtype == M_HASHTYPE_RSS_IPV6) {
  308. return (1);
  309. }
  310. break;
  311. }
  312. }
  313. /*
  314. * Decode enough information to make a hash decision.
  315. */
  316. if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6) &&
  317. (proto == IPPROTO_TCP)) {
  318. if (m->m_len < off + sizeof(struct tcphdr)) {
  319. RSS_DEBUG("short TCP frame?\n");
  320. return (-1);
  321. }
  322. th = (const struct tcphdr *)((c_caddr_t)ip6 + off);
  323. return rss_proto_software_hash_v6(&ip6->ip6_src, &ip6->ip6_dst,
  324. th->th_sport,
  325. th->th_dport,
  326. proto,
  327. hashval,
  328. hashtype);
  329. } else if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6) &&
  330. (proto == IPPROTO_UDP)) {
  331. if (m->m_len < off + sizeof(struct udphdr)) {
  332. RSS_DEBUG("short UDP frame?\n");
  333. return (-1);
  334. }
  335. uh = (const struct udphdr *)((c_caddr_t)ip6 + off);
  336. return rss_proto_software_hash_v6(&ip6->ip6_src, &ip6->ip6_dst,
  337. uh->uh_sport,
  338. uh->uh_dport,
  339. proto,
  340. hashval,
  341. hashtype);
  342. } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) {
  343. /* Default to 2-tuple hash */
  344. return rss_proto_software_hash_v6(&ip6->ip6_src, &ip6->ip6_dst,
  345. 0, /* source port */
  346. 0, /* destination port */
  347. 0, /* IPPROTO_IP */
  348. hashval,
  349. hashtype);
  350. } else {
  351. RSS_DEBUG("no available hashtypes!\n");
  352. return (-1);
  353. }
  354. }
  355. /*
  356. * Similar to rss_m2cpuid, but designed to be used by the IPv6 NETISR
  357. * on incoming frames.
  358. *
  359. * If an existing RSS hash exists and it matches what the configured
  360. * hashing is, then use it.
  361. *
  362. * If there's an existing RSS hash but the desired hash is different,
  363. * or if there's no useful RSS hash, then calculate it via
  364. * the software path.
  365. *
  366. * XXX TODO: definitely want statistics here!
  367. */
  368. struct mbuf *
  369. rss_soft_m2cpuid_v6(struct mbuf *m, uintptr_t source, u_int *cpuid)
  370. {
  371. uint32_t hash_val, hash_type;
  372. int ret;
  373. M_ASSERTPKTHDR(m);
  374. ret = rss_mbuf_software_hash_v6(m, RSS_HASH_PKT_INGRESS,
  375. &hash_val, &hash_type);
  376. if (ret > 0) {
  377. /* mbuf has a valid hash already; don't need to modify it */
  378. *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
  379. } else if (ret == 0) {
  380. /* hash was done; update */
  381. m->m_pkthdr.flowid = hash_val;
  382. M_HASHTYPE_SET(m, hash_type);
  383. *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
  384. } else { /* ret < 0 */
  385. /* no hash was done */
  386. *cpuid = NETISR_CPUID_NONE;
  387. }
  388. return (m);
  389. }