if_epair.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause
  3. *
  4. * Copyright (c) 2008 The FreeBSD Foundation
  5. * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org>
  6. *
  7. * This software was developed by CK Software GmbH under sponsorship
  8. * from the FreeBSD Foundation.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  20. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  23. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29. * SUCH DAMAGE.
  30. */
  31. /*
  32. * A pair of virtual back-to-back connected ethernet like interfaces
  33. * (``two interfaces with a virtual cross-over cable'').
  34. *
  35. * This is mostly intended to be used to provide connectivity between
  36. * different virtual network stack instances.
  37. */
  38. #include <sys/cdefs.h>
  39. #include "opt_rss.h"
  40. #include "opt_inet.h"
  41. #include "opt_inet6.h"
  42. #include <sys/param.h>
  43. #include <sys/bus.h>
  44. #include <sys/hash.h>
  45. #include <sys/interrupt.h>
  46. #include <sys/jail.h>
  47. #include <sys/kernel.h>
  48. #include <sys/libkern.h>
  49. #include <sys/malloc.h>
  50. #include <sys/mbuf.h>
  51. #include <sys/module.h>
  52. #include <sys/proc.h>
  53. #include <sys/queue.h>
  54. #include <sys/sched.h>
  55. #include <sys/smp.h>
  56. #include <sys/socket.h>
  57. #include <sys/sockio.h>
  58. #include <sys/taskqueue.h>
  59. #include <net/bpf.h>
  60. #include <net/ethernet.h>
  61. #include <net/if.h>
  62. #include <net/if_var.h>
  63. #include <net/if_clone.h>
  64. #include <net/if_media.h>
  65. #include <net/if_var.h>
  66. #include <net/if_private.h>
  67. #include <net/if_types.h>
  68. #include <net/netisr.h>
  69. #ifdef RSS
  70. #include <net/rss_config.h>
  71. #ifdef INET
  72. #include <netinet/in_rss.h>
  73. #endif
  74. #ifdef INET6
  75. #include <netinet6/in6_rss.h>
  76. #endif
  77. #endif
  78. #include <net/vnet.h>
/* Base name of the driver; also used as malloc type name and if_dname. */
static const char epairname[] = "epair";
/* Maximum length handed to mbufq_init() for every per-queue packet queue. */
#define RXRSIZE 4096 /* Probably overkill by 4-8x. */
static MALLOC_DEFINE(M_EPAIR, epairname,
    "Pair of virtual cross-over connected Ethernet-like interfaces");
/* Per-VNET cloner handle, set up in vnet_epair_init(). */
VNET_DEFINE_STATIC(struct if_clone *, epair_cloner);
#define V_epair_cloner VNET(epair_cloner)
/*
 * Monotonic index fed into the MAC address hash; only modified in
 * epair_generate_mac() while holding the EPAIR_LOCK() mutex below.
 */
static unsigned int next_index = 0;
/* Helpers around epair_n_index_mtx, the mutex serialising next_index. */
#define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \
    NULL, MTX_DEF)
#define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx)
#define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx)
#define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx)
struct epair_softc;
/*
 * One receive queue of an epair interface.  Packets are appended by
 * epair_menq() and handed to the stack by the queue's task
 * (epair_tx_start_deferred()).
 */
struct epair_queue {
	struct mtx mtx; /* Held around accesses to q and state. */
	struct mbufq q; /* Packets waiting to be input on the owning ifp. */
	int id; /* Index in sc->queues; also selects epair_tasks.tq[]. */
	enum {
		EPAIR_QUEUE_IDLE, /* No task pending; set when the task finds q empty. */
		EPAIR_QUEUE_WAKING, /* Task has been enqueued but has not flushed q yet. */
		EPAIR_QUEUE_RUNNING, /* Task has flushed q and is delivering packets. */
	} state;
	struct task tx_task; /* Runs epair_tx_start_deferred() for this queue. */
	struct epair_softc *sc; /* Back pointer to the owning softc. */
};
/* Mutex behind the EPAIR_LOCK*() macros; serialises updates of next_index. */
static struct mtx epair_n_index_mtx;
/* Per-interface state; each half of a pair has its own softc. */
struct epair_softc {
	struct ifnet *ifp; /* This ifp. */
	struct ifnet *oifp; /* other ifp of pair. */
	int num_queues; /* Number of entries in queues[]. */
	struct epair_queue *queues; /* Array allocated in epair_alloc_sc(). */
	struct ifmedia media; /* Media config (fake). */
	STAILQ_ENTRY(epair_softc) entry; /* Not referenced in the visible source. */
};
/* Module-wide set of task queues servicing the per-interface queues. */
struct epair_tasks_t {
	int tasks; /* Number of task queues created by epair_mod_init(). */
	struct taskqueue *tq[MAXCPU]; /* Task queues, indexed by epair_queue id. */
};
static struct epair_tasks_t epair_tasks;
  118. static void
  119. epair_clear_mbuf(struct mbuf *m)
  120. {
  121. M_ASSERTPKTHDR(m);
  122. /* Remove any CSUM_SND_TAG as ether_input will barf. */
  123. if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
  124. m_snd_tag_rele(m->m_pkthdr.snd_tag);
  125. m->m_pkthdr.snd_tag = NULL;
  126. m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
  127. }
  128. /* Clear vlan information. */
  129. m->m_flags &= ~M_VLANTAG;
  130. m->m_pkthdr.ether_vtag = 0;
  131. m_tag_delete_nonpersistent(m);
  132. }
  133. static void
  134. epair_tx_start_deferred(void *arg, int pending)
  135. {
  136. struct epair_queue *q = (struct epair_queue *)arg;
  137. if_t ifp;
  138. struct mbuf *m, *n;
  139. bool resched;
  140. ifp = q->sc->ifp;
  141. if_ref(ifp);
  142. CURVNET_SET(ifp->if_vnet);
  143. mtx_lock(&q->mtx);
  144. m = mbufq_flush(&q->q);
  145. q->state = EPAIR_QUEUE_RUNNING;
  146. mtx_unlock(&q->mtx);
  147. while (m != NULL) {
  148. n = STAILQ_NEXT(m, m_stailqpkt);
  149. m->m_nextpkt = NULL;
  150. if_input(ifp, m);
  151. m = n;
  152. }
  153. /*
  154. * Avoid flushing the queue more than once per task. We can otherwise
  155. * end up starving ourselves in a multi-epair routing configuration.
  156. */
  157. mtx_lock(&q->mtx);
  158. if (!mbufq_empty(&q->q)) {
  159. resched = true;
  160. q->state = EPAIR_QUEUE_WAKING;
  161. } else {
  162. resched = false;
  163. q->state = EPAIR_QUEUE_IDLE;
  164. }
  165. mtx_unlock(&q->mtx);
  166. if (resched)
  167. taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task);
  168. CURVNET_RESTORE();
  169. if_rele(ifp);
  170. }
  171. static struct epair_queue *
  172. epair_select_queue(struct epair_softc *sc, struct mbuf *m)
  173. {
  174. uint32_t bucket;
  175. #ifdef RSS
  176. struct ether_header *eh;
  177. int ret;
  178. ret = rss_m2bucket(m, &bucket);
  179. if (ret) {
  180. /* Actually hash the packet. */
  181. eh = mtod(m, struct ether_header *);
  182. switch (ntohs(eh->ether_type)) {
  183. #ifdef INET
  184. case ETHERTYPE_IP:
  185. rss_soft_m2cpuid_v4(m, 0, &bucket);
  186. break;
  187. #endif
  188. #ifdef INET6
  189. case ETHERTYPE_IPV6:
  190. rss_soft_m2cpuid_v6(m, 0, &bucket);
  191. break;
  192. #endif
  193. default:
  194. bucket = 0;
  195. break;
  196. }
  197. }
  198. bucket %= sc->num_queues;
  199. #else
  200. bucket = 0;
  201. #endif
  202. return (&sc->queues[bucket]);
  203. }
  204. static void
  205. epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp)
  206. {
  207. M_ASSERTPKTHDR(m);
  208. epair_clear_mbuf(m);
  209. if_setrcvif(m, src_ifp);
  210. M_SETFIB(m, src_ifp->if_fib);
  211. MPASS(m->m_nextpkt == NULL);
  212. MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
  213. }
  214. static void
  215. epair_menq(struct mbuf *m, struct epair_softc *osc)
  216. {
  217. struct epair_queue *q;
  218. struct ifnet *ifp, *oifp;
  219. int error, len;
  220. bool mcast;
  221. /*
  222. * I know this looks weird. We pass the "other sc" as we need that one
  223. * and can get both ifps from it as well.
  224. */
  225. oifp = osc->ifp;
  226. ifp = osc->oifp;
  227. epair_prepare_mbuf(m, oifp);
  228. /* Save values as once the mbuf is queued, it's not ours anymore. */
  229. len = m->m_pkthdr.len;
  230. mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
  231. q = epair_select_queue(osc, m);
  232. mtx_lock(&q->mtx);
  233. if (q->state == EPAIR_QUEUE_IDLE) {
  234. q->state = EPAIR_QUEUE_WAKING;
  235. taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task);
  236. }
  237. error = mbufq_enqueue(&q->q, m);
  238. mtx_unlock(&q->mtx);
  239. if (error != 0) {
  240. m_freem(m);
  241. if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
  242. } else {
  243. if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
  244. if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
  245. if (mcast)
  246. if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
  247. if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
  248. }
  249. }
  250. static void
  251. epair_start(struct ifnet *ifp)
  252. {
  253. struct mbuf *m;
  254. struct epair_softc *sc;
  255. struct ifnet *oifp;
  256. /*
  257. * We get packets here from ether_output via if_handoff()
  258. * and need to put them into the input queue of the oifp
  259. * and will put the packet into the receive-queue (rxq) of the
  260. * other interface (oifp) of our pair.
  261. */
  262. sc = ifp->if_softc;
  263. oifp = sc->oifp;
  264. sc = oifp->if_softc;
  265. for (;;) {
  266. IFQ_DEQUEUE(&ifp->if_snd, m);
  267. if (m == NULL)
  268. break;
  269. M_ASSERTPKTHDR(m);
  270. BPF_MTAP(ifp, m);
  271. /* In case either interface is not usable drop the packet. */
  272. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  273. (ifp->if_flags & IFF_UP) == 0 ||
  274. (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  275. (oifp->if_flags & IFF_UP) == 0) {
  276. m_freem(m);
  277. continue;
  278. }
  279. epair_menq(m, sc);
  280. }
  281. }
  282. static int
  283. epair_transmit(struct ifnet *ifp, struct mbuf *m)
  284. {
  285. struct epair_softc *sc;
  286. struct ifnet *oifp;
  287. #ifdef ALTQ
  288. int len;
  289. bool mcast;
  290. #endif
  291. if (m == NULL)
  292. return (0);
  293. M_ASSERTPKTHDR(m);
  294. /*
  295. * We could just transmit this, but it makes testing easier if we're a
  296. * little bit more like real hardware.
  297. * Allow just that little bit extra for ethernet (and vlan) headers.
  298. */
  299. if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) {
  300. m_freem(m);
  301. if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  302. return (E2BIG);
  303. }
  304. /*
  305. * We are not going to use the interface en/dequeue mechanism
  306. * on the TX side. We are called from ether_output_frame()
  307. * and will put the packet into the receive-queue (rxq) of the
  308. * other interface (oifp) of our pair.
  309. */
  310. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  311. m_freem(m);
  312. if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  313. return (ENXIO);
  314. }
  315. if ((ifp->if_flags & IFF_UP) == 0) {
  316. m_freem(m);
  317. if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  318. return (ENETDOWN);
  319. }
  320. BPF_MTAP(ifp, m);
  321. /*
  322. * In case the outgoing interface is not usable,
  323. * drop the packet.
  324. */
  325. sc = ifp->if_softc;
  326. oifp = sc->oifp;
  327. if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  328. (oifp->if_flags & IFF_UP) == 0) {
  329. if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  330. m_freem(m);
  331. return (0);
  332. }
  333. #ifdef ALTQ
  334. len = m->m_pkthdr.len;
  335. mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
  336. int error = 0;
  337. /* Support ALTQ via the classic if_start() path. */
  338. IF_LOCK(&ifp->if_snd);
  339. if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
  340. ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
  341. if (error)
  342. if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
  343. IF_UNLOCK(&ifp->if_snd);
  344. if (!error) {
  345. if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
  346. if (mcast)
  347. if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
  348. epair_start(ifp);
  349. }
  350. return (error);
  351. }
  352. IF_UNLOCK(&ifp->if_snd);
  353. #endif
  354. epair_menq(m, oifp->if_softc);
  355. return (0);
  356. }
/*
 * if_qflush handler.  Intentionally a no-op: this function does not touch
 * any queue of the interface.
 */
static void
epair_qflush(struct ifnet *ifp __unused)
{
}
/*
 * ifmedia change callback (registered in epair_alloc_sc()).  The media is
 * fake, so any change request is accepted and ignored.  Always returns 0.
 */
static int
epair_media_change(struct ifnet *ifp __unused)
{
	/* Do nothing. */
	return (0);
}
  367. static void
  368. epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
  369. {
  370. imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
  371. imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
  372. }
  373. static int
  374. epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  375. {
  376. struct epair_softc *sc;
  377. struct ifreq *ifr;
  378. int error;
  379. ifr = (struct ifreq *)data;
  380. switch (cmd) {
  381. case SIOCSIFFLAGS:
  382. case SIOCADDMULTI:
  383. case SIOCDELMULTI:
  384. error = 0;
  385. break;
  386. case SIOCSIFMEDIA:
  387. case SIOCGIFMEDIA:
  388. sc = ifp->if_softc;
  389. error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
  390. break;
  391. case SIOCSIFMTU:
  392. /* We basically allow all kinds of MTUs. */
  393. ifp->if_mtu = ifr->ifr_mtu;
  394. error = 0;
  395. break;
  396. default:
  397. /* Let the common ethernet handler process this. */
  398. error = ether_ioctl(ifp, cmd, data);
  399. break;
  400. }
  401. return (error);
  402. }
/*
 * if_init handler (installed by epair_setup_ifp()).  Intentionally empty;
 * the running state is managed by epair_set_state() instead.
 */
static void
epair_init(void *dummy __unused)
{
}
  407. /*
  408. * Interface cloning functions.
  409. * We use our private ones so that we can create/destroy our secondary
  410. * device along with the primary one.
  411. */
  412. static int
  413. epair_clone_match(struct if_clone *ifc, const char *name)
  414. {
  415. const char *cp;
  416. /*
  417. * Our base name is epair.
  418. * Our interfaces will be named epair<n>[ab].
  419. * So accept anything of the following list:
  420. * - epair
  421. * - epair<n>
  422. * but not the epair<n>[ab] versions.
  423. */
  424. if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
  425. return (0);
  426. for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
  427. if (*cp < '0' || *cp > '9')
  428. return (0);
  429. }
  430. return (1);
  431. }
  432. static void
  433. epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
  434. {
  435. struct ifnet *ifp;
  436. uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
  437. ifp = scb->ifp;
  438. /* Copy epairNa etheraddr and change the last byte. */
  439. memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
  440. eaddr[5] = 0x0b;
  441. ether_ifattach(ifp, eaddr);
  442. if_clone_addif(ifc, ifp);
  443. }
  444. static struct epair_softc *
  445. epair_alloc_sc(struct if_clone *ifc)
  446. {
  447. struct epair_softc *sc;
  448. struct ifnet *ifp = if_alloc(IFT_ETHER);
  449. sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
  450. sc->ifp = ifp;
  451. sc->num_queues = epair_tasks.tasks;
  452. sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue),
  453. M_EPAIR, M_WAITOK);
  454. for (int i = 0; i < sc->num_queues; i++) {
  455. struct epair_queue *q = &sc->queues[i];
  456. q->id = i;
  457. q->state = EPAIR_QUEUE_IDLE;
  458. mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW);
  459. mbufq_init(&q->q, RXRSIZE);
  460. q->sc = sc;
  461. NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
  462. }
  463. /* Initialise pseudo media types. */
  464. ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status);
  465. ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL);
  466. ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T);
  467. return (sc);
  468. }
  469. static void
  470. epair_setup_ifp(struct epair_softc *sc, char *name, int unit)
  471. {
  472. struct ifnet *ifp = sc->ifp;
  473. ifp->if_softc = sc;
  474. strlcpy(ifp->if_xname, name, IFNAMSIZ);
  475. ifp->if_dname = epairname;
  476. ifp->if_dunit = unit;
  477. ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
  478. ifp->if_capabilities = IFCAP_VLAN_MTU;
  479. ifp->if_capenable = IFCAP_VLAN_MTU;
  480. ifp->if_transmit = epair_transmit;
  481. ifp->if_qflush = epair_qflush;
  482. ifp->if_start = epair_start;
  483. ifp->if_ioctl = epair_ioctl;
  484. ifp->if_init = epair_init;
  485. if_setsendqlen(ifp, ifqmaxlen);
  486. if_setsendqready(ifp);
  487. ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
  488. }
  489. static void
  490. epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr)
  491. {
  492. uint32_t key[3];
  493. uint32_t hash;
  494. uint64_t hostid;
  495. EPAIR_LOCK();
  496. #ifdef SMP
  497. /* Get an approximate distribution. */
  498. hash = next_index % mp_ncpus;
  499. #else
  500. hash = 0;
  501. #endif
  502. EPAIR_UNLOCK();
  503. /*
  504. * Calculate the etheraddr hashing the hostid and the
  505. * interface index. The result would be hopefully unique.
  506. * Note that the "a" component of an epair instance may get moved
  507. * to a different VNET after creation. In that case its index
  508. * will be freed and the index can get reused by new epair instance.
  509. * Make sure we do not create same etheraddr again.
  510. */
  511. getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
  512. if (hostid == 0)
  513. arc4rand(&hostid, sizeof(hostid), 0);
  514. struct ifnet *ifp = sc->ifp;
  515. EPAIR_LOCK();
  516. if (ifp->if_index > next_index)
  517. next_index = ifp->if_index;
  518. else
  519. next_index++;
  520. key[0] = (uint32_t)next_index;
  521. EPAIR_UNLOCK();
  522. key[1] = (uint32_t)(hostid & 0xffffffff);
  523. key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff);
  524. hash = jenkins_hash32(key, 3, 0);
  525. eaddr[0] = 0x02;
  526. memcpy(&eaddr[1], &hash, 4);
  527. eaddr[5] = 0x0a;
  528. }
  529. static void
  530. epair_free_sc(struct epair_softc *sc)
  531. {
  532. if_free(sc->ifp);
  533. ifmedia_removeall(&sc->media);
  534. for (int i = 0; i < sc->num_queues; i++) {
  535. struct epair_queue *q = &sc->queues[i];
  536. mtx_destroy(&q->mtx);
  537. }
  538. free(sc->queues, M_EPAIR);
  539. free(sc, M_EPAIR);
  540. }
  541. static void
  542. epair_set_state(struct ifnet *ifp, bool running)
  543. {
  544. if (running) {
  545. ifp->if_drv_flags |= IFF_DRV_RUNNING;
  546. if_link_state_change(ifp, LINK_STATE_UP);
  547. } else {
  548. if_link_state_change(ifp, LINK_STATE_DOWN);
  549. ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
  550. }
  551. }
  552. static int
  553. epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit)
  554. {
  555. int error = 0, unit, wildcard;
  556. char *dp;
  557. /* Try to see if a special unit was requested. */
  558. error = ifc_name2unit(name, &unit);
  559. if (error != 0)
  560. return (error);
  561. wildcard = (unit < 0);
  562. error = ifc_alloc_unit(ifc, &unit);
  563. if (error != 0)
  564. return (error);
  565. /*
  566. * If no unit had been given, we need to adjust the ifName.
  567. * Also make sure there is space for our extra [ab] suffix.
  568. */
  569. for (dp = name; *dp != '\0'; dp++);
  570. if (wildcard) {
  571. int slen = snprintf(dp, len - (dp - name), "%d", unit);
  572. if (slen > len - (dp - name) - 1) {
  573. /* ifName too long. */
  574. error = ENOSPC;
  575. goto done;
  576. }
  577. dp += slen;
  578. }
  579. if (len - (dp - name) - 1 < 1) {
  580. /* No space left for our [ab] suffix. */
  581. error = ENOSPC;
  582. goto done;
  583. }
  584. *dp = 'b';
  585. /* Must not change dp so we can replace 'a' by 'b' later. */
  586. *(dp+1) = '\0';
  587. /* Check if 'a' and 'b' interfaces already exist. */
  588. if (ifunit(name) != NULL) {
  589. error = EEXIST;
  590. goto done;
  591. }
  592. *dp = 'a';
  593. if (ifunit(name) != NULL) {
  594. error = EEXIST;
  595. goto done;
  596. }
  597. *punit = unit;
  598. done:
  599. if (error != 0)
  600. ifc_free_unit(ifc, unit);
  601. return (error);
  602. }
  603. static int
  604. epair_clone_create(struct if_clone *ifc, char *name, size_t len,
  605. struct ifc_data *ifd, struct ifnet **ifpp)
  606. {
  607. struct epair_softc *sca, *scb;
  608. struct ifnet *ifp;
  609. char *dp;
  610. int error, unit;
  611. uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
  612. error = epair_handle_unit(ifc, name, len, &unit);
  613. if (error != 0)
  614. return (error);
  615. /* Allocate memory for both [ab] interfaces */
  616. sca = epair_alloc_sc(ifc);
  617. scb = epair_alloc_sc(ifc);
  618. /*
  619. * Cross-reference the interfaces so we will be able to free both.
  620. */
  621. sca->oifp = scb->ifp;
  622. scb->oifp = sca->ifp;
  623. /* Finish initialization of interface <n>a. */
  624. ifp = sca->ifp;
  625. epair_setup_ifp(sca, name, unit);
  626. epair_generate_mac(sca, eaddr);
  627. ether_ifattach(ifp, eaddr);
  628. /* Swap the name and finish initialization of interface <n>b. */
  629. dp = name + strlen(name) - 1;
  630. *dp = 'b';
  631. epair_setup_ifp(scb, name, unit);
  632. ifp = scb->ifp;
  633. /* We need to play some tricks here for the second interface. */
  634. strlcpy(name, epairname, len);
  635. /* Correctly set the name for the cloner list. */
  636. strlcpy(name, scb->ifp->if_xname, len);
  637. epair_clone_add(ifc, scb);
  638. /*
  639. * Restore name to <n>a as the ifp for this will go into the
  640. * cloner list for the initial call.
  641. */
  642. strlcpy(name, sca->ifp->if_xname, len);
  643. /* Tell the world, that we are ready to rock. */
  644. epair_set_state(sca->ifp, true);
  645. epair_set_state(scb->ifp, true);
  646. *ifpp = sca->ifp;
  647. return (0);
  648. }
  649. static void
  650. epair_drain_rings(struct epair_softc *sc)
  651. {
  652. for (int i = 0; i < sc->num_queues; i++) {
  653. struct epair_queue *q;
  654. struct mbuf *m, *n;
  655. q = &sc->queues[i];
  656. mtx_lock(&q->mtx);
  657. m = mbufq_flush(&q->q);
  658. mtx_unlock(&q->mtx);
  659. for (; m != NULL; m = n) {
  660. n = m->m_nextpkt;
  661. m_freem(m);
  662. }
  663. }
  664. }
/*
 * Cloner destroy callback: tear down both interfaces of the pair "ifp"
 * belongs to.
 *
 * Destroying the second interface goes through if_clone_destroyif(), which
 * calls back into this function; that nested call is recognised by the
 * NULL if_softc and returns immediately.  Always returns 0; a failure to
 * destroy the second interface panics.  "flags" is not used.
 */
static int
epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
	struct ifnet *oifp;
	struct epair_softc *sca, *scb;
	int unit, error;
	/*
	 * In case we called into if_clone_destroyif() ourselves
	 * again to remove the second interface, the softc will be
	 * NULL. In that case do not do anything but return success.
	 */
	if (ifp->if_softc == NULL)
		return (0);
	unit = ifp->if_dunit;
	sca = ifp->if_softc;
	oifp = sca->oifp;
	scb = oifp->if_softc;
	/* First get the interfaces down and detached. */
	epair_set_state(ifp, false);
	epair_set_state(oifp, false);
	ether_ifdetach(ifp);
	ether_ifdetach(oifp);
	/*
	 * Then free any queued packets and all the resources.  The other
	 * interface is handled within its own vnet, and its softc pointer is
	 * cleared first so that the nested destroy call returns early.
	 */
	CURVNET_SET_QUIET(oifp->if_vnet);
	epair_drain_rings(scb);
	oifp->if_softc = NULL;
	error = if_clone_destroyif(ifc, oifp);
	if (error)
		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
		    __func__, error);
	epair_free_sc(scb);
	CURVNET_RESTORE();
	epair_drain_rings(sca);
	epair_free_sc(sca);
	/* Last free the cloner unit. */
	ifc_free_unit(ifc, unit);
	return (0);
}
/*
 * Per-VNET initialisation: attach the "epair" cloner with our private
 * match/create/destroy callbacks and remember it in V_epair_cloner.
 */
static void
vnet_epair_init(const void *unused __unused)
{
	struct if_clone_addreq req = {
		.match_f = epair_clone_match,
		.create_f = epair_clone_create,
		.destroy_f = epair_clone_destroy,
	};
	V_epair_cloner = ifc_attach_cloner(epairname, &req);
}
/* Register vnet_epair_init() to run for each VNET. */
VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_epair_init, NULL);
/* Per-VNET teardown: detach the cloner attached by vnet_epair_init(). */
static void
vnet_epair_uninit(const void *unused __unused)
{
	ifc_detach_cloner(V_epair_cloner);
}
/* Register vnet_epair_uninit() to run when a VNET is torn down. */
VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
    vnet_epair_uninit, NULL);
  722. static int
  723. epair_mod_init(void)
  724. {
  725. char name[32];
  726. epair_tasks.tasks = 0;
  727. #ifdef RSS
  728. int cpu;
  729. CPU_FOREACH(cpu) {
  730. cpuset_t cpu_mask;
  731. /* Pin to this CPU so we get appropriate NUMA allocations. */
  732. thread_lock(curthread);
  733. sched_bind(curthread, cpu);
  734. thread_unlock(curthread);
  735. snprintf(name, sizeof(name), "epair_task_%d", cpu);
  736. epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK,
  737. taskqueue_thread_enqueue,
  738. &epair_tasks.tq[cpu]);
  739. CPU_SETOF(cpu, &cpu_mask);
  740. taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET,
  741. &cpu_mask, "%s", name);
  742. epair_tasks.tasks++;
  743. }
  744. thread_lock(curthread);
  745. sched_unbind(curthread);
  746. thread_unlock(curthread);
  747. #else
  748. snprintf(name, sizeof(name), "epair_task");
  749. epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK,
  750. taskqueue_thread_enqueue,
  751. &epair_tasks.tq[0]);
  752. taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name);
  753. epair_tasks.tasks = 1;
  754. #endif
  755. return (0);
  756. }
  757. static void
  758. epair_mod_cleanup(void)
  759. {
  760. for (int i = 0; i < epair_tasks.tasks; i++) {
  761. taskqueue_drain_all(epair_tasks.tq[i]);
  762. taskqueue_free(epair_tasks.tq[i]);
  763. }
  764. }
  765. static int
  766. epair_modevent(module_t mod, int type, void *data)
  767. {
  768. int ret;
  769. switch (type) {
  770. case MOD_LOAD:
  771. EPAIR_LOCK_INIT();
  772. ret = epair_mod_init();
  773. if (ret != 0)
  774. return (ret);
  775. if (bootverbose)
  776. printf("%s: %s initialized.\n", __func__, epairname);
  777. break;
  778. case MOD_UNLOAD:
  779. epair_mod_cleanup();
  780. EPAIR_LOCK_DESTROY();
  781. if (bootverbose)
  782. printf("%s: %s unloaded.\n", __func__, epairname);
  783. break;
  784. default:
  785. return (EOPNOTSUPP);
  786. }
  787. return (0);
  788. }
/* Module description: name, event handler and (unused) extra argument. */
static moduledata_t epair_mod = {
	"if_epair",
	epair_modevent,
	0
};
/* Register the module and declare its version (3). */
DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
MODULE_VERSION(if_epair, 3);