if_lagg.c 65 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722
  1. /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
  2. /*
  3. * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
  4. * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
  5. * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
  6. *
  7. * Permission to use, copy, modify, and distribute this software for any
  8. * purpose with or without fee is hereby granted, provided that the above
  9. * copyright notice and this permission notice appear in all copies.
  10. *
  11. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18. */
  19. #include <sys/cdefs.h>
  20. #include "opt_inet.h"
  21. #include "opt_inet6.h"
  22. #include "opt_kern_tls.h"
  23. #include "opt_ratelimit.h"
  24. #include <sys/param.h>
  25. #include <sys/kernel.h>
  26. #include <sys/malloc.h>
  27. #include <sys/mbuf.h>
  28. #include <sys/queue.h>
  29. #include <sys/socket.h>
  30. #include <sys/sockio.h>
  31. #include <sys/sysctl.h>
  32. #include <sys/module.h>
  33. #include <sys/priv.h>
  34. #include <sys/systm.h>
  35. #include <sys/proc.h>
  36. #include <sys/lock.h>
  37. #include <sys/rmlock.h>
  38. #include <sys/sx.h>
  39. #include <sys/taskqueue.h>
  40. #include <sys/eventhandler.h>
  41. #include <net/ethernet.h>
  42. #include <net/if.h>
  43. #include <net/if_clone.h>
  44. #include <net/if_arp.h>
  45. #include <net/if_dl.h>
  46. #include <net/if_media.h>
  47. #include <net/if_types.h>
  48. #include <net/if_var.h>
  49. #include <net/if_private.h>
  50. #include <net/bpf.h>
  51. #include <net/route.h>
  52. #include <net/vnet.h>
  53. #include <net/infiniband.h>
  54. #if defined(INET) || defined(INET6)
  55. #include <netinet/in.h>
  56. #include <netinet/ip.h>
  57. #endif
  58. #ifdef INET
  59. #include <netinet/in_systm.h>
  60. #include <netinet/if_ether.h>
  61. #endif
  62. #ifdef INET6
  63. #include <netinet/ip6.h>
  64. #include <netinet6/in6_var.h>
  65. #include <netinet6/in6_ifattach.h>
  66. #endif
  67. #include <net/if_vlan_var.h>
  68. #include <net/if_lagg.h>
  69. #include <net/ieee8023ad_lacp.h>
  70. #ifdef DEV_NETMAP
  71. MODULE_DEPEND(if_lagg, netmap, 1, 1, 1);
  72. #endif
  73. #define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx")
  74. #define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx)
  75. #define LAGG_XLOCK(_sc) sx_xlock(&(_sc)->sc_sx)
  76. #define LAGG_XUNLOCK(_sc) sx_xunlock(&(_sc)->sc_sx)
  77. #define LAGG_XLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_XLOCKED)
  78. #define LAGG_SLOCK(_sc) sx_slock(&(_sc)->sc_sx)
  79. #define LAGG_SUNLOCK(_sc) sx_sunlock(&(_sc)->sc_sx)
  80. #define LAGG_SXLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_LOCKED)
  81. /* Special flags we should propagate to the lagg ports. */
  82. static struct {
  83. int flag;
  84. int (*func)(struct ifnet *, int);
  85. } lagg_pflags[] = {
  86. {IFF_PROMISC, ifpromisc},
  87. {IFF_ALLMULTI, if_allmulti},
  88. {0, NULL}
  89. };
  90. struct lagg_snd_tag {
  91. struct m_snd_tag com;
  92. struct m_snd_tag *tag;
  93. };
  94. VNET_DEFINE_STATIC(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
  95. #define V_lagg_list VNET(lagg_list)
  96. VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
  97. #define V_lagg_list_mtx VNET(lagg_list_mtx)
  98. #define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \
  99. "if_lagg list", NULL, MTX_DEF)
  100. #define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx)
  101. #define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx)
  102. #define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx)
  103. static eventhandler_tag lagg_detach_cookie = NULL;
  104. static int lagg_clone_create(struct if_clone *, char *, size_t,
  105. struct ifc_data *, struct ifnet **);
  106. static int lagg_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
  107. VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
  108. #define V_lagg_cloner VNET(lagg_cloner)
  109. static const char laggname[] = "lagg";
  110. static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
  111. static void lagg_capabilities(struct lagg_softc *);
  112. static int lagg_port_create(struct lagg_softc *, struct ifnet *);
  113. static int lagg_port_destroy(struct lagg_port *, int);
  114. static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
  115. static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
  116. static void lagg_linkstate(struct lagg_softc *);
  117. static void lagg_port_state(struct ifnet *, int);
  118. static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
  119. static int lagg_port_output(struct ifnet *, struct mbuf *,
  120. const struct sockaddr *, struct route *);
  121. static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
  122. #ifdef LAGG_PORT_STACKING
  123. static int lagg_port_checkstacking(struct lagg_softc *);
  124. #endif
  125. static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
  126. static void lagg_if_updown(struct lagg_softc *, bool);
  127. static void lagg_init(void *);
  128. static void lagg_stop(struct lagg_softc *);
  129. static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
  130. #if defined(KERN_TLS) || defined(RATELIMIT)
  131. static int lagg_snd_tag_alloc(struct ifnet *,
  132. union if_snd_tag_alloc_params *,
  133. struct m_snd_tag **);
  134. static int lagg_snd_tag_modify(struct m_snd_tag *,
  135. union if_snd_tag_modify_params *);
  136. static int lagg_snd_tag_query(struct m_snd_tag *,
  137. union if_snd_tag_query_params *);
  138. static void lagg_snd_tag_free(struct m_snd_tag *);
  139. static struct m_snd_tag *lagg_next_snd_tag(struct m_snd_tag *);
  140. static void lagg_ratelimit_query(struct ifnet *,
  141. struct if_ratelimit_query_results *);
  142. #endif
  143. static int lagg_setmulti(struct lagg_port *);
  144. static int lagg_clrmulti(struct lagg_port *);
  145. static void lagg_setcaps(struct lagg_port *, int cap, int cap2);
  146. static int lagg_setflag(struct lagg_port *, int, int,
  147. int (*func)(struct ifnet *, int));
  148. static int lagg_setflags(struct lagg_port *, int status);
  149. static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
  150. static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
  151. static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
  152. static void lagg_qflush(struct ifnet *);
  153. static int lagg_media_change(struct ifnet *);
  154. static void lagg_media_status(struct ifnet *, struct ifmediareq *);
  155. static struct lagg_port *lagg_link_active(struct lagg_softc *,
  156. struct lagg_port *);
  157. /* Simple round robin */
  158. static void lagg_rr_attach(struct lagg_softc *);
  159. static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
  160. /* Active failover */
  161. static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
  162. static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
  163. struct mbuf *);
  164. /* Loadbalancing */
  165. static void lagg_lb_attach(struct lagg_softc *);
  166. static void lagg_lb_detach(struct lagg_softc *);
  167. static int lagg_lb_port_create(struct lagg_port *);
  168. static void lagg_lb_port_destroy(struct lagg_port *);
  169. static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
  170. static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
  171. /* Broadcast */
  172. static int lagg_bcast_start(struct lagg_softc *, struct mbuf *);
  173. /* 802.3ad LACP */
  174. static void lagg_lacp_attach(struct lagg_softc *);
  175. static void lagg_lacp_detach(struct lagg_softc *);
  176. static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
  177. static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
  178. struct mbuf *);
  179. static void lagg_lacp_lladdr(struct lagg_softc *);
  180. /* Default input */
  181. static struct mbuf *lagg_default_input(struct lagg_softc *, struct lagg_port *,
  182. struct mbuf *);
  183. /* lagg protocol table */
  184. static const struct lagg_proto {
  185. lagg_proto pr_num;
  186. void (*pr_attach)(struct lagg_softc *);
  187. void (*pr_detach)(struct lagg_softc *);
  188. int (*pr_start)(struct lagg_softc *, struct mbuf *);
  189. struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *,
  190. struct mbuf *);
  191. int (*pr_addport)(struct lagg_port *);
  192. void (*pr_delport)(struct lagg_port *);
  193. void (*pr_linkstate)(struct lagg_port *);
  194. void (*pr_init)(struct lagg_softc *);
  195. void (*pr_stop)(struct lagg_softc *);
  196. void (*pr_lladdr)(struct lagg_softc *);
  197. void (*pr_request)(struct lagg_softc *, void *);
  198. void (*pr_portreq)(struct lagg_port *, void *);
  199. } lagg_protos[] = {
  200. {
  201. .pr_num = LAGG_PROTO_NONE
  202. },
  203. {
  204. .pr_num = LAGG_PROTO_ROUNDROBIN,
  205. .pr_attach = lagg_rr_attach,
  206. .pr_start = lagg_rr_start,
  207. .pr_input = lagg_default_input,
  208. },
  209. {
  210. .pr_num = LAGG_PROTO_FAILOVER,
  211. .pr_start = lagg_fail_start,
  212. .pr_input = lagg_fail_input,
  213. },
  214. {
  215. .pr_num = LAGG_PROTO_LOADBALANCE,
  216. .pr_attach = lagg_lb_attach,
  217. .pr_detach = lagg_lb_detach,
  218. .pr_start = lagg_lb_start,
  219. .pr_input = lagg_default_input,
  220. .pr_addport = lagg_lb_port_create,
  221. .pr_delport = lagg_lb_port_destroy,
  222. },
  223. {
  224. .pr_num = LAGG_PROTO_LACP,
  225. .pr_attach = lagg_lacp_attach,
  226. .pr_detach = lagg_lacp_detach,
  227. .pr_start = lagg_lacp_start,
  228. .pr_input = lagg_lacp_input,
  229. .pr_addport = lacp_port_create,
  230. .pr_delport = lacp_port_destroy,
  231. .pr_linkstate = lacp_linkstate,
  232. .pr_init = lacp_init,
  233. .pr_stop = lacp_stop,
  234. .pr_lladdr = lagg_lacp_lladdr,
  235. .pr_request = lacp_req,
  236. .pr_portreq = lacp_portreq,
  237. },
  238. {
  239. .pr_num = LAGG_PROTO_BROADCAST,
  240. .pr_start = lagg_bcast_start,
  241. .pr_input = lagg_default_input,
  242. },
  243. };
  244. SYSCTL_DECL(_net_link);
  245. SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  246. "Link Aggregation");
  247. /* Allow input on any failover links */
  248. VNET_DEFINE_STATIC(int, lagg_failover_rx_all);
  249. #define V_lagg_failover_rx_all VNET(lagg_failover_rx_all)
  250. SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
  251. &VNET_NAME(lagg_failover_rx_all), 0,
  252. "Accept input from any interface in a failover lagg");
  253. /* Default value for using flowid */
  254. VNET_DEFINE_STATIC(int, def_use_flowid) = 0;
  255. #define V_def_use_flowid VNET(def_use_flowid)
  256. SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid,
  257. CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(def_use_flowid), 0,
  258. "Default setting for using flow id for load sharing");
  259. /* Default value for using numa */
  260. VNET_DEFINE_STATIC(int, def_use_numa) = 1;
  261. #define V_def_use_numa VNET(def_use_numa)
  262. SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa,
  263. CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(def_use_numa), 0,
  264. "Use numa to steer flows");
  265. /* Default value for flowid shift */
  266. VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
  267. #define V_def_flowid_shift VNET(def_flowid_shift)
  268. SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift,
  269. CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(def_flowid_shift), 0,
  270. "Default setting for flowid shift for load sharing");
  271. static void
  272. vnet_lagg_init(const void *unused __unused)
  273. {
  274. LAGG_LIST_LOCK_INIT();
  275. SLIST_INIT(&V_lagg_list);
  276. struct if_clone_addreq req = {
  277. .create_f = lagg_clone_create,
  278. .destroy_f = lagg_clone_destroy,
  279. .flags = IFC_F_AUTOUNIT,
  280. };
  281. V_lagg_cloner = ifc_attach_cloner(laggname, &req);
  282. }
  283. VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  284. vnet_lagg_init, NULL);
  285. static void
  286. vnet_lagg_uninit(const void *unused __unused)
  287. {
  288. ifc_detach_cloner(V_lagg_cloner);
  289. LAGG_LIST_LOCK_DESTROY();
  290. }
  291. VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
  292. vnet_lagg_uninit, NULL);
  293. static int
  294. lagg_modevent(module_t mod, int type, void *data)
  295. {
  296. switch (type) {
  297. case MOD_LOAD:
  298. lagg_input_ethernet_p = lagg_input_ethernet;
  299. lagg_input_infiniband_p = lagg_input_infiniband;
  300. lagg_linkstate_p = lagg_port_state;
  301. lagg_detach_cookie = EVENTHANDLER_REGISTER(
  302. ifnet_departure_event, lagg_port_ifdetach, NULL,
  303. EVENTHANDLER_PRI_ANY);
  304. break;
  305. case MOD_UNLOAD:
  306. EVENTHANDLER_DEREGISTER(ifnet_departure_event,
  307. lagg_detach_cookie);
  308. lagg_input_ethernet_p = NULL;
  309. lagg_input_infiniband_p = NULL;
  310. lagg_linkstate_p = NULL;
  311. break;
  312. default:
  313. return (EOPNOTSUPP);
  314. }
  315. return (0);
  316. }
  317. static moduledata_t lagg_mod = {
  318. "if_lagg",
  319. lagg_modevent,
  320. 0
  321. };
  322. DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  323. MODULE_VERSION(if_lagg, 1);
  324. MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
  325. static void
  326. lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
  327. {
  328. LAGG_XLOCK_ASSERT(sc);
  329. KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
  330. __func__, sc));
  331. if (sc->sc_ifflags & IFF_DEBUG)
  332. if_printf(sc->sc_ifp, "using proto %u\n", pr);
  333. if (lagg_protos[pr].pr_attach != NULL)
  334. lagg_protos[pr].pr_attach(sc);
  335. sc->sc_proto = pr;
  336. }
  337. static void
  338. lagg_proto_detach(struct lagg_softc *sc)
  339. {
  340. lagg_proto pr;
  341. LAGG_XLOCK_ASSERT(sc);
  342. pr = sc->sc_proto;
  343. sc->sc_proto = LAGG_PROTO_NONE;
  344. if (lagg_protos[pr].pr_detach != NULL)
  345. lagg_protos[pr].pr_detach(sc);
  346. }
  347. static inline int
  348. lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
  349. {
  350. return (lagg_protos[sc->sc_proto].pr_start(sc, m));
  351. }
  352. static inline struct mbuf *
  353. lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  354. {
  355. return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
  356. }
  357. static int
  358. lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
  359. {
  360. if (lagg_protos[sc->sc_proto].pr_addport == NULL)
  361. return (0);
  362. else
  363. return (lagg_protos[sc->sc_proto].pr_addport(lp));
  364. }
  365. static void
  366. lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
  367. {
  368. if (lagg_protos[sc->sc_proto].pr_delport != NULL)
  369. lagg_protos[sc->sc_proto].pr_delport(lp);
  370. }
  371. static void
  372. lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
  373. {
  374. if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
  375. lagg_protos[sc->sc_proto].pr_linkstate(lp);
  376. }
  377. static void
  378. lagg_proto_init(struct lagg_softc *sc)
  379. {
  380. if (lagg_protos[sc->sc_proto].pr_init != NULL)
  381. lagg_protos[sc->sc_proto].pr_init(sc);
  382. }
  383. static void
  384. lagg_proto_stop(struct lagg_softc *sc)
  385. {
  386. if (lagg_protos[sc->sc_proto].pr_stop != NULL)
  387. lagg_protos[sc->sc_proto].pr_stop(sc);
  388. }
  389. static void
  390. lagg_proto_lladdr(struct lagg_softc *sc)
  391. {
  392. if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
  393. lagg_protos[sc->sc_proto].pr_lladdr(sc);
  394. }
  395. static void
  396. lagg_proto_request(struct lagg_softc *sc, void *v)
  397. {
  398. if (lagg_protos[sc->sc_proto].pr_request != NULL)
  399. lagg_protos[sc->sc_proto].pr_request(sc, v);
  400. }
  401. static void
  402. lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
  403. {
  404. if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
  405. lagg_protos[sc->sc_proto].pr_portreq(lp, v);
  406. }
  407. /*
  408. * This routine is run via an vlan
  409. * config EVENT
  410. */
  411. static void
  412. lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  413. {
  414. struct lagg_softc *sc = ifp->if_softc;
  415. struct lagg_port *lp;
  416. if (ifp->if_softc != arg) /* Not our event */
  417. return;
  418. LAGG_XLOCK(sc);
  419. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  420. EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
  421. LAGG_XUNLOCK(sc);
  422. }
  423. /*
  424. * This routine is run via an vlan
  425. * unconfig EVENT
  426. */
  427. static void
  428. lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  429. {
  430. struct lagg_softc *sc = ifp->if_softc;
  431. struct lagg_port *lp;
  432. if (ifp->if_softc != arg) /* Not our event */
  433. return;
  434. LAGG_XLOCK(sc);
  435. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  436. EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
  437. LAGG_XUNLOCK(sc);
  438. }
  439. static int
  440. lagg_clone_create(struct if_clone *ifc, char *name, size_t len,
  441. struct ifc_data *ifd, struct ifnet **ifpp)
  442. {
  443. struct iflaggparam iflp;
  444. struct lagg_softc *sc;
  445. struct ifnet *ifp;
  446. int if_type;
  447. int error;
  448. static const uint8_t eaddr[LAGG_ADDR_LEN];
  449. if (ifd->params != NULL) {
  450. error = ifc_copyin(ifd, &iflp, sizeof(iflp));
  451. if (error)
  452. return (error);
  453. switch (iflp.lagg_type) {
  454. case LAGG_TYPE_ETHERNET:
  455. if_type = IFT_ETHER;
  456. break;
  457. case LAGG_TYPE_INFINIBAND:
  458. if_type = IFT_INFINIBAND;
  459. break;
  460. default:
  461. return (EINVAL);
  462. }
  463. } else {
  464. if_type = IFT_ETHER;
  465. }
  466. sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK | M_ZERO);
  467. ifp = sc->sc_ifp = if_alloc(if_type);
  468. LAGG_SX_INIT(sc);
  469. mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
  470. callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
  471. LAGG_XLOCK(sc);
  472. if (V_def_use_flowid)
  473. sc->sc_opts |= LAGG_OPT_USE_FLOWID;
  474. if (V_def_use_numa)
  475. sc->sc_opts |= LAGG_OPT_USE_NUMA;
  476. sc->flowid_shift = V_def_flowid_shift;
  477. /* Hash all layers by default */
  478. sc->sc_flags = MBUF_HASHFLAG_L2 | MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4;
  479. lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
  480. CK_SLIST_INIT(&sc->sc_ports);
  481. switch (if_type) {
  482. case IFT_ETHER:
  483. /* Initialise pseudo media types */
  484. ifmedia_init(&sc->sc_media, 0, lagg_media_change,
  485. lagg_media_status);
  486. ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  487. ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
  488. if_initname(ifp, laggname, ifd->unit);
  489. ifp->if_transmit = lagg_transmit_ethernet;
  490. break;
  491. case IFT_INFINIBAND:
  492. if_initname(ifp, laggname, ifd->unit);
  493. ifp->if_transmit = lagg_transmit_infiniband;
  494. break;
  495. default:
  496. break;
  497. }
  498. ifp->if_softc = sc;
  499. ifp->if_qflush = lagg_qflush;
  500. ifp->if_init = lagg_init;
  501. ifp->if_ioctl = lagg_ioctl;
  502. ifp->if_get_counter = lagg_get_counter;
  503. ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
  504. #if defined(KERN_TLS) || defined(RATELIMIT)
  505. ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
  506. ifp->if_ratelimit_query = lagg_ratelimit_query;
  507. #endif
  508. ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
  509. /*
  510. * Attach as an ordinary ethernet device, children will be attached
  511. * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
  512. */
  513. switch (if_type) {
  514. case IFT_ETHER:
  515. ether_ifattach(ifp, eaddr);
  516. break;
  517. case IFT_INFINIBAND:
  518. infiniband_ifattach(ifp, eaddr, sc->sc_bcast_addr);
  519. break;
  520. default:
  521. break;
  522. }
  523. sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  524. lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  525. sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  526. lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  527. /* Insert into the global list of laggs */
  528. LAGG_LIST_LOCK();
  529. SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
  530. LAGG_LIST_UNLOCK();
  531. LAGG_XUNLOCK(sc);
  532. *ifpp = ifp;
  533. return (0);
  534. }
  535. static int
  536. lagg_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
  537. {
  538. struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  539. struct lagg_port *lp;
  540. LAGG_XLOCK(sc);
  541. sc->sc_destroying = 1;
  542. lagg_stop(sc);
  543. ifp->if_flags &= ~IFF_UP;
  544. EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
  545. EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
  546. /* Shutdown and remove lagg ports */
  547. while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL)
  548. lagg_port_destroy(lp, 1);
  549. /* Unhook the aggregation protocol */
  550. lagg_proto_detach(sc);
  551. LAGG_XUNLOCK(sc);
  552. switch (ifp->if_type) {
  553. case IFT_ETHER:
  554. ether_ifdetach(ifp);
  555. ifmedia_removeall(&sc->sc_media);
  556. break;
  557. case IFT_INFINIBAND:
  558. infiniband_ifdetach(ifp);
  559. break;
  560. default:
  561. break;
  562. }
  563. if_free(ifp);
  564. LAGG_LIST_LOCK();
  565. SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
  566. LAGG_LIST_UNLOCK();
  567. mtx_destroy(&sc->sc_mtx);
  568. LAGG_SX_DESTROY(sc);
  569. free(sc, M_LAGG);
  570. return (0);
  571. }
  572. static void
  573. lagg_capabilities(struct lagg_softc *sc)
  574. {
  575. struct lagg_port *lp;
  576. int cap, cap2, ena, ena2, pena, pena2;
  577. uint64_t hwa;
  578. struct ifnet_hw_tsomax hw_tsomax;
  579. LAGG_XLOCK_ASSERT(sc);
  580. /* Get common enabled capabilities for the lagg ports */
  581. ena = ena2 = ~0;
  582. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  583. ena &= lp->lp_ifp->if_capenable;
  584. ena2 &= lp->lp_ifp->if_capenable2;
  585. }
  586. if (CK_SLIST_FIRST(&sc->sc_ports) == NULL)
  587. ena = ena2 = 0;
  588. /*
  589. * Apply common enabled capabilities back to the lagg ports.
  590. * May require several iterations if they are dependent.
  591. */
  592. do {
  593. pena = ena;
  594. pena2 = ena2;
  595. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  596. lagg_setcaps(lp, ena, ena2);
  597. ena &= lp->lp_ifp->if_capenable;
  598. ena2 &= lp->lp_ifp->if_capenable2;
  599. }
  600. } while (pena != ena || pena2 != ena2);
  601. /* Get other capabilities from the lagg ports */
  602. cap = cap2 = ~0;
  603. hwa = ~(uint64_t)0;
  604. memset(&hw_tsomax, 0, sizeof(hw_tsomax));
  605. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  606. cap &= lp->lp_ifp->if_capabilities;
  607. cap2 &= lp->lp_ifp->if_capabilities2;
  608. hwa &= lp->lp_ifp->if_hwassist;
  609. if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
  610. }
  611. if (CK_SLIST_FIRST(&sc->sc_ports) == NULL)
  612. cap = cap2 = hwa = 0;
  613. if (sc->sc_ifp->if_capabilities != cap ||
  614. sc->sc_ifp->if_capenable != ena ||
  615. sc->sc_ifp->if_capenable2 != ena2 ||
  616. sc->sc_ifp->if_hwassist != hwa ||
  617. if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
  618. sc->sc_ifp->if_capabilities = cap;
  619. sc->sc_ifp->if_capabilities2 = cap2;
  620. sc->sc_ifp->if_capenable = ena;
  621. sc->sc_ifp->if_capenable2 = ena2;
  622. sc->sc_ifp->if_hwassist = hwa;
  623. getmicrotime(&sc->sc_ifp->if_lastchange);
  624. if (sc->sc_ifflags & IFF_DEBUG)
  625. if_printf(sc->sc_ifp,
  626. "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
  627. }
  628. }
  629. static int
  630. lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
  631. {
  632. struct lagg_softc *sc_ptr;
  633. struct lagg_port *lp, *tlp;
  634. struct ifreq ifr;
  635. int error, i, oldmtu;
  636. int if_type;
  637. uint64_t *pval;
  638. LAGG_XLOCK_ASSERT(sc);
  639. if (sc->sc_ifp == ifp) {
  640. if_printf(sc->sc_ifp,
  641. "cannot add a lagg to itself as a port\n");
  642. return (EINVAL);
  643. }
  644. if (sc->sc_destroying == 1)
  645. return (ENXIO);
  646. /* Limit the maximal number of lagg ports */
  647. if (sc->sc_count >= LAGG_MAX_PORTS)
  648. return (ENOSPC);
  649. /* Check if port has already been associated to a lagg */
  650. if (ifp->if_lagg != NULL) {
  651. /* Port is already in the current lagg? */
  652. lp = (struct lagg_port *)ifp->if_lagg;
  653. if (lp->lp_softc == sc)
  654. return (EEXIST);
  655. return (EBUSY);
  656. }
  657. switch (sc->sc_ifp->if_type) {
  658. case IFT_ETHER:
  659. /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
  660. if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
  661. return (EPROTONOSUPPORT);
  662. if_type = IFT_IEEE8023ADLAG;
  663. break;
  664. case IFT_INFINIBAND:
  665. /* XXX Disallow non-infiniband interfaces */
  666. if (ifp->if_type != IFT_INFINIBAND)
  667. return (EPROTONOSUPPORT);
  668. if_type = IFT_INFINIBANDLAG;
  669. break;
  670. default:
  671. break;
  672. }
  673. /* Allow the first Ethernet member to define the MTU */
  674. oldmtu = -1;
  675. if (CK_SLIST_EMPTY(&sc->sc_ports)) {
  676. sc->sc_ifp->if_mtu = ifp->if_mtu;
  677. } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
  678. if (ifp->if_ioctl == NULL) {
  679. if_printf(sc->sc_ifp, "cannot change MTU for %s\n",
  680. ifp->if_xname);
  681. return (EINVAL);
  682. }
  683. oldmtu = ifp->if_mtu;
  684. strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
  685. ifr.ifr_mtu = sc->sc_ifp->if_mtu;
  686. error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  687. if (error != 0) {
  688. if_printf(sc->sc_ifp, "invalid MTU for %s\n",
  689. ifp->if_xname);
  690. return (error);
  691. }
  692. ifr.ifr_mtu = oldmtu;
  693. }
  694. lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK | M_ZERO);
  695. lp->lp_softc = sc;
  696. /* Check if port is a stacked lagg */
  697. LAGG_LIST_LOCK();
  698. SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
  699. if (ifp == sc_ptr->sc_ifp) {
  700. LAGG_LIST_UNLOCK();
  701. free(lp, M_LAGG);
  702. if (oldmtu != -1)
  703. (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  704. (caddr_t)&ifr);
  705. return (EINVAL);
  706. /* XXX disable stacking for the moment, its untested */
  707. #ifdef LAGG_PORT_STACKING
  708. lp->lp_flags |= LAGG_PORT_STACK;
  709. if (lagg_port_checkstacking(sc_ptr) >=
  710. LAGG_MAX_STACKING) {
  711. LAGG_LIST_UNLOCK();
  712. free(lp, M_LAGG);
  713. if (oldmtu != -1)
  714. (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  715. (caddr_t)&ifr);
  716. return (E2BIG);
  717. }
  718. #endif
  719. }
  720. }
  721. LAGG_LIST_UNLOCK();
  722. if_ref(ifp);
  723. lp->lp_ifp = ifp;
  724. bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
  725. lp->lp_ifcapenable = ifp->if_capenable;
  726. if (CK_SLIST_EMPTY(&sc->sc_ports)) {
  727. bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
  728. lagg_proto_lladdr(sc);
  729. EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  730. } else {
  731. if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
  732. }
  733. lagg_setflags(lp, 1);
  734. if (CK_SLIST_EMPTY(&sc->sc_ports))
  735. sc->sc_primary = lp;
  736. /* Change the interface type */
  737. lp->lp_iftype = ifp->if_type;
  738. ifp->if_type = if_type;
  739. ifp->if_lagg = lp;
  740. lp->lp_ioctl = ifp->if_ioctl;
  741. ifp->if_ioctl = lagg_port_ioctl;
  742. lp->lp_output = ifp->if_output;
  743. ifp->if_output = lagg_port_output;
  744. /* Read port counters */
  745. pval = lp->port_counters.val;
  746. for (i = 0; i < IFCOUNTERS; i++, pval++)
  747. *pval = ifp->if_get_counter(ifp, i);
  748. /*
  749. * Insert into the list of ports.
  750. * Keep ports sorted by if_index. It is handy, when configuration
  751. * is predictable and `ifconfig laggN create ...` command
  752. * will lead to the same result each time.
  753. */
  754. CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
  755. if (tlp->lp_ifp->if_index < ifp->if_index && (
  756. CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
  757. ((struct lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index >
  758. ifp->if_index))
  759. break;
  760. }
  761. if (tlp != NULL)
  762. CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
  763. else
  764. CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
  765. sc->sc_count++;
  766. lagg_setmulti(lp);
  767. if ((error = lagg_proto_addport(sc, lp)) != 0) {
  768. /* Remove the port, without calling pr_delport. */
  769. lagg_port_destroy(lp, 0);
  770. if (oldmtu != -1)
  771. (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  772. return (error);
  773. }
  774. /* Update lagg capabilities */
  775. lagg_capabilities(sc);
  776. lagg_linkstate(sc);
  777. return (0);
  778. }
  779. #ifdef LAGG_PORT_STACKING
  780. static int
  781. lagg_port_checkstacking(struct lagg_softc *sc)
  782. {
  783. struct lagg_softc *sc_ptr;
  784. struct lagg_port *lp;
  785. int m = 0;
  786. LAGG_SXLOCK_ASSERT(sc);
  787. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  788. if (lp->lp_flags & LAGG_PORT_STACK) {
  789. sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
  790. m = MAX(m, lagg_port_checkstacking(sc_ptr));
  791. }
  792. }
  793. return (m + 1);
  794. }
  795. #endif
  796. static void
  797. lagg_port_destroy_cb(epoch_context_t ec)
  798. {
  799. struct lagg_port *lp;
  800. struct ifnet *ifp;
  801. lp = __containerof(ec, struct lagg_port, lp_epoch_ctx);
  802. ifp = lp->lp_ifp;
  803. if_rele(ifp);
  804. free(lp, M_LAGG);
  805. }
  806. static int
  807. lagg_port_destroy(struct lagg_port *lp, int rundelport)
  808. {
  809. struct lagg_softc *sc = lp->lp_softc;
  810. struct lagg_port *lp_ptr, *lp0;
  811. struct ifnet *ifp = lp->lp_ifp;
  812. uint64_t *pval, vdiff;
  813. int i;
  814. LAGG_XLOCK_ASSERT(sc);
  815. if (rundelport)
  816. lagg_proto_delport(sc, lp);
  817. if (lp->lp_detaching == 0)
  818. lagg_clrmulti(lp);
  819. /* Restore interface */
  820. ifp->if_type = lp->lp_iftype;
  821. ifp->if_ioctl = lp->lp_ioctl;
  822. ifp->if_output = lp->lp_output;
  823. ifp->if_lagg = NULL;
  824. /* Update detached port counters */
  825. pval = lp->port_counters.val;
  826. for (i = 0; i < IFCOUNTERS; i++, pval++) {
  827. vdiff = ifp->if_get_counter(ifp, i) - *pval;
  828. sc->detached_counters.val[i] += vdiff;
  829. }
  830. /* Finally, remove the port from the lagg */
  831. CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
  832. sc->sc_count--;
  833. /* Update the primary interface */
  834. if (lp == sc->sc_primary) {
  835. uint8_t lladdr[LAGG_ADDR_LEN];
  836. if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
  837. bzero(&lladdr, LAGG_ADDR_LEN);
  838. else
  839. bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
  840. sc->sc_primary = lp0;
  841. if (sc->sc_destroying == 0) {
  842. bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
  843. lagg_proto_lladdr(sc);
  844. EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  845. /*
  846. * Update lladdr for each port (new primary needs update
  847. * as well, to switch from old lladdr to its 'real' one).
  848. * We can skip this if the lagg is being destroyed.
  849. */
  850. CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
  851. if_setlladdr(lp_ptr->lp_ifp, lladdr,
  852. lp_ptr->lp_ifp->if_addrlen);
  853. }
  854. }
  855. if (lp->lp_ifflags)
  856. if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
  857. if (lp->lp_detaching == 0) {
  858. lagg_setflags(lp, 0);
  859. lagg_setcaps(lp, lp->lp_ifcapenable, lp->lp_ifcapenable2);
  860. if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
  861. }
  862. /*
  863. * free port and release it's ifnet reference after a grace period has
  864. * elapsed.
  865. */
  866. NET_EPOCH_CALL(lagg_port_destroy_cb, &lp->lp_epoch_ctx);
  867. /* Update lagg capabilities */
  868. lagg_capabilities(sc);
  869. lagg_linkstate(sc);
  870. return (0);
  871. }
  872. static int
  873. lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  874. {
  875. struct lagg_reqport *rp = (struct lagg_reqport *)data;
  876. struct lagg_softc *sc;
  877. struct lagg_port *lp = NULL;
  878. int error = 0;
  879. /* Should be checked by the caller */
  880. switch (ifp->if_type) {
  881. case IFT_IEEE8023ADLAG:
  882. case IFT_INFINIBANDLAG:
  883. if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
  884. goto fallback;
  885. break;
  886. default:
  887. goto fallback;
  888. }
  889. switch (cmd) {
  890. case SIOCGLAGGPORT:
  891. if (rp->rp_portname[0] == '\0' ||
  892. ifunit(rp->rp_portname) != ifp) {
  893. error = EINVAL;
  894. break;
  895. }
  896. LAGG_SLOCK(sc);
  897. if (__predict_true((lp = ifp->if_lagg) != NULL &&
  898. lp->lp_softc == sc))
  899. lagg_port2req(lp, rp);
  900. else
  901. error = ENOENT; /* XXXGL: can happen? */
  902. LAGG_SUNLOCK(sc);
  903. break;
  904. case SIOCSIFCAP:
  905. case SIOCSIFCAPNV:
  906. if (lp->lp_ioctl == NULL) {
  907. error = EINVAL;
  908. break;
  909. }
  910. error = (*lp->lp_ioctl)(ifp, cmd, data);
  911. if (error)
  912. break;
  913. /* Update lagg interface capabilities */
  914. LAGG_XLOCK(sc);
  915. lagg_capabilities(sc);
  916. LAGG_XUNLOCK(sc);
  917. VLAN_CAPABILITIES(sc->sc_ifp);
  918. break;
  919. case SIOCSIFMTU:
  920. /* Do not allow the MTU to be changed once joined */
  921. error = EINVAL;
  922. break;
  923. default:
  924. goto fallback;
  925. }
  926. return (error);
  927. fallback:
  928. if (lp != NULL && lp->lp_ioctl != NULL)
  929. return ((*lp->lp_ioctl)(ifp, cmd, data));
  930. return (EINVAL);
  931. }
  932. /*
  933. * Requests counter @cnt data.
  934. *
  935. * Counter value is calculated the following way:
  936. * 1) for each port, sum difference between current and "initial" measurements.
  937. * 2) add lagg logical interface counters.
  938. * 3) add data from detached_counters array.
  939. *
  940. * We also do the following things on ports attach/detach:
  941. * 1) On port attach we store all counters it has into port_counter array.
  942. * 2) On port detach we add the different between "initial" and
  943. * current counters data to detached_counters array.
  944. */
  945. static uint64_t
  946. lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
  947. {
  948. struct epoch_tracker et;
  949. struct lagg_softc *sc;
  950. struct lagg_port *lp;
  951. struct ifnet *lpifp;
  952. uint64_t newval, oldval, vsum;
  953. /* Revise this when we've got non-generic counters. */
  954. KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
  955. sc = (struct lagg_softc *)ifp->if_softc;
  956. vsum = 0;
  957. NET_EPOCH_ENTER(et);
  958. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  959. /* Saved attached value */
  960. oldval = lp->port_counters.val[cnt];
  961. /* current value */
  962. lpifp = lp->lp_ifp;
  963. newval = lpifp->if_get_counter(lpifp, cnt);
  964. /* Calculate diff and save new */
  965. vsum += newval - oldval;
  966. }
  967. NET_EPOCH_EXIT(et);
  968. /*
  969. * Add counter data which might be added by upper
  970. * layer protocols operating on logical interface.
  971. */
  972. vsum += if_get_counter_default(ifp, cnt);
  973. /*
  974. * Add counter data from detached ports counters
  975. */
  976. vsum += sc->detached_counters.val[cnt];
  977. return (vsum);
  978. }
  979. /*
  980. * For direct output to child ports.
  981. */
  982. static int
  983. lagg_port_output(struct ifnet *ifp, struct mbuf *m,
  984. const struct sockaddr *dst, struct route *ro)
  985. {
  986. struct lagg_port *lp = ifp->if_lagg;
  987. switch (dst->sa_family) {
  988. case pseudo_AF_HDRCMPLT:
  989. case AF_UNSPEC:
  990. if (lp != NULL)
  991. return ((*lp->lp_output)(ifp, m, dst, ro));
  992. }
  993. /* drop any other frames */
  994. m_freem(m);
  995. return (ENETDOWN);
  996. }
  997. static void
  998. lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
  999. {
  1000. struct lagg_port *lp;
  1001. struct lagg_softc *sc;
  1002. if ((lp = ifp->if_lagg) == NULL)
  1003. return;
  1004. /* If the ifnet is just being renamed, don't do anything. */
  1005. if (ifp->if_flags & IFF_RENAMING)
  1006. return;
  1007. sc = lp->lp_softc;
  1008. LAGG_XLOCK(sc);
  1009. lp->lp_detaching = 1;
  1010. lagg_port_destroy(lp, 1);
  1011. LAGG_XUNLOCK(sc);
  1012. VLAN_CAPABILITIES(sc->sc_ifp);
  1013. }
  1014. static void
  1015. lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
  1016. {
  1017. struct lagg_softc *sc = lp->lp_softc;
  1018. strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
  1019. strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
  1020. rp->rp_prio = lp->lp_prio;
  1021. rp->rp_flags = lp->lp_flags;
  1022. lagg_proto_portreq(sc, lp, &rp->rp_psc);
  1023. /* Add protocol specific flags */
  1024. switch (sc->sc_proto) {
  1025. case LAGG_PROTO_FAILOVER:
  1026. if (lp == sc->sc_primary)
  1027. rp->rp_flags |= LAGG_PORT_MASTER;
  1028. if (lp == lagg_link_active(sc, sc->sc_primary))
  1029. rp->rp_flags |= LAGG_PORT_ACTIVE;
  1030. break;
  1031. case LAGG_PROTO_ROUNDROBIN:
  1032. case LAGG_PROTO_LOADBALANCE:
  1033. case LAGG_PROTO_BROADCAST:
  1034. if (LAGG_PORTACTIVE(lp))
  1035. rp->rp_flags |= LAGG_PORT_ACTIVE;
  1036. break;
  1037. case LAGG_PROTO_LACP:
  1038. /* LACP has a different definition of active */
  1039. if (lacp_isactive(lp))
  1040. rp->rp_flags |= LAGG_PORT_ACTIVE;
  1041. if (lacp_iscollecting(lp))
  1042. rp->rp_flags |= LAGG_PORT_COLLECTING;
  1043. if (lacp_isdistributing(lp))
  1044. rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
  1045. break;
  1046. }
  1047. }
  1048. static void
  1049. lagg_watchdog_infiniband(void *arg)
  1050. {
  1051. struct epoch_tracker et;
  1052. struct lagg_softc *sc;
  1053. struct lagg_port *lp;
  1054. struct ifnet *ifp;
  1055. struct ifnet *lp_ifp;
  1056. sc = arg;
  1057. /*
  1058. * Because infiniband nodes have a fixed MAC address, which is
  1059. * generated by the so-called GID, we need to regularly update
  1060. * the link level address of the parent lagg<N> device when
  1061. * the active port changes. Possibly we could piggy-back on
  1062. * link up/down events aswell, but using a timer also provides
  1063. * a guarantee against too frequent events. This operation
  1064. * does not have to be atomic.
  1065. */
  1066. NET_EPOCH_ENTER(et);
  1067. lp = lagg_link_active(sc, sc->sc_primary);
  1068. if (lp != NULL) {
  1069. ifp = sc->sc_ifp;
  1070. lp_ifp = lp->lp_ifp;
  1071. if (ifp != NULL && lp_ifp != NULL &&
  1072. (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0 ||
  1073. memcmp(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen) != 0)) {
  1074. memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
  1075. memcpy(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen);
  1076. CURVNET_SET(ifp->if_vnet);
  1077. EVENTHANDLER_INVOKE(iflladdr_event, ifp);
  1078. CURVNET_RESTORE();
  1079. }
  1080. }
  1081. NET_EPOCH_EXIT(et);
  1082. callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
  1083. }
  1084. static void
  1085. lagg_if_updown(struct lagg_softc *sc, bool up)
  1086. {
  1087. struct ifreq ifr = {};
  1088. struct lagg_port *lp;
  1089. LAGG_XLOCK_ASSERT(sc);
  1090. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1091. if (up)
  1092. if_up(lp->lp_ifp);
  1093. else
  1094. if_down(lp->lp_ifp);
  1095. if (lp->lp_ioctl != NULL)
  1096. lp->lp_ioctl(lp->lp_ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
  1097. }
  1098. }
  1099. static void
  1100. lagg_init(void *xsc)
  1101. {
  1102. struct lagg_softc *sc = (struct lagg_softc *)xsc;
  1103. struct ifnet *ifp = sc->sc_ifp;
  1104. struct lagg_port *lp;
  1105. LAGG_XLOCK(sc);
  1106. if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
  1107. LAGG_XUNLOCK(sc);
  1108. return;
  1109. }
  1110. ifp->if_drv_flags |= IFF_DRV_RUNNING;
  1111. /*
  1112. * Update the port lladdrs if needed.
  1113. * This might be if_setlladdr() notification
  1114. * that lladdr has been changed.
  1115. */
  1116. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1117. if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
  1118. ifp->if_addrlen) != 0)
  1119. if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
  1120. }
  1121. lagg_if_updown(sc, true);
  1122. lagg_proto_init(sc);
  1123. if (ifp->if_type == IFT_INFINIBAND) {
  1124. mtx_lock(&sc->sc_mtx);
  1125. lagg_watchdog_infiniband(sc);
  1126. mtx_unlock(&sc->sc_mtx);
  1127. }
  1128. LAGG_XUNLOCK(sc);
  1129. }
  1130. static void
  1131. lagg_stop(struct lagg_softc *sc)
  1132. {
  1133. struct ifnet *ifp = sc->sc_ifp;
  1134. LAGG_XLOCK_ASSERT(sc);
  1135. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
  1136. return;
  1137. ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
  1138. lagg_proto_stop(sc);
  1139. mtx_lock(&sc->sc_mtx);
  1140. callout_stop(&sc->sc_watchdog);
  1141. mtx_unlock(&sc->sc_mtx);
  1142. lagg_if_updown(sc, false);
  1143. callout_drain(&sc->sc_watchdog);
  1144. }
  1145. static int
  1146. lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  1147. {
  1148. struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  1149. struct lagg_reqall *ra = (struct lagg_reqall *)data;
  1150. struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
  1151. struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
  1152. struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
  1153. struct ifreq *ifr = (struct ifreq *)data;
  1154. struct lagg_port *lp;
  1155. struct ifnet *tpif;
  1156. struct thread *td = curthread;
  1157. char *buf, *outbuf;
  1158. int count, buflen, len, error = 0, oldmtu;
  1159. bzero(&rpbuf, sizeof(rpbuf));
  1160. /* XXX: This can race with lagg_clone_destroy. */
  1161. switch (cmd) {
  1162. case SIOCGLAGG:
  1163. LAGG_XLOCK(sc);
  1164. buflen = sc->sc_count * sizeof(struct lagg_reqport);
  1165. outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
  1166. ra->ra_proto = sc->sc_proto;
  1167. lagg_proto_request(sc, &ra->ra_psc);
  1168. count = 0;
  1169. buf = outbuf;
  1170. len = min(ra->ra_size, buflen);
  1171. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1172. if (len < sizeof(rpbuf))
  1173. break;
  1174. lagg_port2req(lp, &rpbuf);
  1175. memcpy(buf, &rpbuf, sizeof(rpbuf));
  1176. count++;
  1177. buf += sizeof(rpbuf);
  1178. len -= sizeof(rpbuf);
  1179. }
  1180. LAGG_XUNLOCK(sc);
  1181. ra->ra_ports = count;
  1182. ra->ra_size = count * sizeof(rpbuf);
  1183. error = copyout(outbuf, ra->ra_port, ra->ra_size);
  1184. free(outbuf, M_TEMP);
  1185. break;
  1186. case SIOCSLAGG:
  1187. error = priv_check(td, PRIV_NET_LAGG);
  1188. if (error)
  1189. break;
  1190. if (ra->ra_proto >= LAGG_PROTO_MAX) {
  1191. error = EPROTONOSUPPORT;
  1192. break;
  1193. }
  1194. /* Infiniband only supports the failover protocol. */
  1195. if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
  1196. ifp->if_type == IFT_INFINIBAND) {
  1197. error = EPROTONOSUPPORT;
  1198. break;
  1199. }
  1200. LAGG_XLOCK(sc);
  1201. lagg_proto_detach(sc);
  1202. lagg_proto_attach(sc, ra->ra_proto);
  1203. LAGG_XUNLOCK(sc);
  1204. break;
  1205. case SIOCGLAGGOPTS:
  1206. LAGG_XLOCK(sc);
  1207. ro->ro_opts = sc->sc_opts;
  1208. if (sc->sc_proto == LAGG_PROTO_LACP) {
  1209. struct lacp_softc *lsc;
  1210. lsc = (struct lacp_softc *)sc->sc_psc;
  1211. if (lsc->lsc_debug.lsc_tx_test != 0)
  1212. ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
  1213. if (lsc->lsc_debug.lsc_rx_test != 0)
  1214. ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
  1215. if (lsc->lsc_strict_mode != 0)
  1216. ro->ro_opts |= LAGG_OPT_LACP_STRICT;
  1217. if (lsc->lsc_fast_timeout != 0)
  1218. ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO;
  1219. ro->ro_active = sc->sc_active;
  1220. } else {
  1221. ro->ro_active = 0;
  1222. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  1223. ro->ro_active += LAGG_PORTACTIVE(lp);
  1224. }
  1225. ro->ro_bkt = sc->sc_stride;
  1226. ro->ro_flapping = sc->sc_flapping;
  1227. ro->ro_flowid_shift = sc->flowid_shift;
  1228. LAGG_XUNLOCK(sc);
  1229. break;
  1230. case SIOCSLAGGOPTS:
  1231. error = priv_check(td, PRIV_NET_LAGG);
  1232. if (error)
  1233. break;
  1234. /*
  1235. * The stride option was added without defining a corresponding
  1236. * LAGG_OPT flag, so handle a non-zero value before checking
  1237. * anything else to preserve compatibility.
  1238. */
  1239. LAGG_XLOCK(sc);
  1240. if (ro->ro_opts == 0 && ro->ro_bkt != 0) {
  1241. if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) {
  1242. LAGG_XUNLOCK(sc);
  1243. error = EINVAL;
  1244. break;
  1245. }
  1246. sc->sc_stride = ro->ro_bkt;
  1247. }
  1248. if (ro->ro_opts == 0) {
  1249. LAGG_XUNLOCK(sc);
  1250. break;
  1251. }
  1252. /*
  1253. * Set options. LACP options are stored in sc->sc_psc,
  1254. * not in sc_opts.
  1255. */
  1256. int valid, lacp;
  1257. switch (ro->ro_opts) {
  1258. case LAGG_OPT_USE_FLOWID:
  1259. case -LAGG_OPT_USE_FLOWID:
  1260. case LAGG_OPT_USE_NUMA:
  1261. case -LAGG_OPT_USE_NUMA:
  1262. case LAGG_OPT_FLOWIDSHIFT:
  1263. case LAGG_OPT_RR_LIMIT:
  1264. valid = 1;
  1265. lacp = 0;
  1266. break;
  1267. case LAGG_OPT_LACP_TXTEST:
  1268. case -LAGG_OPT_LACP_TXTEST:
  1269. case LAGG_OPT_LACP_RXTEST:
  1270. case -LAGG_OPT_LACP_RXTEST:
  1271. case LAGG_OPT_LACP_STRICT:
  1272. case -LAGG_OPT_LACP_STRICT:
  1273. case LAGG_OPT_LACP_FAST_TIMO:
  1274. case -LAGG_OPT_LACP_FAST_TIMO:
  1275. valid = lacp = 1;
  1276. break;
  1277. default:
  1278. valid = lacp = 0;
  1279. break;
  1280. }
  1281. if (valid == 0 ||
  1282. (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
  1283. /* Invalid combination of options specified. */
  1284. error = EINVAL;
  1285. LAGG_XUNLOCK(sc);
  1286. break; /* Return from SIOCSLAGGOPTS. */
  1287. }
  1288. /*
  1289. * Store new options into sc->sc_opts except for
  1290. * FLOWIDSHIFT, RR and LACP options.
  1291. */
  1292. if (lacp == 0) {
  1293. if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
  1294. sc->flowid_shift = ro->ro_flowid_shift;
  1295. else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) {
  1296. if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN ||
  1297. ro->ro_bkt == 0) {
  1298. error = EINVAL;
  1299. LAGG_XUNLOCK(sc);
  1300. break;
  1301. }
  1302. sc->sc_stride = ro->ro_bkt;
  1303. } else if (ro->ro_opts > 0)
  1304. sc->sc_opts |= ro->ro_opts;
  1305. else
  1306. sc->sc_opts &= ~ro->ro_opts;
  1307. } else {
  1308. struct lacp_softc *lsc;
  1309. struct lacp_port *lp;
  1310. lsc = (struct lacp_softc *)sc->sc_psc;
  1311. switch (ro->ro_opts) {
  1312. case LAGG_OPT_LACP_TXTEST:
  1313. lsc->lsc_debug.lsc_tx_test = 1;
  1314. break;
  1315. case -LAGG_OPT_LACP_TXTEST:
  1316. lsc->lsc_debug.lsc_tx_test = 0;
  1317. break;
  1318. case LAGG_OPT_LACP_RXTEST:
  1319. lsc->lsc_debug.lsc_rx_test = 1;
  1320. break;
  1321. case -LAGG_OPT_LACP_RXTEST:
  1322. lsc->lsc_debug.lsc_rx_test = 0;
  1323. break;
  1324. case LAGG_OPT_LACP_STRICT:
  1325. lsc->lsc_strict_mode = 1;
  1326. break;
  1327. case -LAGG_OPT_LACP_STRICT:
  1328. lsc->lsc_strict_mode = 0;
  1329. break;
  1330. case LAGG_OPT_LACP_FAST_TIMO:
  1331. LACP_LOCK(lsc);
  1332. LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
  1333. lp->lp_state |= LACP_STATE_TIMEOUT;
  1334. LACP_UNLOCK(lsc);
  1335. lsc->lsc_fast_timeout = 1;
  1336. break;
  1337. case -LAGG_OPT_LACP_FAST_TIMO:
  1338. LACP_LOCK(lsc);
  1339. LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
  1340. lp->lp_state &= ~LACP_STATE_TIMEOUT;
  1341. LACP_UNLOCK(lsc);
  1342. lsc->lsc_fast_timeout = 0;
  1343. break;
  1344. }
  1345. }
  1346. LAGG_XUNLOCK(sc);
  1347. break;
  1348. case SIOCGLAGGFLAGS:
  1349. rf->rf_flags = 0;
  1350. LAGG_XLOCK(sc);
  1351. if (sc->sc_flags & MBUF_HASHFLAG_L2)
  1352. rf->rf_flags |= LAGG_F_HASHL2;
  1353. if (sc->sc_flags & MBUF_HASHFLAG_L3)
  1354. rf->rf_flags |= LAGG_F_HASHL3;
  1355. if (sc->sc_flags & MBUF_HASHFLAG_L4)
  1356. rf->rf_flags |= LAGG_F_HASHL4;
  1357. LAGG_XUNLOCK(sc);
  1358. break;
  1359. case SIOCSLAGGHASH:
  1360. error = priv_check(td, PRIV_NET_LAGG);
  1361. if (error)
  1362. break;
  1363. if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
  1364. error = EINVAL;
  1365. break;
  1366. }
  1367. LAGG_XLOCK(sc);
  1368. sc->sc_flags = 0;
  1369. if (rf->rf_flags & LAGG_F_HASHL2)
  1370. sc->sc_flags |= MBUF_HASHFLAG_L2;
  1371. if (rf->rf_flags & LAGG_F_HASHL3)
  1372. sc->sc_flags |= MBUF_HASHFLAG_L3;
  1373. if (rf->rf_flags & LAGG_F_HASHL4)
  1374. sc->sc_flags |= MBUF_HASHFLAG_L4;
  1375. LAGG_XUNLOCK(sc);
  1376. break;
  1377. case SIOCGLAGGPORT:
  1378. if (rp->rp_portname[0] == '\0' ||
  1379. (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
  1380. error = EINVAL;
  1381. break;
  1382. }
  1383. LAGG_SLOCK(sc);
  1384. if (__predict_true((lp = tpif->if_lagg) != NULL &&
  1385. lp->lp_softc == sc))
  1386. lagg_port2req(lp, rp);
  1387. else
  1388. error = ENOENT; /* XXXGL: can happen? */
  1389. LAGG_SUNLOCK(sc);
  1390. if_rele(tpif);
  1391. break;
  1392. case SIOCSLAGGPORT:
  1393. error = priv_check(td, PRIV_NET_LAGG);
  1394. if (error)
  1395. break;
  1396. if (rp->rp_portname[0] == '\0' ||
  1397. (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
  1398. error = EINVAL;
  1399. break;
  1400. }
  1401. #ifdef INET6
  1402. /*
  1403. * A laggport interface should not have inet6 address
  1404. * because two interfaces with a valid link-local
  1405. * scope zone must not be merged in any form. This
  1406. * restriction is needed to prevent violation of
  1407. * link-local scope zone. Attempts to add a laggport
  1408. * interface which has inet6 addresses triggers
  1409. * removal of all inet6 addresses on the member
  1410. * interface.
  1411. */
  1412. if (in6ifa_llaonifp(tpif)) {
  1413. in6_ifdetach(tpif);
  1414. if_printf(sc->sc_ifp,
  1415. "IPv6 addresses on %s have been removed "
  1416. "before adding it as a member to prevent "
  1417. "IPv6 address scope violation.\n",
  1418. tpif->if_xname);
  1419. }
  1420. #endif
  1421. oldmtu = ifp->if_mtu;
  1422. LAGG_XLOCK(sc);
  1423. error = lagg_port_create(sc, tpif);
  1424. LAGG_XUNLOCK(sc);
  1425. if_rele(tpif);
  1426. /*
  1427. * LAGG MTU may change during addition of the first port.
  1428. * If it did, do network layer specific procedure.
  1429. */
  1430. if (ifp->if_mtu != oldmtu)
  1431. if_notifymtu(ifp);
  1432. VLAN_CAPABILITIES(ifp);
  1433. break;
  1434. case SIOCSLAGGDELPORT:
  1435. error = priv_check(td, PRIV_NET_LAGG);
  1436. if (error)
  1437. break;
  1438. if (rp->rp_portname[0] == '\0' ||
  1439. (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
  1440. error = EINVAL;
  1441. break;
  1442. }
  1443. LAGG_XLOCK(sc);
  1444. if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
  1445. lp->lp_softc != sc) {
  1446. error = ENOENT;
  1447. LAGG_XUNLOCK(sc);
  1448. if_rele(tpif);
  1449. break;
  1450. }
  1451. error = lagg_port_destroy(lp, 1);
  1452. LAGG_XUNLOCK(sc);
  1453. if_rele(tpif);
  1454. VLAN_CAPABILITIES(ifp);
  1455. break;
  1456. case SIOCSIFFLAGS:
  1457. /* Set flags on ports too */
  1458. LAGG_XLOCK(sc);
  1459. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1460. lagg_setflags(lp, 1);
  1461. }
  1462. if (!(ifp->if_flags & IFF_UP) &&
  1463. (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
  1464. /*
  1465. * If interface is marked down and it is running,
  1466. * then stop and disable it.
  1467. */
  1468. lagg_stop(sc);
  1469. LAGG_XUNLOCK(sc);
  1470. } else if ((ifp->if_flags & IFF_UP) &&
  1471. !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
  1472. /*
  1473. * If interface is marked up and it is stopped, then
  1474. * start it.
  1475. */
  1476. LAGG_XUNLOCK(sc);
  1477. (*ifp->if_init)(sc);
  1478. } else
  1479. LAGG_XUNLOCK(sc);
  1480. break;
  1481. case SIOCADDMULTI:
  1482. case SIOCDELMULTI:
  1483. LAGG_XLOCK(sc);
  1484. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1485. lagg_clrmulti(lp);
  1486. lagg_setmulti(lp);
  1487. }
  1488. LAGG_XUNLOCK(sc);
  1489. error = 0;
  1490. break;
  1491. case SIOCSIFMEDIA:
  1492. case SIOCGIFMEDIA:
  1493. if (ifp->if_type == IFT_INFINIBAND)
  1494. error = EINVAL;
  1495. else
  1496. error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
  1497. break;
  1498. case SIOCSIFCAP:
  1499. case SIOCSIFCAPNV:
  1500. LAGG_XLOCK(sc);
  1501. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1502. if (lp->lp_ioctl != NULL)
  1503. (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
  1504. }
  1505. lagg_capabilities(sc);
  1506. LAGG_XUNLOCK(sc);
  1507. VLAN_CAPABILITIES(ifp);
  1508. error = 0;
  1509. break;
  1510. case SIOCGIFCAPNV:
  1511. error = 0;
  1512. break;
  1513. case SIOCSIFMTU:
  1514. LAGG_XLOCK(sc);
  1515. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1516. if (lp->lp_ioctl != NULL)
  1517. error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
  1518. else
  1519. error = EINVAL;
  1520. if (error != 0) {
  1521. if_printf(ifp,
  1522. "failed to change MTU to %d on port %s, "
  1523. "reverting all ports to original MTU (%d)\n",
  1524. ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu);
  1525. break;
  1526. }
  1527. }
  1528. if (error == 0) {
  1529. ifp->if_mtu = ifr->ifr_mtu;
  1530. } else {
  1531. /* set every port back to the original MTU */
  1532. ifr->ifr_mtu = ifp->if_mtu;
  1533. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1534. if (lp->lp_ioctl != NULL)
  1535. (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
  1536. }
  1537. }
  1538. lagg_capabilities(sc);
  1539. LAGG_XUNLOCK(sc);
  1540. VLAN_CAPABILITIES(ifp);
  1541. break;
  1542. default:
  1543. error = ether_ioctl(ifp, cmd, data);
  1544. break;
  1545. }
  1546. return (error);
  1547. }
  1548. #if defined(KERN_TLS) || defined(RATELIMIT)
  1549. #ifdef RATELIMIT
  1550. static const struct if_snd_tag_sw lagg_snd_tag_ul_sw = {
  1551. .snd_tag_modify = lagg_snd_tag_modify,
  1552. .snd_tag_query = lagg_snd_tag_query,
  1553. .snd_tag_free = lagg_snd_tag_free,
  1554. .next_snd_tag = lagg_next_snd_tag,
  1555. .type = IF_SND_TAG_TYPE_UNLIMITED
  1556. };
  1557. static const struct if_snd_tag_sw lagg_snd_tag_rl_sw = {
  1558. .snd_tag_modify = lagg_snd_tag_modify,
  1559. .snd_tag_query = lagg_snd_tag_query,
  1560. .snd_tag_free = lagg_snd_tag_free,
  1561. .next_snd_tag = lagg_next_snd_tag,
  1562. .type = IF_SND_TAG_TYPE_RATE_LIMIT
  1563. };
  1564. #endif
  1565. #ifdef KERN_TLS
  1566. static const struct if_snd_tag_sw lagg_snd_tag_tls_sw = {
  1567. .snd_tag_modify = lagg_snd_tag_modify,
  1568. .snd_tag_query = lagg_snd_tag_query,
  1569. .snd_tag_free = lagg_snd_tag_free,
  1570. .next_snd_tag = lagg_next_snd_tag,
  1571. .type = IF_SND_TAG_TYPE_TLS
  1572. };
  1573. #ifdef RATELIMIT
  1574. static const struct if_snd_tag_sw lagg_snd_tag_tls_rl_sw = {
  1575. .snd_tag_modify = lagg_snd_tag_modify,
  1576. .snd_tag_query = lagg_snd_tag_query,
  1577. .snd_tag_free = lagg_snd_tag_free,
  1578. .next_snd_tag = lagg_next_snd_tag,
  1579. .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT
  1580. };
  1581. #endif
  1582. #endif
  1583. static inline struct lagg_snd_tag *
  1584. mst_to_lst(struct m_snd_tag *mst)
  1585. {
  1586. return (__containerof(mst, struct lagg_snd_tag, com));
  1587. }
  1588. /*
  1589. * Look up the port used by a specific flow. This only works for lagg
  1590. * protocols with deterministic port mappings (e.g. not roundrobin).
  1591. * In addition protocols which use a hash to map flows to ports must
  1592. * be configured to use the mbuf flowid rather than hashing packet
  1593. * contents.
  1594. */
  1595. static struct lagg_port *
  1596. lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
  1597. uint8_t numa_domain)
  1598. {
  1599. struct lagg_softc *sc;
  1600. struct lagg_port *lp;
  1601. struct lagg_lb *lb;
  1602. uint32_t hash, p;
  1603. int err;
  1604. sc = ifp->if_softc;
  1605. switch (sc->sc_proto) {
  1606. case LAGG_PROTO_FAILOVER:
  1607. return (lagg_link_active(sc, sc->sc_primary));
  1608. case LAGG_PROTO_LOADBALANCE:
  1609. if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
  1610. flowtype == M_HASHTYPE_NONE)
  1611. return (NULL);
  1612. p = flowid >> sc->flowid_shift;
  1613. p %= sc->sc_count;
  1614. lb = (struct lagg_lb *)sc->sc_psc;
  1615. lp = lb->lb_ports[p];
  1616. return (lagg_link_active(sc, lp));
  1617. case LAGG_PROTO_LACP:
  1618. if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
  1619. flowtype == M_HASHTYPE_NONE)
  1620. return (NULL);
  1621. hash = flowid >> sc->flowid_shift;
  1622. return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, &err));
  1623. default:
  1624. return (NULL);
  1625. }
  1626. }
  1627. static int
  1628. lagg_snd_tag_alloc(struct ifnet *ifp,
  1629. union if_snd_tag_alloc_params *params,
  1630. struct m_snd_tag **ppmt)
  1631. {
  1632. struct epoch_tracker et;
  1633. const struct if_snd_tag_sw *sw;
  1634. struct lagg_snd_tag *lst;
  1635. struct lagg_port *lp;
  1636. struct ifnet *lp_ifp;
  1637. struct m_snd_tag *mst;
  1638. int error;
  1639. switch (params->hdr.type) {
  1640. #ifdef RATELIMIT
  1641. case IF_SND_TAG_TYPE_UNLIMITED:
  1642. sw = &lagg_snd_tag_ul_sw;
  1643. break;
  1644. case IF_SND_TAG_TYPE_RATE_LIMIT:
  1645. sw = &lagg_snd_tag_rl_sw;
  1646. break;
  1647. #endif
  1648. #ifdef KERN_TLS
  1649. case IF_SND_TAG_TYPE_TLS:
  1650. sw = &lagg_snd_tag_tls_sw;
  1651. break;
  1652. case IF_SND_TAG_TYPE_TLS_RX:
  1653. /* Return tag from port interface directly. */
  1654. sw = NULL;
  1655. break;
  1656. #ifdef RATELIMIT
  1657. case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
  1658. sw = &lagg_snd_tag_tls_rl_sw;
  1659. break;
  1660. #endif
  1661. #endif
  1662. default:
  1663. return (EOPNOTSUPP);
  1664. }
  1665. NET_EPOCH_ENTER(et);
  1666. lp = lookup_snd_tag_port(ifp, params->hdr.flowid,
  1667. params->hdr.flowtype, params->hdr.numa_domain);
  1668. if (lp == NULL) {
  1669. NET_EPOCH_EXIT(et);
  1670. return (EOPNOTSUPP);
  1671. }
  1672. if (lp->lp_ifp == NULL) {
  1673. NET_EPOCH_EXIT(et);
  1674. return (EOPNOTSUPP);
  1675. }
  1676. lp_ifp = lp->lp_ifp;
  1677. if_ref(lp_ifp);
  1678. NET_EPOCH_EXIT(et);
  1679. if (sw != NULL) {
  1680. lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
  1681. if (lst == NULL) {
  1682. if_rele(lp_ifp);
  1683. return (ENOMEM);
  1684. }
  1685. } else
  1686. lst = NULL;
  1687. error = m_snd_tag_alloc(lp_ifp, params, &mst);
  1688. if_rele(lp_ifp);
  1689. if (error) {
  1690. free(lst, M_LAGG);
  1691. return (error);
  1692. }
  1693. if (sw != NULL) {
  1694. m_snd_tag_init(&lst->com, ifp, sw);
  1695. lst->tag = mst;
  1696. *ppmt = &lst->com;
  1697. } else
  1698. *ppmt = mst;
  1699. return (0);
  1700. }
  1701. static struct m_snd_tag *
  1702. lagg_next_snd_tag(struct m_snd_tag *mst)
  1703. {
  1704. struct lagg_snd_tag *lst;
  1705. lst = mst_to_lst(mst);
  1706. return (lst->tag);
  1707. }
  1708. static int
  1709. lagg_snd_tag_modify(struct m_snd_tag *mst,
  1710. union if_snd_tag_modify_params *params)
  1711. {
  1712. struct lagg_snd_tag *lst;
  1713. lst = mst_to_lst(mst);
  1714. return (lst->tag->sw->snd_tag_modify(lst->tag, params));
  1715. }
  1716. static int
  1717. lagg_snd_tag_query(struct m_snd_tag *mst,
  1718. union if_snd_tag_query_params *params)
  1719. {
  1720. struct lagg_snd_tag *lst;
  1721. lst = mst_to_lst(mst);
  1722. return (lst->tag->sw->snd_tag_query(lst->tag, params));
  1723. }
  1724. static void
  1725. lagg_snd_tag_free(struct m_snd_tag *mst)
  1726. {
  1727. struct lagg_snd_tag *lst;
  1728. lst = mst_to_lst(mst);
  1729. m_snd_tag_rele(lst->tag);
  1730. free(lst, M_LAGG);
  1731. }
  1732. static void
  1733. lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
  1734. {
  1735. /*
  1736. * For lagg, we have an indirect
  1737. * interface. The caller needs to
  1738. * get a ratelimit tag on the actual
  1739. * interface the flow will go on.
  1740. */
  1741. q->rate_table = NULL;
  1742. q->flags = RT_IS_INDIRECT;
  1743. q->max_flows = 0;
  1744. q->number_of_rates = 0;
  1745. }
  1746. #endif
  1747. static int
  1748. lagg_setmulti(struct lagg_port *lp)
  1749. {
  1750. struct lagg_softc *sc = lp->lp_softc;
  1751. struct ifnet *ifp = lp->lp_ifp;
  1752. struct ifnet *scifp = sc->sc_ifp;
  1753. struct lagg_mc *mc;
  1754. struct ifmultiaddr *ifma;
  1755. int error;
  1756. IF_ADDR_WLOCK(scifp);
  1757. CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
  1758. if (ifma->ifma_addr->sa_family != AF_LINK)
  1759. continue;
  1760. mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
  1761. if (mc == NULL) {
  1762. IF_ADDR_WUNLOCK(scifp);
  1763. return (ENOMEM);
  1764. }
  1765. bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
  1766. mc->mc_addr.sdl_index = ifp->if_index;
  1767. mc->mc_ifma = NULL;
  1768. SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
  1769. }
  1770. IF_ADDR_WUNLOCK(scifp);
  1771. SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
  1772. error = if_addmulti(ifp,
  1773. (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
  1774. if (error)
  1775. return (error);
  1776. }
  1777. return (0);
  1778. }
  1779. static int
  1780. lagg_clrmulti(struct lagg_port *lp)
  1781. {
  1782. struct lagg_mc *mc;
  1783. LAGG_XLOCK_ASSERT(lp->lp_softc);
  1784. while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
  1785. SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
  1786. if (mc->mc_ifma && lp->lp_detaching == 0)
  1787. if_delmulti_ifma(mc->mc_ifma);
  1788. free(mc, M_LAGG);
  1789. }
  1790. return (0);
  1791. }
  1792. static void
  1793. lagg_setcaps(struct lagg_port *lp, int cap, int cap2)
  1794. {
  1795. struct ifreq ifr;
  1796. struct siocsifcapnv_driver_data drv_ioctl_data;
  1797. if (lp->lp_ifp->if_capenable == cap &&
  1798. lp->lp_ifp->if_capenable2 == cap2)
  1799. return;
  1800. if (lp->lp_ioctl == NULL)
  1801. return;
  1802. /* XXX */
  1803. if ((lp->lp_ifp->if_capabilities & IFCAP_NV) != 0) {
  1804. drv_ioctl_data.reqcap = cap;
  1805. drv_ioctl_data.reqcap2 = cap2;
  1806. drv_ioctl_data.nvcap = NULL;
  1807. (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAPNV,
  1808. (caddr_t)&drv_ioctl_data);
  1809. } else {
  1810. ifr.ifr_reqcap = cap;
  1811. (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr);
  1812. }
  1813. }
  1814. /* Handle a ref counted flag that should be set on the lagg port as well */
  1815. static int
  1816. lagg_setflag(struct lagg_port *lp, int flag, int status,
  1817. int (*func)(struct ifnet *, int))
  1818. {
  1819. struct lagg_softc *sc = lp->lp_softc;
  1820. struct ifnet *scifp = sc->sc_ifp;
  1821. struct ifnet *ifp = lp->lp_ifp;
  1822. int error;
  1823. LAGG_XLOCK_ASSERT(sc);
  1824. status = status ? (scifp->if_flags & flag) : 0;
  1825. /* Now "status" contains the flag value or 0 */
  1826. /*
  1827. * See if recorded ports status is different from what
  1828. * we want it to be. If it is, flip it. We record ports
  1829. * status in lp_ifflags so that we won't clear ports flag
  1830. * we haven't set. In fact, we don't clear or set ports
  1831. * flags directly, but get or release references to them.
  1832. * That's why we can be sure that recorded flags still are
  1833. * in accord with actual ports flags.
  1834. */
  1835. if (status != (lp->lp_ifflags & flag)) {
  1836. error = (*func)(ifp, status);
  1837. if (error)
  1838. return (error);
  1839. lp->lp_ifflags &= ~flag;
  1840. lp->lp_ifflags |= status;
  1841. }
  1842. return (0);
  1843. }
  1844. /*
  1845. * Handle IFF_* flags that require certain changes on the lagg port
  1846. * if "status" is true, update ports flags respective to the lagg
  1847. * if "status" is false, forcedly clear the flags set on port.
  1848. */
  1849. static int
  1850. lagg_setflags(struct lagg_port *lp, int status)
  1851. {
  1852. int error, i;
  1853. for (i = 0; lagg_pflags[i].flag; i++) {
  1854. error = lagg_setflag(lp, lagg_pflags[i].flag,
  1855. status, lagg_pflags[i].func);
  1856. if (error)
  1857. return (error);
  1858. }
  1859. return (0);
  1860. }
  1861. static int
  1862. lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
  1863. {
  1864. struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  1865. NET_EPOCH_ASSERT();
  1866. #if defined(KERN_TLS) || defined(RATELIMIT)
  1867. if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
  1868. MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
  1869. #endif
  1870. /* We need a Tx algorithm and at least one port */
  1871. if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
  1872. m_freem(m);
  1873. if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  1874. return (ENXIO);
  1875. }
  1876. ETHER_BPF_MTAP(ifp, m);
  1877. return (lagg_proto_start(sc, m));
  1878. }
  1879. static int
  1880. lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
  1881. {
  1882. struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  1883. NET_EPOCH_ASSERT();
  1884. #if defined(KERN_TLS) || defined(RATELIMIT)
  1885. if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
  1886. MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
  1887. #endif
  1888. /* We need a Tx algorithm and at least one port */
  1889. if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
  1890. m_freem(m);
  1891. if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
  1892. return (ENXIO);
  1893. }
  1894. infiniband_bpf_mtap(ifp, m);
  1895. return (lagg_proto_start(sc, m));
  1896. }
  1897. /*
  1898. * The ifp->if_qflush entry point for lagg(4) is no-op.
  1899. */
  1900. static void
  1901. lagg_qflush(struct ifnet *ifp __unused)
  1902. {
  1903. }
  1904. static struct mbuf *
  1905. lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
  1906. {
  1907. struct lagg_port *lp = ifp->if_lagg;
  1908. struct lagg_softc *sc = lp->lp_softc;
  1909. struct ifnet *scifp = sc->sc_ifp;
  1910. NET_EPOCH_ASSERT();
  1911. if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  1912. lp->lp_detaching != 0 ||
  1913. sc->sc_proto == LAGG_PROTO_NONE) {
  1914. m_freem(m);
  1915. return (NULL);
  1916. }
  1917. m = lagg_proto_input(sc, lp, m);
  1918. if (m != NULL) {
  1919. ETHER_BPF_MTAP(scifp, m);
  1920. if ((scifp->if_flags & IFF_MONITOR) != 0) {
  1921. m_freem(m);
  1922. m = NULL;
  1923. }
  1924. }
  1925. #ifdef DEV_NETMAP
  1926. if (m != NULL && scifp->if_capenable & IFCAP_NETMAP) {
  1927. scifp->if_input(scifp, m);
  1928. m = NULL;
  1929. }
  1930. #endif /* DEV_NETMAP */
  1931. return (m);
  1932. }
  1933. static struct mbuf *
  1934. lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
  1935. {
  1936. struct lagg_port *lp = ifp->if_lagg;
  1937. struct lagg_softc *sc = lp->lp_softc;
  1938. struct ifnet *scifp = sc->sc_ifp;
  1939. NET_EPOCH_ASSERT();
  1940. if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  1941. lp->lp_detaching != 0 ||
  1942. sc->sc_proto == LAGG_PROTO_NONE) {
  1943. m_freem(m);
  1944. return (NULL);
  1945. }
  1946. m = lagg_proto_input(sc, lp, m);
  1947. if (m != NULL) {
  1948. infiniband_bpf_mtap(scifp, m);
  1949. if ((scifp->if_flags & IFF_MONITOR) != 0) {
  1950. m_freem(m);
  1951. m = NULL;
  1952. }
  1953. }
  1954. return (m);
  1955. }
  1956. static int
  1957. lagg_media_change(struct ifnet *ifp)
  1958. {
  1959. struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  1960. if (sc->sc_ifflags & IFF_DEBUG)
  1961. printf("%s\n", __func__);
  1962. /* Ignore */
  1963. return (0);
  1964. }
  1965. static void
  1966. lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
  1967. {
  1968. struct epoch_tracker et;
  1969. struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  1970. struct lagg_port *lp;
  1971. imr->ifm_status = IFM_AVALID;
  1972. imr->ifm_active = IFM_ETHER | IFM_AUTO;
  1973. NET_EPOCH_ENTER(et);
  1974. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1975. if (LAGG_PORTACTIVE(lp))
  1976. imr->ifm_status |= IFM_ACTIVE;
  1977. }
  1978. NET_EPOCH_EXIT(et);
  1979. }
  1980. static void
  1981. lagg_linkstate(struct lagg_softc *sc)
  1982. {
  1983. struct epoch_tracker et;
  1984. struct lagg_port *lp;
  1985. int new_link = LINK_STATE_DOWN;
  1986. uint64_t speed;
  1987. LAGG_XLOCK_ASSERT(sc);
  1988. /* LACP handles link state itself */
  1989. if (sc->sc_proto == LAGG_PROTO_LACP)
  1990. return;
  1991. /* Our link is considered up if at least one of our ports is active */
  1992. NET_EPOCH_ENTER(et);
  1993. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  1994. if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
  1995. new_link = LINK_STATE_UP;
  1996. break;
  1997. }
  1998. }
  1999. NET_EPOCH_EXIT(et);
  2000. if_link_state_change(sc->sc_ifp, new_link);
  2001. /* Update if_baudrate to reflect the max possible speed */
  2002. switch (sc->sc_proto) {
  2003. case LAGG_PROTO_FAILOVER:
  2004. sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
  2005. sc->sc_primary->lp_ifp->if_baudrate : 0;
  2006. break;
  2007. case LAGG_PROTO_ROUNDROBIN:
  2008. case LAGG_PROTO_LOADBALANCE:
  2009. case LAGG_PROTO_BROADCAST:
  2010. speed = 0;
  2011. NET_EPOCH_ENTER(et);
  2012. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  2013. speed += lp->lp_ifp->if_baudrate;
  2014. NET_EPOCH_EXIT(et);
  2015. sc->sc_ifp->if_baudrate = speed;
  2016. break;
  2017. case LAGG_PROTO_LACP:
  2018. /* LACP updates if_baudrate itself */
  2019. break;
  2020. }
  2021. }
  2022. static void
  2023. lagg_port_state(struct ifnet *ifp, int state)
  2024. {
  2025. struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
  2026. struct lagg_softc *sc = NULL;
  2027. if (lp != NULL)
  2028. sc = lp->lp_softc;
  2029. if (sc == NULL)
  2030. return;
  2031. LAGG_XLOCK(sc);
  2032. lagg_linkstate(sc);
  2033. lagg_proto_linkstate(sc, lp);
  2034. LAGG_XUNLOCK(sc);
  2035. }
  2036. struct lagg_port *
  2037. lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
  2038. {
  2039. struct lagg_port *lp_next, *rval = NULL;
  2040. /*
  2041. * Search a port which reports an active link state.
  2042. */
  2043. #ifdef INVARIANTS
  2044. /*
  2045. * This is called with either in the network epoch
  2046. * or with LAGG_XLOCK(sc) held.
  2047. */
  2048. if (!in_epoch(net_epoch_preempt))
  2049. LAGG_XLOCK_ASSERT(sc);
  2050. #endif
  2051. if (lp == NULL)
  2052. goto search;
  2053. if (LAGG_PORTACTIVE(lp)) {
  2054. rval = lp;
  2055. goto found;
  2056. }
  2057. if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL &&
  2058. LAGG_PORTACTIVE(lp_next)) {
  2059. rval = lp_next;
  2060. goto found;
  2061. }
  2062. search:
  2063. CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
  2064. if (LAGG_PORTACTIVE(lp_next)) {
  2065. return (lp_next);
  2066. }
  2067. }
  2068. found:
  2069. return (rval);
  2070. }
  2071. int
  2072. lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
  2073. {
  2074. #if defined(KERN_TLS) || defined(RATELIMIT)
  2075. if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
  2076. struct lagg_snd_tag *lst;
  2077. struct m_snd_tag *mst;
  2078. mst = m->m_pkthdr.snd_tag;
  2079. lst = mst_to_lst(mst);
  2080. if (lst->tag->ifp != ifp) {
  2081. m_freem(m);
  2082. return (EAGAIN);
  2083. }
  2084. m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
  2085. m_snd_tag_rele(mst);
  2086. }
  2087. #endif
  2088. return (ifp->if_transmit)(ifp, m);
  2089. }
  2090. /*
  2091. * Simple round robin aggregation
  2092. */
  2093. static void
  2094. lagg_rr_attach(struct lagg_softc *sc)
  2095. {
  2096. sc->sc_seq = 0;
  2097. sc->sc_stride = 1;
  2098. }
  2099. static int
  2100. lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
  2101. {
  2102. struct lagg_port *lp;
  2103. uint32_t p;
  2104. p = atomic_fetchadd_32(&sc->sc_seq, 1);
  2105. p /= sc->sc_stride;
  2106. p %= sc->sc_count;
  2107. lp = CK_SLIST_FIRST(&sc->sc_ports);
  2108. while (p--)
  2109. lp = CK_SLIST_NEXT(lp, lp_entries);
  2110. /*
  2111. * Check the port's link state. This will return the next active
  2112. * port if the link is down or the port is NULL.
  2113. */
  2114. if ((lp = lagg_link_active(sc, lp)) == NULL) {
  2115. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
  2116. m_freem(m);
  2117. return (ENETDOWN);
  2118. }
  2119. /* Send mbuf */
  2120. return (lagg_enqueue(lp->lp_ifp, m));
  2121. }
  2122. /*
  2123. * Broadcast mode
  2124. */
  2125. static int
  2126. lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
  2127. {
  2128. int errors = 0;
  2129. int ret;
  2130. struct lagg_port *lp, *last = NULL;
  2131. struct mbuf *m0;
  2132. NET_EPOCH_ASSERT();
  2133. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  2134. if (!LAGG_PORTACTIVE(lp))
  2135. continue;
  2136. if (last != NULL) {
  2137. m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
  2138. if (m0 == NULL) {
  2139. ret = ENOBUFS;
  2140. errors++;
  2141. break;
  2142. }
  2143. lagg_enqueue(last->lp_ifp, m0);
  2144. }
  2145. last = lp;
  2146. }
  2147. if (last == NULL) {
  2148. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
  2149. m_freem(m);
  2150. return (ENOENT);
  2151. }
  2152. if ((last = lagg_link_active(sc, last)) == NULL) {
  2153. errors++;
  2154. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors);
  2155. m_freem(m);
  2156. return (ENETDOWN);
  2157. }
  2158. ret = lagg_enqueue(last->lp_ifp, m);
  2159. if (errors != 0)
  2160. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors);
  2161. return (ret);
  2162. }
  2163. /*
  2164. * Active failover
  2165. */
  2166. static int
  2167. lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
  2168. {
  2169. struct lagg_port *lp;
  2170. /* Use the master port if active or the next available port */
  2171. if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
  2172. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
  2173. m_freem(m);
  2174. return (ENETDOWN);
  2175. }
  2176. /* Send mbuf */
  2177. return (lagg_enqueue(lp->lp_ifp, m));
  2178. }
  2179. static struct mbuf *
  2180. lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  2181. {
  2182. struct ifnet *ifp = sc->sc_ifp;
  2183. struct lagg_port *tmp_tp;
  2184. if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
  2185. m->m_pkthdr.rcvif = ifp;
  2186. return (m);
  2187. }
  2188. if (!LAGG_PORTACTIVE(sc->sc_primary)) {
  2189. tmp_tp = lagg_link_active(sc, sc->sc_primary);
  2190. /*
  2191. * If tmp_tp is null, we've received a packet when all
  2192. * our links are down. Weird, but process it anyways.
  2193. */
  2194. if (tmp_tp == NULL || tmp_tp == lp) {
  2195. m->m_pkthdr.rcvif = ifp;
  2196. return (m);
  2197. }
  2198. }
  2199. m_freem(m);
  2200. return (NULL);
  2201. }
  2202. /*
  2203. * Loadbalancing
  2204. */
  2205. static void
  2206. lagg_lb_attach(struct lagg_softc *sc)
  2207. {
  2208. struct lagg_port *lp;
  2209. struct lagg_lb *lb;
  2210. LAGG_XLOCK_ASSERT(sc);
  2211. lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
  2212. lb->lb_key = m_ether_tcpip_hash_init();
  2213. sc->sc_psc = lb;
  2214. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  2215. lagg_lb_port_create(lp);
  2216. }
  2217. static void
  2218. lagg_lb_detach(struct lagg_softc *sc)
  2219. {
  2220. struct lagg_lb *lb;
  2221. lb = (struct lagg_lb *)sc->sc_psc;
  2222. if (lb != NULL)
  2223. free(lb, M_LAGG);
  2224. }
  2225. static int
  2226. lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
  2227. {
  2228. struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
  2229. struct lagg_port *lp_next;
  2230. int i = 0, rv;
  2231. rv = 0;
  2232. bzero(&lb->lb_ports, sizeof(lb->lb_ports));
  2233. LAGG_XLOCK_ASSERT(sc);
  2234. CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
  2235. if (lp_next == lp)
  2236. continue;
  2237. if (i >= LAGG_MAX_PORTS) {
  2238. rv = EINVAL;
  2239. break;
  2240. }
  2241. if (sc->sc_ifflags & IFF_DEBUG)
  2242. printf("%s: port %s at index %d\n",
  2243. sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
  2244. lb->lb_ports[i++] = lp_next;
  2245. }
  2246. return (rv);
  2247. }
  2248. static int
  2249. lagg_lb_port_create(struct lagg_port *lp)
  2250. {
  2251. struct lagg_softc *sc = lp->lp_softc;
  2252. return (lagg_lb_porttable(sc, NULL));
  2253. }
  2254. static void
  2255. lagg_lb_port_destroy(struct lagg_port *lp)
  2256. {
  2257. struct lagg_softc *sc = lp->lp_softc;
  2258. lagg_lb_porttable(sc, lp);
  2259. }
  2260. static int
  2261. lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
  2262. {
  2263. struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
  2264. struct lagg_port *lp = NULL;
  2265. uint32_t p = 0;
  2266. if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
  2267. M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
  2268. p = m->m_pkthdr.flowid >> sc->flowid_shift;
  2269. else
  2270. p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
  2271. p %= sc->sc_count;
  2272. lp = lb->lb_ports[p];
  2273. /*
  2274. * Check the port's link state. This will return the next active
  2275. * port if the link is down or the port is NULL.
  2276. */
  2277. if ((lp = lagg_link_active(sc, lp)) == NULL) {
  2278. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
  2279. m_freem(m);
  2280. return (ENETDOWN);
  2281. }
  2282. /* Send mbuf */
  2283. return (lagg_enqueue(lp->lp_ifp, m));
  2284. }
  2285. /*
  2286. * 802.3ad LACP
  2287. */
  2288. static void
  2289. lagg_lacp_attach(struct lagg_softc *sc)
  2290. {
  2291. struct lagg_port *lp;
  2292. lacp_attach(sc);
  2293. LAGG_XLOCK_ASSERT(sc);
  2294. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  2295. lacp_port_create(lp);
  2296. }
  2297. static void
  2298. lagg_lacp_detach(struct lagg_softc *sc)
  2299. {
  2300. struct lagg_port *lp;
  2301. void *psc;
  2302. LAGG_XLOCK_ASSERT(sc);
  2303. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  2304. lacp_port_destroy(lp);
  2305. psc = sc->sc_psc;
  2306. sc->sc_psc = NULL;
  2307. lacp_detach(psc);
  2308. }
  2309. static void
  2310. lagg_lacp_lladdr(struct lagg_softc *sc)
  2311. {
  2312. struct lagg_port *lp;
  2313. LAGG_SXLOCK_ASSERT(sc);
  2314. /* purge all the lacp ports */
  2315. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  2316. lacp_port_destroy(lp);
  2317. /* add them back in */
  2318. CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  2319. lacp_port_create(lp);
  2320. }
  2321. static int
  2322. lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
  2323. {
  2324. struct lagg_port *lp;
  2325. int err;
  2326. lp = lacp_select_tx_port(sc, m, &err);
  2327. if (lp == NULL) {
  2328. if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
  2329. m_freem(m);
  2330. return (err);
  2331. }
  2332. /* Send mbuf */
  2333. return (lagg_enqueue(lp->lp_ifp, m));
  2334. }
  2335. static struct mbuf *
  2336. lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  2337. {
  2338. struct ifnet *ifp = sc->sc_ifp;
  2339. struct ether_header *eh;
  2340. u_short etype;
  2341. eh = mtod(m, struct ether_header *);
  2342. etype = ntohs(eh->ether_type);
  2343. /* Tap off LACP control messages */
  2344. if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
  2345. m = lacp_input(lp, m);
  2346. if (m == NULL)
  2347. return (NULL);
  2348. }
  2349. /*
  2350. * If the port is not collecting or not in the active aggregator then
  2351. * free and return.
  2352. */
  2353. if (!lacp_iscollecting(lp) || !lacp_isactive(lp)) {
  2354. m_freem(m);
  2355. return (NULL);
  2356. }
  2357. m->m_pkthdr.rcvif = ifp;
  2358. return (m);
  2359. }
  2360. /* Default input */
  2361. static struct mbuf *
  2362. lagg_default_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  2363. {
  2364. struct ifnet *ifp = sc->sc_ifp;
  2365. /* Just pass in the packet to our lagg device */
  2366. m->m_pkthdr.rcvif = ifp;
  2367. return (m);
  2368. }