ieee8023ad_lacp.c 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216
  1. /* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */
  2. /*-
  3. * SPDX-License-Identifier: BSD-2-Clause
  4. *
  5. * Copyright (c)2005 YAMAMOTO Takashi,
  6. * Copyright (c)2008 Andrew Thompson <thompsa@FreeBSD.org>
  7. * All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in the
  16. * documentation and/or other materials provided with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  22. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28. * SUCH DAMAGE.
  29. */
  30. #include <sys/cdefs.h>
  31. #include "opt_kern_tls.h"
  32. #include "opt_ratelimit.h"
  33. #include <sys/param.h>
  34. #include <sys/callout.h>
  35. #include <sys/eventhandler.h>
  36. #include <sys/mbuf.h>
  37. #include <sys/systm.h>
  38. #include <sys/malloc.h>
  39. #include <sys/kernel.h> /* hz */
  40. #include <sys/socket.h> /* for net/if.h */
  41. #include <sys/sockio.h>
  42. #include <sys/sysctl.h>
  43. #include <machine/stdarg.h>
  44. #include <sys/lock.h>
  45. #include <sys/rwlock.h>
  46. #include <sys/taskqueue.h>
  47. #include <sys/time.h>
  48. #include <net/if.h>
  49. #include <net/if_var.h>
  50. #include <net/if_private.h>
  51. #include <net/if_dl.h>
  52. #include <net/ethernet.h>
  53. #include <net/infiniband.h>
  54. #include <net/if_media.h>
  55. #include <net/if_types.h>
  56. #include <net/if_lagg.h>
  57. #include <net/ieee8023ad_lacp.h>
  58. /*
  59. * actor system priority and port priority.
  60. * XXX should be configurable.
  61. */
  62. #define LACP_SYSTEM_PRIO 0x8000
  63. #define LACP_PORT_PRIO 0x8000
  64. const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
  65. { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
  66. static const struct tlv_template lacp_info_tlv_template[] = {
  67. { LACP_TYPE_ACTORINFO,
  68. sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
  69. { LACP_TYPE_PARTNERINFO,
  70. sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
  71. { LACP_TYPE_COLLECTORINFO,
  72. sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
  73. { 0, 0 },
  74. };
  75. static const struct tlv_template marker_info_tlv_template[] = {
  76. { MARKER_TYPE_INFO,
  77. sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
  78. { 0, 0 },
  79. };
  80. static const struct tlv_template marker_response_tlv_template[] = {
  81. { MARKER_TYPE_RESPONSE,
  82. sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
  83. { 0, 0 },
  84. };
  85. typedef void (*lacp_timer_func_t)(struct lacp_port *);
  86. static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
  87. static void lacp_fill_markerinfo(struct lacp_port *,
  88. struct lacp_markerinfo *);
  89. static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
  90. static void lacp_suppress_distributing(struct lacp_softc *,
  91. struct lacp_aggregator *);
  92. static void lacp_transit_expire(void *);
  93. static void lacp_update_portmap(struct lacp_softc *);
  94. static void lacp_select_active_aggregator(struct lacp_softc *);
  95. static uint16_t lacp_compose_key(struct lacp_port *);
  96. static int tlv_check(const void *, size_t, const struct tlvhdr *,
  97. const struct tlv_template *, boolean_t);
  98. static void lacp_tick(void *);
  99. static void lacp_fill_aggregator_id(struct lacp_aggregator *,
  100. const struct lacp_port *);
  101. static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
  102. const struct lacp_peerinfo *);
  103. static bool lacp_aggregator_is_compatible(const struct lacp_aggregator *,
  104. const struct lacp_port *);
  105. static bool lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
  106. const struct lacp_peerinfo *);
  107. static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
  108. struct lacp_port *);
  109. static void lacp_aggregator_addref(struct lacp_softc *,
  110. struct lacp_aggregator *);
  111. static void lacp_aggregator_delref(struct lacp_softc *,
  112. struct lacp_aggregator *);
  113. /* receive machine */
  114. static int lacp_pdu_input(struct lacp_port *, struct mbuf *);
  115. static int lacp_marker_input(struct lacp_port *, struct mbuf *);
  116. static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
  117. static void lacp_sm_rx_timer(struct lacp_port *);
  118. static void lacp_sm_rx_set_expired(struct lacp_port *);
  119. static void lacp_sm_rx_update_ntt(struct lacp_port *,
  120. const struct lacpdu *);
  121. static void lacp_sm_rx_record_pdu(struct lacp_port *,
  122. const struct lacpdu *);
  123. static void lacp_sm_rx_update_selected(struct lacp_port *,
  124. const struct lacpdu *);
  125. static void lacp_sm_rx_record_default(struct lacp_port *);
  126. static void lacp_sm_rx_update_default_selected(struct lacp_port *);
  127. static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
  128. const struct lacp_peerinfo *);
  129. /* mux machine */
  130. static void lacp_sm_mux(struct lacp_port *);
  131. static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
  132. static void lacp_sm_mux_timer(struct lacp_port *);
  133. /* periodic transmit machine */
  134. static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
  135. static void lacp_sm_ptx_tx_schedule(struct lacp_port *);
  136. static void lacp_sm_ptx_timer(struct lacp_port *);
  137. /* transmit machine */
  138. static void lacp_sm_tx(struct lacp_port *);
  139. static void lacp_sm_assert_ntt(struct lacp_port *);
  140. static void lacp_run_timers(struct lacp_port *);
  141. static int lacp_compare_peerinfo(const struct lacp_peerinfo *,
  142. const struct lacp_peerinfo *);
  143. static int lacp_compare_systemid(const struct lacp_systemid *,
  144. const struct lacp_systemid *);
  145. static void lacp_port_enable(struct lacp_port *);
  146. static void lacp_port_disable(struct lacp_port *);
  147. static void lacp_select(struct lacp_port *);
  148. static void lacp_unselect(struct lacp_port *);
  149. static void lacp_disable_collecting(struct lacp_port *);
  150. static void lacp_enable_collecting(struct lacp_port *);
  151. static void lacp_disable_distributing(struct lacp_port *);
  152. static void lacp_enable_distributing(struct lacp_port *);
  153. static int lacp_xmit_lacpdu(struct lacp_port *);
  154. static int lacp_xmit_marker(struct lacp_port *);
  155. /* Debugging */
  156. static void lacp_dump_lacpdu(const struct lacpdu *);
  157. static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
  158. size_t);
  159. static const char *lacp_format_lagid(const struct lacp_peerinfo *,
  160. const struct lacp_peerinfo *, char *, size_t);
  161. static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
  162. char *, size_t);
  163. static const char *lacp_format_state(uint8_t, char *, size_t);
  164. static const char *lacp_format_mac(const uint8_t *, char *, size_t);
  165. static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
  166. size_t);
  167. static const char *lacp_format_portid(const struct lacp_portid *, char *,
  168. size_t);
  169. static void lacp_dprintf(const struct lacp_port *, const char *, ...)
  170. __attribute__((__format__(__printf__, 2, 3)));
  171. VNET_DEFINE_STATIC(int, lacp_debug);
  172. #define V_lacp_debug VNET(lacp_debug)
  173. SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  174. "ieee802.3ad");
  175. SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
  176. &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
  177. VNET_DEFINE_STATIC(int, lacp_default_strict_mode) = 1;
  178. SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode,
  179. CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_default_strict_mode), 0,
  180. "LACP strict protocol compliance default");
  181. #define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
  182. #define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
  183. #define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
  184. /*
  185. * partner administration variables.
  186. * XXX should be configurable.
  187. */
  188. static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
  189. .lip_systemid = { .lsi_prio = 0xffff },
  190. .lip_portid = { .lpi_prio = 0xffff },
  191. .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
  192. LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
  193. };
  194. static const struct lacp_peerinfo lacp_partner_admin_strict = {
  195. .lip_systemid = { .lsi_prio = 0xffff },
  196. .lip_portid = { .lpi_prio = 0xffff },
  197. .lip_state = 0,
  198. };
  199. static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
  200. [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
  201. [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
  202. [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
  203. };
  204. struct mbuf *
  205. lacp_input(struct lagg_port *lgp, struct mbuf *m)
  206. {
  207. struct lacp_port *lp = LACP_PORT(lgp);
  208. uint8_t subtype;
  209. if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) {
  210. m_freem(m);
  211. return (NULL);
  212. }
  213. m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype);
  214. switch (subtype) {
  215. case SLOWPROTOCOLS_SUBTYPE_LACP:
  216. lacp_pdu_input(lp, m);
  217. return (NULL);
  218. case SLOWPROTOCOLS_SUBTYPE_MARKER:
  219. lacp_marker_input(lp, m);
  220. return (NULL);
  221. }
  222. /* Not a subtype we are interested in */
  223. return (m);
  224. }
  225. /*
  226. * lacp_pdu_input: process lacpdu
  227. */
  228. static int
  229. lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
  230. {
  231. struct lacp_softc *lsc = lp->lp_lsc;
  232. struct lacpdu *du;
  233. int error = 0;
  234. if (m->m_pkthdr.len != sizeof(*du)) {
  235. goto bad;
  236. }
  237. if ((m->m_flags & M_MCAST) == 0) {
  238. goto bad;
  239. }
  240. if (m->m_len < sizeof(*du)) {
  241. m = m_pullup(m, sizeof(*du));
  242. if (m == NULL) {
  243. return (ENOMEM);
  244. }
  245. }
  246. du = mtod(m, struct lacpdu *);
  247. if (memcmp(&du->ldu_eh.ether_dhost,
  248. &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
  249. goto bad;
  250. }
  251. /*
  252. * ignore the version for compatibility with
  253. * the future protocol revisions.
  254. */
  255. #if 0
  256. if (du->ldu_sph.sph_version != 1) {
  257. goto bad;
  258. }
  259. #endif
  260. /*
  261. * ignore tlv types for compatibility with
  262. * the future protocol revisions.
  263. */
  264. if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
  265. lacp_info_tlv_template, FALSE)) {
  266. goto bad;
  267. }
  268. if (V_lacp_debug > 0) {
  269. lacp_dprintf(lp, "lacpdu receive\n");
  270. lacp_dump_lacpdu(du);
  271. }
  272. if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) {
  273. LACP_TPRINTF((lp, "Dropping RX PDU\n"));
  274. goto bad;
  275. }
  276. LACP_LOCK(lsc);
  277. lacp_sm_rx(lp, du);
  278. LACP_UNLOCK(lsc);
  279. m_freem(m);
  280. return (error);
  281. bad:
  282. m_freem(m);
  283. return (EINVAL);
  284. }
  285. static void
  286. lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
  287. {
  288. struct lagg_port *lgp = lp->lp_lagg;
  289. struct lagg_softc *sc = lgp->lp_softc;
  290. info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
  291. memcpy(&info->lip_systemid.lsi_mac,
  292. IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  293. info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
  294. info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
  295. info->lip_state = lp->lp_state;
  296. }
  297. static void
  298. lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info)
  299. {
  300. struct ifnet *ifp = lp->lp_ifp;
  301. /* Fill in the port index and system id (encoded as the MAC) */
  302. info->mi_rq_port = htons(ifp->if_index);
  303. memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN);
  304. info->mi_rq_xid = htonl(0);
  305. }
  306. static int
  307. lacp_xmit_lacpdu(struct lacp_port *lp)
  308. {
  309. struct lagg_port *lgp = lp->lp_lagg;
  310. struct mbuf *m;
  311. struct lacpdu *du;
  312. int error;
  313. LACP_LOCK_ASSERT(lp->lp_lsc);
  314. m = m_gethdr(M_NOWAIT, MT_DATA);
  315. if (m == NULL) {
  316. return (ENOMEM);
  317. }
  318. m->m_len = m->m_pkthdr.len = sizeof(*du);
  319. du = mtod(m, struct lacpdu *);
  320. memset(du, 0, sizeof(*du));
  321. memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
  322. ETHER_ADDR_LEN);
  323. memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
  324. du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
  325. du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
  326. du->ldu_sph.sph_version = 1;
  327. TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
  328. du->ldu_actor = lp->lp_actor;
  329. TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
  330. sizeof(du->ldu_partner));
  331. du->ldu_partner = lp->lp_partner;
  332. TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
  333. sizeof(du->ldu_collector));
  334. du->ldu_collector.lci_maxdelay = 0;
  335. if (V_lacp_debug > 0) {
  336. lacp_dprintf(lp, "lacpdu transmit\n");
  337. lacp_dump_lacpdu(du);
  338. }
  339. m->m_flags |= M_MCAST;
  340. /*
  341. * XXX should use higher priority queue.
  342. * otherwise network congestion can break aggregation.
  343. */
  344. error = lagg_enqueue(lp->lp_ifp, m);
  345. return (error);
  346. }
  347. static int
  348. lacp_xmit_marker(struct lacp_port *lp)
  349. {
  350. struct lagg_port *lgp = lp->lp_lagg;
  351. struct mbuf *m;
  352. struct markerdu *mdu;
  353. int error;
  354. LACP_LOCK_ASSERT(lp->lp_lsc);
  355. m = m_gethdr(M_NOWAIT, MT_DATA);
  356. if (m == NULL) {
  357. return (ENOMEM);
  358. }
  359. m->m_len = m->m_pkthdr.len = sizeof(*mdu);
  360. mdu = mtod(m, struct markerdu *);
  361. memset(mdu, 0, sizeof(*mdu));
  362. memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
  363. ETHER_ADDR_LEN);
  364. memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
  365. mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW);
  366. mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER;
  367. mdu->mdu_sph.sph_version = 1;
  368. /* Bump the transaction id and copy over the marker info */
  369. lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1);
  370. TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info));
  371. mdu->mdu_info = lp->lp_marker;
  372. LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n",
  373. ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":",
  374. ntohl(mdu->mdu_info.mi_rq_xid)));
  375. m->m_flags |= M_MCAST;
  376. error = lagg_enqueue(lp->lp_ifp, m);
  377. return (error);
  378. }
  379. void
  380. lacp_linkstate(struct lagg_port *lgp)
  381. {
  382. struct lacp_port *lp = LACP_PORT(lgp);
  383. struct lacp_softc *lsc = lp->lp_lsc;
  384. struct ifnet *ifp = lgp->lp_ifp;
  385. struct ifmediareq ifmr;
  386. int error = 0;
  387. u_int media;
  388. uint8_t old_state;
  389. uint16_t old_key;
  390. bzero((char *)&ifmr, sizeof(ifmr));
  391. error = (*ifp->if_ioctl)(ifp, SIOCGIFXMEDIA, (caddr_t)&ifmr);
  392. if (error != 0) {
  393. bzero((char *)&ifmr, sizeof(ifmr));
  394. error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
  395. }
  396. if (error != 0)
  397. return;
  398. LACP_LOCK(lsc);
  399. media = ifmr.ifm_active;
  400. LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
  401. "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
  402. (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
  403. old_state = lp->lp_state;
  404. old_key = lp->lp_key;
  405. lp->lp_media = media;
  406. /*
  407. * If the port is not an active full duplex Ethernet link then it can
  408. * not be aggregated.
  409. */
  410. if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
  411. ifp->if_link_state != LINK_STATE_UP) {
  412. lacp_port_disable(lp);
  413. } else {
  414. lacp_port_enable(lp);
  415. }
  416. lp->lp_key = lacp_compose_key(lp);
  417. if (old_state != lp->lp_state || old_key != lp->lp_key) {
  418. LACP_DPRINTF((lp, "-> UNSELECTED\n"));
  419. lp->lp_selected = LACP_UNSELECTED;
  420. }
  421. LACP_UNLOCK(lsc);
  422. }
  423. static void
  424. lacp_tick(void *arg)
  425. {
  426. struct lacp_softc *lsc = arg;
  427. struct lacp_port *lp;
  428. LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
  429. if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
  430. continue;
  431. CURVNET_SET(lp->lp_ifp->if_vnet);
  432. lacp_run_timers(lp);
  433. lacp_select(lp);
  434. lacp_sm_mux(lp);
  435. lacp_sm_tx(lp);
  436. lacp_sm_ptx_tx_schedule(lp);
  437. CURVNET_RESTORE();
  438. }
  439. callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
  440. }
  441. int
  442. lacp_port_create(struct lagg_port *lgp)
  443. {
  444. struct lagg_softc *sc = lgp->lp_softc;
  445. struct lacp_softc *lsc = LACP_SOFTC(sc);
  446. struct lacp_port *lp;
  447. struct ifnet *ifp = lgp->lp_ifp;
  448. struct sockaddr_dl sdl;
  449. struct ifmultiaddr *rifma = NULL;
  450. int error;
  451. link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER);
  452. sdl.sdl_alen = ETHER_ADDR_LEN;
  453. bcopy(&ethermulticastaddr_slowprotocols,
  454. LLADDR(&sdl), ETHER_ADDR_LEN);
  455. error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
  456. if (error) {
  457. printf("%s: ADDMULTI failed on %s\n", __func__,
  458. lgp->lp_ifp->if_xname);
  459. return (error);
  460. }
  461. lp = malloc(sizeof(struct lacp_port),
  462. M_DEVBUF, M_NOWAIT|M_ZERO);
  463. if (lp == NULL)
  464. return (ENOMEM);
  465. LACP_LOCK(lsc);
  466. lgp->lp_psc = lp;
  467. lp->lp_ifp = ifp;
  468. lp->lp_lagg = lgp;
  469. lp->lp_lsc = lsc;
  470. lp->lp_ifma = rifma;
  471. LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
  472. lacp_fill_actorinfo(lp, &lp->lp_actor);
  473. lacp_fill_markerinfo(lp, &lp->lp_marker);
  474. lp->lp_state = LACP_STATE_ACTIVITY;
  475. lp->lp_aggregator = NULL;
  476. lacp_sm_rx_set_expired(lp);
  477. LACP_UNLOCK(lsc);
  478. lacp_linkstate(lgp);
  479. return (0);
  480. }
  481. void
  482. lacp_port_destroy(struct lagg_port *lgp)
  483. {
  484. struct lacp_port *lp = LACP_PORT(lgp);
  485. struct lacp_softc *lsc = lp->lp_lsc;
  486. int i;
  487. LACP_LOCK(lsc);
  488. for (i = 0; i < LACP_NTIMER; i++) {
  489. LACP_TIMER_DISARM(lp, i);
  490. }
  491. lacp_disable_collecting(lp);
  492. lacp_disable_distributing(lp);
  493. lacp_unselect(lp);
  494. LIST_REMOVE(lp, lp_next);
  495. LACP_UNLOCK(lsc);
  496. /* The address may have already been removed by if_purgemaddrs() */
  497. if (!lgp->lp_detaching)
  498. if_delmulti_ifma(lp->lp_ifma);
  499. free(lp, M_DEVBUF);
  500. }
  501. void
  502. lacp_req(struct lagg_softc *sc, void *data)
  503. {
  504. struct lacp_opreq *req = (struct lacp_opreq *)data;
  505. struct lacp_softc *lsc = LACP_SOFTC(sc);
  506. struct lacp_aggregator *la;
  507. bzero(req, sizeof(struct lacp_opreq));
  508. /*
  509. * If the LACP softc is NULL, return with the opreq structure full of
  510. * zeros. It is normal for the softc to be NULL while the lagg is
  511. * being destroyed.
  512. */
  513. if (NULL == lsc)
  514. return;
  515. la = lsc->lsc_active_aggregator;
  516. LACP_LOCK(lsc);
  517. if (la != NULL) {
  518. req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
  519. memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
  520. ETHER_ADDR_LEN);
  521. req->actor_key = ntohs(la->la_actor.lip_key);
  522. req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio);
  523. req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno);
  524. req->actor_state = la->la_actor.lip_state;
  525. req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio);
  526. memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac,
  527. ETHER_ADDR_LEN);
  528. req->partner_key = ntohs(la->la_partner.lip_key);
  529. req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio);
  530. req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno);
  531. req->partner_state = la->la_partner.lip_state;
  532. }
  533. LACP_UNLOCK(lsc);
  534. }
  535. void
  536. lacp_portreq(struct lagg_port *lgp, void *data)
  537. {
  538. struct lacp_opreq *req = (struct lacp_opreq *)data;
  539. struct lacp_port *lp = LACP_PORT(lgp);
  540. struct lacp_softc *lsc = lp->lp_lsc;
  541. LACP_LOCK(lsc);
  542. req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio);
  543. memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac,
  544. ETHER_ADDR_LEN);
  545. req->actor_key = ntohs(lp->lp_actor.lip_key);
  546. req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio);
  547. req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno);
  548. req->actor_state = lp->lp_actor.lip_state;
  549. req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio);
  550. memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac,
  551. ETHER_ADDR_LEN);
  552. req->partner_key = ntohs(lp->lp_partner.lip_key);
  553. req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio);
  554. req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno);
  555. req->partner_state = lp->lp_partner.lip_state;
  556. LACP_UNLOCK(lsc);
  557. }
  558. static void
  559. lacp_disable_collecting(struct lacp_port *lp)
  560. {
  561. LACP_DPRINTF((lp, "collecting disabled\n"));
  562. lp->lp_state &= ~LACP_STATE_COLLECTING;
  563. }
  564. static void
  565. lacp_enable_collecting(struct lacp_port *lp)
  566. {
  567. LACP_DPRINTF((lp, "collecting enabled\n"));
  568. lp->lp_state |= LACP_STATE_COLLECTING;
  569. }
  570. static void
  571. lacp_disable_distributing(struct lacp_port *lp)
  572. {
  573. struct lacp_aggregator *la = lp->lp_aggregator;
  574. struct lacp_softc *lsc = lp->lp_lsc;
  575. struct lagg_softc *sc = lsc->lsc_softc;
  576. char buf[LACP_LAGIDSTR_MAX+1];
  577. LACP_LOCK_ASSERT(lsc);
  578. if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
  579. return;
  580. }
  581. KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
  582. KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
  583. KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
  584. LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
  585. "nports %d -> %d\n",
  586. lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
  587. la->la_nports, la->la_nports - 1));
  588. TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
  589. la->la_nports--;
  590. sc->sc_active = la->la_nports;
  591. if (lsc->lsc_active_aggregator == la) {
  592. lacp_suppress_distributing(lsc, la);
  593. lacp_select_active_aggregator(lsc);
  594. /* regenerate the port map, the active aggregator has changed */
  595. lacp_update_portmap(lsc);
  596. }
  597. lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
  598. if_link_state_change(sc->sc_ifp,
  599. sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN);
  600. }
  601. static void
  602. lacp_enable_distributing(struct lacp_port *lp)
  603. {
  604. struct lacp_aggregator *la = lp->lp_aggregator;
  605. struct lacp_softc *lsc = lp->lp_lsc;
  606. struct lagg_softc *sc = lsc->lsc_softc;
  607. char buf[LACP_LAGIDSTR_MAX+1];
  608. LACP_LOCK_ASSERT(lsc);
  609. if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
  610. return;
  611. }
  612. LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
  613. "nports %d -> %d\n",
  614. lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
  615. la->la_nports, la->la_nports + 1));
  616. KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
  617. TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
  618. la->la_nports++;
  619. sc->sc_active = la->la_nports;
  620. lp->lp_state |= LACP_STATE_DISTRIBUTING;
  621. if (lsc->lsc_active_aggregator == la) {
  622. lacp_suppress_distributing(lsc, la);
  623. lacp_update_portmap(lsc);
  624. } else
  625. /* try to become the active aggregator */
  626. lacp_select_active_aggregator(lsc);
  627. if_link_state_change(sc->sc_ifp,
  628. sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN);
  629. }
  630. static void
  631. lacp_transit_expire(void *vp)
  632. {
  633. struct lacp_softc *lsc = vp;
  634. LACP_LOCK_ASSERT(lsc);
  635. CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
  636. LACP_TRACE(NULL);
  637. CURVNET_RESTORE();
  638. lsc->lsc_suppress_distributing = FALSE;
  639. }
  640. void
  641. lacp_attach(struct lagg_softc *sc)
  642. {
  643. struct lacp_softc *lsc;
  644. lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
  645. sc->sc_psc = lsc;
  646. lsc->lsc_softc = sc;
  647. lsc->lsc_hashkey = m_ether_tcpip_hash_init();
  648. lsc->lsc_active_aggregator = NULL;
  649. lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
  650. LACP_LOCK_INIT(lsc);
  651. TAILQ_INIT(&lsc->lsc_aggregators);
  652. LIST_INIT(&lsc->lsc_ports);
  653. callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0);
  654. callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0);
  655. /* if the lagg is already up then do the same */
  656. if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
  657. lacp_init(sc);
  658. }
  659. void
  660. lacp_detach(void *psc)
  661. {
  662. struct lacp_softc *lsc = (struct lacp_softc *)psc;
  663. KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
  664. ("aggregators still active"));
  665. KASSERT(lsc->lsc_active_aggregator == NULL,
  666. ("aggregator still attached"));
  667. callout_drain(&lsc->lsc_transit_callout);
  668. callout_drain(&lsc->lsc_callout);
  669. LACP_LOCK_DESTROY(lsc);
  670. free(lsc, M_DEVBUF);
  671. }
  672. void
  673. lacp_init(struct lagg_softc *sc)
  674. {
  675. struct lacp_softc *lsc = LACP_SOFTC(sc);
  676. LACP_LOCK(lsc);
  677. callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
  678. LACP_UNLOCK(lsc);
  679. }
  680. void
  681. lacp_stop(struct lagg_softc *sc)
  682. {
  683. struct lacp_softc *lsc = LACP_SOFTC(sc);
  684. LACP_LOCK(lsc);
  685. callout_stop(&lsc->lsc_transit_callout);
  686. callout_stop(&lsc->lsc_callout);
  687. LACP_UNLOCK(lsc);
  688. }
  689. struct lagg_port *
  690. lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash,
  691. uint8_t numa_domain, int *err)
  692. {
  693. struct lacp_softc *lsc = LACP_SOFTC(sc);
  694. struct lacp_portmap *pm;
  695. struct lacp_port *lp;
  696. struct lacp_port **map;
  697. int count;
  698. if (__predict_false(lsc->lsc_suppress_distributing)) {
  699. LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
  700. *err = ENOBUFS;
  701. return (NULL);
  702. }
  703. pm = &lsc->lsc_pmap[lsc->lsc_activemap];
  704. if (pm->pm_count == 0) {
  705. LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
  706. *err = ENETDOWN;
  707. return (NULL);
  708. }
  709. #ifdef NUMA
  710. if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
  711. pm->pm_num_dom > 1 && numa_domain < MAXMEMDOM) {
  712. count = pm->pm_numa[numa_domain].count;
  713. if (count > 0) {
  714. map = pm->pm_numa[numa_domain].map;
  715. } else {
  716. /* No ports on this domain; use global hash. */
  717. map = pm->pm_map;
  718. count = pm->pm_count;
  719. }
  720. } else
  721. #endif
  722. {
  723. map = pm->pm_map;
  724. count = pm->pm_count;
  725. }
  726. hash %= count;
  727. lp = map[hash];
  728. return (lp->lp_lagg);
  729. }
  730. struct lagg_port *
  731. lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m, int *err)
  732. {
  733. struct lacp_softc *lsc = LACP_SOFTC(sc);
  734. uint32_t hash;
  735. uint8_t numa_domain;
  736. if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
  737. M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
  738. hash = m->m_pkthdr.flowid >> sc->flowid_shift;
  739. else
  740. hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
  741. numa_domain = m->m_pkthdr.numa_domain;
  742. return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, err));
  743. }
  744. /*
  745. * lacp_suppress_distributing: drop transmit packets for a while
  746. * to preserve packet ordering.
  747. */
  748. static void
  749. lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
  750. {
  751. struct lacp_port *lp;
  752. if (lsc->lsc_active_aggregator != la) {
  753. return;
  754. }
  755. LACP_TRACE(NULL);
  756. lsc->lsc_suppress_distributing = TRUE;
  757. /* send a marker frame down each port to verify the queues are empty */
  758. LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
  759. lp->lp_flags |= LACP_PORT_MARK;
  760. if (lacp_xmit_marker(lp) != 0)
  761. lp->lp_flags &= ~LACP_PORT_MARK;
  762. }
  763. /* set a timeout for the marker frames */
  764. callout_reset(&lsc->lsc_transit_callout,
  765. LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
  766. }
  767. static int
  768. lacp_compare_peerinfo(const struct lacp_peerinfo *a,
  769. const struct lacp_peerinfo *b)
  770. {
  771. return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
  772. }
  773. static int
  774. lacp_compare_systemid(const struct lacp_systemid *a,
  775. const struct lacp_systemid *b)
  776. {
  777. return (memcmp(a, b, sizeof(*a)));
  778. }
  779. #if 0 /* unused */
  780. static int
  781. lacp_compare_portid(const struct lacp_portid *a,
  782. const struct lacp_portid *b)
  783. {
  784. return (memcmp(a, b, sizeof(*a)));
  785. }
  786. #endif
  787. static uint64_t
  788. lacp_aggregator_bandwidth(struct lacp_aggregator *la)
  789. {
  790. struct lacp_port *lp;
  791. uint64_t speed;
  792. lp = TAILQ_FIRST(&la->la_ports);
  793. if (lp == NULL) {
  794. return (0);
  795. }
  796. speed = ifmedia_baudrate(lp->lp_media);
  797. speed *= la->la_nports;
  798. if (speed == 0) {
  799. LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
  800. lp->lp_media, la->la_nports));
  801. }
  802. return (speed);
  803. }
  804. /*
  805. * lacp_select_active_aggregator: select an aggregator to be used to transmit
  806. * packets from lagg(4) interface.
  807. */
  808. static void
  809. lacp_select_active_aggregator(struct lacp_softc *lsc)
  810. {
  811. struct lacp_aggregator *la;
  812. struct lacp_aggregator *best_la = NULL;
  813. uint64_t best_speed = 0;
  814. char buf[LACP_LAGIDSTR_MAX+1];
  815. LACP_TRACE(NULL);
  816. TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
  817. uint64_t speed;
  818. if (la->la_nports == 0) {
  819. continue;
  820. }
  821. speed = lacp_aggregator_bandwidth(la);
  822. LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
  823. lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
  824. speed, la->la_nports));
  825. /*
  826. * This aggregator is chosen if the partner has a better
  827. * system priority or, the total aggregated speed is higher
  828. * or, it is already the chosen aggregator
  829. */
  830. if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
  831. LACP_SYS_PRI(best_la->la_partner)) ||
  832. speed > best_speed ||
  833. (speed == best_speed &&
  834. la == lsc->lsc_active_aggregator)) {
  835. best_la = la;
  836. best_speed = speed;
  837. }
  838. }
  839. KASSERT(best_la == NULL || best_la->la_nports > 0,
  840. ("invalid aggregator refcnt"));
  841. KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
  842. ("invalid aggregator list"));
  843. if (lsc->lsc_active_aggregator != best_la) {
  844. LACP_DPRINTF((NULL, "active aggregator changed\n"));
  845. LACP_DPRINTF((NULL, "old %s\n",
  846. lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
  847. buf, sizeof(buf))));
  848. } else {
  849. LACP_DPRINTF((NULL, "active aggregator not changed\n"));
  850. }
  851. LACP_DPRINTF((NULL, "new %s\n",
  852. lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
  853. if (lsc->lsc_active_aggregator != best_la) {
  854. lsc->lsc_active_aggregator = best_la;
  855. lacp_update_portmap(lsc);
  856. if (best_la) {
  857. lacp_suppress_distributing(lsc, best_la);
  858. }
  859. }
  860. }
  861. /*
  862. * Updated the inactive portmap array with the new list of ports and
  863. * make it live.
  864. */
  865. static void
  866. lacp_update_portmap(struct lacp_softc *lsc)
  867. {
  868. struct lagg_softc *sc = lsc->lsc_softc;
  869. struct lacp_aggregator *la;
  870. struct lacp_portmap *p;
  871. struct lacp_port *lp;
  872. uint64_t speed;
  873. u_int newmap;
  874. int i;
  875. #ifdef NUMA
  876. int count;
  877. uint8_t domain;
  878. #endif
  879. newmap = lsc->lsc_activemap == 0 ? 1 : 0;
  880. p = &lsc->lsc_pmap[newmap];
  881. la = lsc->lsc_active_aggregator;
  882. speed = 0;
  883. bzero(p, sizeof(struct lacp_portmap));
  884. if (la != NULL && la->la_nports > 0) {
  885. p->pm_count = la->la_nports;
  886. i = 0;
  887. TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) {
  888. p->pm_map[i++] = lp;
  889. #ifdef NUMA
  890. domain = lp->lp_ifp->if_numa_domain;
  891. if (domain >= MAXMEMDOM)
  892. continue;
  893. count = p->pm_numa[domain].count;
  894. p->pm_numa[domain].map[count] = lp;
  895. p->pm_numa[domain].count++;
  896. #endif
  897. }
  898. KASSERT(i == p->pm_count, ("Invalid port count"));
  899. #ifdef NUMA
  900. for (i = 0; i < MAXMEMDOM; i++) {
  901. if (p->pm_numa[i].count != 0)
  902. p->pm_num_dom++;
  903. }
  904. #endif
  905. speed = lacp_aggregator_bandwidth(la);
  906. }
  907. sc->sc_ifp->if_baudrate = speed;
  908. EVENTHANDLER_INVOKE(ifnet_event, sc->sc_ifp,
  909. IFNET_EVENT_UPDATE_BAUDRATE);
  910. /* switch the active portmap over */
  911. atomic_store_rel_int(&lsc->lsc_activemap, newmap);
  912. LACP_DPRINTF((NULL, "Set table %d with %d ports\n",
  913. lsc->lsc_activemap,
  914. lsc->lsc_pmap[lsc->lsc_activemap].pm_count));
  915. }
/*
 * lacp_compose_key: build the 16-bit key for a port.
 *
 * For a port without LACP_STATE_AGGREGATION the key is the port's
 * if_index with bit 15 set.  For an aggregatable port the low bits hold a
 * representative IFM subtype for the port's speed class (every subtype in
 * a class maps to the same constant; unknown subtypes are used as-is) and
 * bits 5..14 hold bits of the lagg device's if_index.
 *
 * Returns the key converted with htons().
 */
static uint16_t
lacp_compose_key(struct lacp_port *lp)
{
	struct lagg_port *lgp = lp->lp_lagg;
	struct lagg_softc *sc = lgp->lp_softc;
	u_int media = lp->lp_media;
	uint16_t key;

	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
		/*
		 * non-aggregatable links should have unique keys.
		 *
		 * XXX this isn't really unique as if_index is 16 bit.
		 */

		/* bit 0..14:	(some bits of) if_index of this port */
		key = lp->lp_ifp->if_index;
		/* bit 15:	1 */
		key |= 0x8000;
	} else {
		u_int subtype = IFM_SUBTYPE(media);

		KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
		KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));

		/* bit 0..4:	IFM_SUBTYPE modulo speed */
		switch (subtype) {
		/* 10 Mb/s class */
		case IFM_10_T:
		case IFM_10_2:
		case IFM_10_5:
		case IFM_10_STP:
		case IFM_10_FL:
			key = IFM_10_T;
			break;
		/* 100 Mb/s class */
		case IFM_100_TX:
		case IFM_100_FX:
		case IFM_100_T4:
		case IFM_100_VG:
		case IFM_100_T2:
		case IFM_100_T:
		case IFM_100_SGMII:
			key = IFM_100_TX;
			break;
		/* 1 Gb/s class */
		case IFM_1000_SX:
		case IFM_1000_LX:
		case IFM_1000_CX:
		case IFM_1000_T:
		case IFM_1000_KX:
		case IFM_1000_SGMII:
		case IFM_1000_CX_SGMII:
			key = IFM_1000_SX;
			break;
		/* 10 Gb/s class */
		case IFM_10G_LR:
		case IFM_10G_SR:
		case IFM_10G_CX4:
		case IFM_10G_TWINAX:
		case IFM_10G_TWINAX_LONG:
		case IFM_10G_LRM:
		case IFM_10G_T:
		case IFM_10G_KX4:
		case IFM_10G_KR:
		case IFM_10G_CR1:
		case IFM_10G_ER:
		case IFM_10G_SFI:
		case IFM_10G_AOC:
			key = IFM_10G_LR;
			break;
		/* 20 Gb/s class */
		case IFM_20G_KR2:
			key = IFM_20G_KR2;
			break;
		/* 2.5 Gb/s class */
		case IFM_2500_KX:
		case IFM_2500_T:
		case IFM_2500_X:
			key = IFM_2500_KX;
			break;
		/* 5 Gb/s class */
		case IFM_5000_T:
		case IFM_5000_KR:
		case IFM_5000_KR_S:
		case IFM_5000_KR1:
			key = IFM_5000_T;
			break;
		/* 50 Gb/s class */
		case IFM_50G_PCIE:
		case IFM_50G_CR2:
		case IFM_50G_KR2:
		case IFM_50G_KR4:
		case IFM_50G_SR2:
		case IFM_50G_LR2:
		case IFM_50G_LAUI2_AC:
		case IFM_50G_LAUI2:
		case IFM_50G_AUI2_AC:
		case IFM_50G_AUI2:
		case IFM_50G_CP:
		case IFM_50G_SR:
		case IFM_50G_LR:
		case IFM_50G_FR:
		case IFM_50G_KR_PAM4:
		case IFM_50G_AUI1_AC:
		case IFM_50G_AUI1:
			key = IFM_50G_PCIE;
			break;
		/* 56 Gb/s class */
		case IFM_56G_R4:
			key = IFM_56G_R4;
			break;
		/* 25 Gb/s class */
		case IFM_25G_PCIE:
		case IFM_25G_CR:
		case IFM_25G_KR:
		case IFM_25G_SR:
		case IFM_25G_LR:
		case IFM_25G_ACC:
		case IFM_25G_AOC:
		case IFM_25G_T:
		case IFM_25G_CR_S:
		case IFM_25G_CR1:
		case IFM_25G_KR_S:
		case IFM_25G_AUI:
		case IFM_25G_KR1:
			key = IFM_25G_PCIE;
			break;
		/* 40 Gb/s class */
		case IFM_40G_CR4:
		case IFM_40G_SR4:
		case IFM_40G_LR4:
		case IFM_40G_LM4:
		case IFM_40G_XLPPI:
		case IFM_40G_KR4:
		case IFM_40G_XLAUI:
		case IFM_40G_XLAUI_AC:
		case IFM_40G_ER4:
			key = IFM_40G_CR4;
			break;
		/* 100 Gb/s class */
		case IFM_100G_CR4:
		case IFM_100G_SR4:
		case IFM_100G_KR4:
		case IFM_100G_LR4:
		case IFM_100G_CAUI4_AC:
		case IFM_100G_CAUI4:
		case IFM_100G_AUI4_AC:
		case IFM_100G_AUI4:
		case IFM_100G_CR_PAM4:
		case IFM_100G_KR_PAM4:
		case IFM_100G_CP2:
		case IFM_100G_SR2:
		case IFM_100G_DR:
		case IFM_100G_KR2_PAM4:
		case IFM_100G_CAUI2_AC:
		case IFM_100G_CAUI2:
		case IFM_100G_AUI2_AC:
		case IFM_100G_AUI2:
			key = IFM_100G_CR4;
			break;
		/* 200 Gb/s class */
		case IFM_200G_CR4_PAM4:
		case IFM_200G_SR4:
		case IFM_200G_FR4:
		case IFM_200G_LR4:
		case IFM_200G_DR4:
		case IFM_200G_KR4_PAM4:
		case IFM_200G_AUI4_AC:
		case IFM_200G_AUI4:
		case IFM_200G_AUI8_AC:
		case IFM_200G_AUI8:
			key = IFM_200G_CR4_PAM4;
			break;
		/* 400 Gb/s class */
		case IFM_400G_FR8:
		case IFM_400G_LR8:
		case IFM_400G_DR4:
		case IFM_400G_AUI8_AC:
		case IFM_400G_AUI8:
			key = IFM_400G_FR8;
			break;
		/* Subtype not listed above: use it unchanged. */
		default:
			key = subtype;
			break;
		}
		/* bit 5..14:	(some bits of) if_index of lagg device */
		key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
		/* bit 15:	0 */
	}
	return (htons(key));
}
  1090. static void
  1091. lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
  1092. {
  1093. char buf[LACP_LAGIDSTR_MAX+1];
  1094. LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
  1095. __func__,
  1096. lacp_format_lagid(&la->la_actor, &la->la_partner,
  1097. buf, sizeof(buf)),
  1098. la->la_refcnt, la->la_refcnt + 1));
  1099. KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
  1100. la->la_refcnt++;
  1101. KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
  1102. }
  1103. static void
  1104. lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
  1105. {
  1106. char buf[LACP_LAGIDSTR_MAX+1];
  1107. LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
  1108. __func__,
  1109. lacp_format_lagid(&la->la_actor, &la->la_partner,
  1110. buf, sizeof(buf)),
  1111. la->la_refcnt, la->la_refcnt - 1));
  1112. KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
  1113. la->la_refcnt--;
  1114. if (la->la_refcnt > 0) {
  1115. return;
  1116. }
  1117. KASSERT(la->la_refcnt == 0, ("refcount not zero"));
  1118. KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
  1119. TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
  1120. free(la, M_DEVBUF);
  1121. }
  1122. /*
  1123. * lacp_aggregator_get: allocate an aggregator.
  1124. */
  1125. static struct lacp_aggregator *
  1126. lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
  1127. {
  1128. struct lacp_aggregator *la;
  1129. la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
  1130. if (la) {
  1131. la->la_refcnt = 1;
  1132. la->la_nports = 0;
  1133. TAILQ_INIT(&la->la_ports);
  1134. la->la_pending = 0;
  1135. TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
  1136. }
  1137. return (la);
  1138. }
  1139. /*
  1140. * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
  1141. */
  1142. static void
  1143. lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
  1144. {
  1145. lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
  1146. lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
  1147. la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
  1148. }
  1149. static void
  1150. lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
  1151. const struct lacp_peerinfo *lpi_port)
  1152. {
  1153. memset(lpi_aggr, 0, sizeof(*lpi_aggr));
  1154. lpi_aggr->lip_systemid = lpi_port->lip_systemid;
  1155. lpi_aggr->lip_key = lpi_port->lip_key;
  1156. }
  1157. /*
  1158. * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
  1159. */
  1160. static bool
  1161. lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
  1162. const struct lacp_port *lp)
  1163. {
  1164. if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
  1165. !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
  1166. return (false);
  1167. }
  1168. if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION))
  1169. return (false);
  1170. if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner))
  1171. return (false);
  1172. if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor))
  1173. return (false);
  1174. return (true);
  1175. }
  1176. static bool
  1177. lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
  1178. const struct lacp_peerinfo *b)
  1179. {
  1180. if (memcmp(&a->lip_systemid, &b->lip_systemid,
  1181. sizeof(a->lip_systemid)) != 0) {
  1182. return (false);
  1183. }
  1184. if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key)) != 0)
  1185. return (false);
  1186. return (true);
  1187. }
  1188. static void
  1189. lacp_port_enable(struct lacp_port *lp)
  1190. {
  1191. lp->lp_state |= LACP_STATE_AGGREGATION;
  1192. }
  1193. static void
  1194. lacp_port_disable(struct lacp_port *lp)
  1195. {
  1196. lacp_set_mux(lp, LACP_MUX_DETACHED);
  1197. lp->lp_state &= ~LACP_STATE_AGGREGATION;
  1198. lp->lp_selected = LACP_UNSELECTED;
  1199. lacp_sm_rx_record_default(lp);
  1200. lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
  1201. lp->lp_state &= ~LACP_STATE_EXPIRED;
  1202. }
  1203. /*
  1204. * lacp_select: select an aggregator. create one if necessary.
  1205. */
  1206. static void
  1207. lacp_select(struct lacp_port *lp)
  1208. {
  1209. struct lacp_softc *lsc = lp->lp_lsc;
  1210. struct lacp_aggregator *la;
  1211. char buf[LACP_LAGIDSTR_MAX+1];
  1212. if (lp->lp_aggregator) {
  1213. return;
  1214. }
  1215. /* If we haven't heard from our peer, skip this step. */
  1216. if (lp->lp_state & LACP_STATE_DEFAULTED)
  1217. return;
  1218. KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
  1219. ("timer_wait_while still active"));
  1220. LACP_DPRINTF((lp, "port lagid=%s\n",
  1221. lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
  1222. buf, sizeof(buf))));
  1223. TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
  1224. if (lacp_aggregator_is_compatible(la, lp)) {
  1225. break;
  1226. }
  1227. }
  1228. if (la == NULL) {
  1229. la = lacp_aggregator_get(lsc, lp);
  1230. if (la == NULL) {
  1231. LACP_DPRINTF((lp, "aggregator creation failed\n"));
  1232. /*
  1233. * will retry on the next tick.
  1234. */
  1235. return;
  1236. }
  1237. lacp_fill_aggregator_id(la, lp);
  1238. LACP_DPRINTF((lp, "aggregator created\n"));
  1239. } else {
  1240. LACP_DPRINTF((lp, "compatible aggregator found\n"));
  1241. if (la->la_refcnt == LACP_MAX_PORTS)
  1242. return;
  1243. lacp_aggregator_addref(lsc, la);
  1244. }
  1245. LACP_DPRINTF((lp, "aggregator lagid=%s\n",
  1246. lacp_format_lagid(&la->la_actor, &la->la_partner,
  1247. buf, sizeof(buf))));
  1248. lp->lp_aggregator = la;
  1249. lp->lp_selected = LACP_SELECTED;
  1250. }
  1251. /*
  1252. * lacp_unselect: finish unselect/detach process.
  1253. */
  1254. static void
  1255. lacp_unselect(struct lacp_port *lp)
  1256. {
  1257. struct lacp_softc *lsc = lp->lp_lsc;
  1258. struct lacp_aggregator *la = lp->lp_aggregator;
  1259. KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
  1260. ("timer_wait_while still active"));
  1261. if (la == NULL) {
  1262. return;
  1263. }
  1264. lp->lp_aggregator = NULL;
  1265. lacp_aggregator_delref(lsc, la);
  1266. }
  1267. /* mux machine */
  1268. static void
  1269. lacp_sm_mux(struct lacp_port *lp)
  1270. {
  1271. struct lagg_port *lgp = lp->lp_lagg;
  1272. struct lagg_softc *sc = lgp->lp_softc;
  1273. enum lacp_mux_state new_state;
  1274. boolean_t p_sync =
  1275. (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
  1276. boolean_t p_collecting =
  1277. (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
  1278. enum lacp_selected selected = lp->lp_selected;
  1279. struct lacp_aggregator *la;
  1280. if (V_lacp_debug > 1)
  1281. lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
  1282. "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
  1283. lp->lp_mux_state, selected, p_sync, p_collecting);
  1284. re_eval:
  1285. la = lp->lp_aggregator;
  1286. KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
  1287. ("MUX not detached"));
  1288. new_state = lp->lp_mux_state;
  1289. switch (lp->lp_mux_state) {
  1290. case LACP_MUX_DETACHED:
  1291. if (selected != LACP_UNSELECTED) {
  1292. new_state = LACP_MUX_WAITING;
  1293. }
  1294. break;
  1295. case LACP_MUX_WAITING:
  1296. KASSERT(la->la_pending > 0 ||
  1297. !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
  1298. ("timer_wait_while still active"));
  1299. if (selected == LACP_SELECTED && la->la_pending == 0) {
  1300. new_state = LACP_MUX_ATTACHED;
  1301. } else if (selected == LACP_UNSELECTED) {
  1302. new_state = LACP_MUX_DETACHED;
  1303. }
  1304. break;
  1305. case LACP_MUX_ATTACHED:
  1306. if (selected == LACP_SELECTED && p_sync) {
  1307. new_state = LACP_MUX_COLLECTING;
  1308. } else if (selected != LACP_SELECTED) {
  1309. new_state = LACP_MUX_DETACHED;
  1310. }
  1311. break;
  1312. case LACP_MUX_COLLECTING:
  1313. if (selected == LACP_SELECTED && p_sync && p_collecting) {
  1314. new_state = LACP_MUX_DISTRIBUTING;
  1315. } else if (selected != LACP_SELECTED || !p_sync) {
  1316. new_state = LACP_MUX_ATTACHED;
  1317. }
  1318. break;
  1319. case LACP_MUX_DISTRIBUTING:
  1320. if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
  1321. new_state = LACP_MUX_COLLECTING;
  1322. lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
  1323. sc->sc_flapping++;
  1324. }
  1325. break;
  1326. default:
  1327. panic("%s: unknown state", __func__);
  1328. }
  1329. if (lp->lp_mux_state == new_state) {
  1330. return;
  1331. }
  1332. lacp_set_mux(lp, new_state);
  1333. goto re_eval;
  1334. }
  1335. static void
  1336. lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
  1337. {
  1338. struct lacp_aggregator *la = lp->lp_aggregator;
  1339. if (lp->lp_mux_state == new_state) {
  1340. return;
  1341. }
  1342. switch (new_state) {
  1343. case LACP_MUX_DETACHED:
  1344. lp->lp_state &= ~LACP_STATE_SYNC;
  1345. lacp_disable_distributing(lp);
  1346. lacp_disable_collecting(lp);
  1347. lacp_sm_assert_ntt(lp);
  1348. /* cancel timer */
  1349. if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
  1350. KASSERT(la->la_pending > 0,
  1351. ("timer_wait_while not active"));
  1352. la->la_pending--;
  1353. }
  1354. LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
  1355. lacp_unselect(lp);
  1356. break;
  1357. case LACP_MUX_WAITING:
  1358. LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
  1359. LACP_AGGREGATE_WAIT_TIME);
  1360. la->la_pending++;
  1361. break;
  1362. case LACP_MUX_ATTACHED:
  1363. lp->lp_state |= LACP_STATE_SYNC;
  1364. lacp_disable_collecting(lp);
  1365. lacp_sm_assert_ntt(lp);
  1366. break;
  1367. case LACP_MUX_COLLECTING:
  1368. lacp_enable_collecting(lp);
  1369. lacp_disable_distributing(lp);
  1370. lacp_sm_assert_ntt(lp);
  1371. break;
  1372. case LACP_MUX_DISTRIBUTING:
  1373. lacp_enable_distributing(lp);
  1374. break;
  1375. default:
  1376. panic("%s: unknown state", __func__);
  1377. }
  1378. LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
  1379. lp->lp_mux_state = new_state;
  1380. }
  1381. static void
  1382. lacp_sm_mux_timer(struct lacp_port *lp)
  1383. {
  1384. struct lacp_aggregator *la = lp->lp_aggregator;
  1385. char buf[LACP_LAGIDSTR_MAX+1];
  1386. KASSERT(la->la_pending > 0, ("no pending event"));
  1387. LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
  1388. lacp_format_lagid(&la->la_actor, &la->la_partner,
  1389. buf, sizeof(buf)),
  1390. la->la_pending, la->la_pending - 1));
  1391. la->la_pending--;
  1392. }
  1393. /* periodic transmit machine */
  1394. static void
  1395. lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
  1396. {
  1397. if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
  1398. LACP_STATE_TIMEOUT)) {
  1399. return;
  1400. }
  1401. LACP_DPRINTF((lp, "partner timeout changed\n"));
  1402. /*
  1403. * FAST_PERIODIC -> SLOW_PERIODIC
  1404. * or
  1405. * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
  1406. *
  1407. * let lacp_sm_ptx_tx_schedule to update timeout.
  1408. */
  1409. LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
  1410. /*
  1411. * if timeout has been shortened, assert NTT.
  1412. */
  1413. if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
  1414. lacp_sm_assert_ntt(lp);
  1415. }
  1416. }
  1417. static void
  1418. lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
  1419. {
  1420. int timeout;
  1421. if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
  1422. !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
  1423. /*
  1424. * NO_PERIODIC
  1425. */
  1426. LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
  1427. return;
  1428. }
  1429. if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
  1430. return;
  1431. }
  1432. timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
  1433. LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
  1434. LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
  1435. }
  1436. static void
  1437. lacp_sm_ptx_timer(struct lacp_port *lp)
  1438. {
  1439. lacp_sm_assert_ntt(lp);
  1440. }
  1441. static void
  1442. lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
  1443. {
  1444. int timeout;
  1445. /*
  1446. * check LACP_DISABLED first
  1447. */
  1448. if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
  1449. return;
  1450. }
  1451. /*
  1452. * check loopback condition.
  1453. */
  1454. if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
  1455. &lp->lp_actor.lip_systemid)) {
  1456. return;
  1457. }
  1458. /*
  1459. * EXPIRED, DEFAULTED, CURRENT -> CURRENT
  1460. */
  1461. microuptime(&lp->lp_last_lacpdu_rx);
  1462. lacp_sm_rx_update_selected(lp, du);
  1463. lacp_sm_rx_update_ntt(lp, du);
  1464. lacp_sm_rx_record_pdu(lp, du);
  1465. timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
  1466. LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
  1467. LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
  1468. lp->lp_state &= ~LACP_STATE_EXPIRED;
  1469. /*
  1470. * kick transmit machine without waiting the next tick.
  1471. */
  1472. lacp_sm_tx(lp);
  1473. }
  1474. static void
  1475. lacp_sm_rx_set_expired(struct lacp_port *lp)
  1476. {
  1477. lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
  1478. lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
  1479. LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
  1480. lp->lp_state |= LACP_STATE_EXPIRED;
  1481. }
  1482. static void
  1483. lacp_sm_rx_timer(struct lacp_port *lp)
  1484. {
  1485. if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
  1486. /* CURRENT -> EXPIRED */
  1487. LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
  1488. lacp_sm_rx_set_expired(lp);
  1489. } else {
  1490. /* EXPIRED -> DEFAULTED */
  1491. LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
  1492. lacp_sm_rx_update_default_selected(lp);
  1493. lacp_sm_rx_record_default(lp);
  1494. lp->lp_state &= ~LACP_STATE_EXPIRED;
  1495. }
  1496. }
  1497. static void
  1498. lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
  1499. {
  1500. boolean_t active;
  1501. uint8_t oldpstate;
  1502. char buf[LACP_STATESTR_MAX+1];
  1503. LACP_TRACE(lp);
  1504. oldpstate = lp->lp_partner.lip_state;
  1505. active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
  1506. || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
  1507. (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
  1508. lp->lp_partner = du->ldu_actor;
  1509. if (active &&
  1510. ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
  1511. LACP_STATE_AGGREGATION) &&
  1512. !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
  1513. || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
  1514. /*
  1515. * XXX Maintain legacy behavior of leaving the
  1516. * LACP_STATE_SYNC bit unchanged from the partner's
  1517. * advertisement if lsc_strict_mode is false.
  1518. * TODO: We should re-examine the concept of the "strict mode"
  1519. * to ensure it makes sense to maintain a non-strict mode.
  1520. */
  1521. if (lp->lp_lsc->lsc_strict_mode)
  1522. lp->lp_partner.lip_state |= LACP_STATE_SYNC;
  1523. } else {
  1524. lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
  1525. }
  1526. lp->lp_state &= ~LACP_STATE_DEFAULTED;
  1527. if (oldpstate != lp->lp_partner.lip_state) {
  1528. LACP_DPRINTF((lp, "old pstate %s\n",
  1529. lacp_format_state(oldpstate, buf, sizeof(buf))));
  1530. LACP_DPRINTF((lp, "new pstate %s\n",
  1531. lacp_format_state(lp->lp_partner.lip_state, buf,
  1532. sizeof(buf))));
  1533. }
  1534. lacp_sm_ptx_update_timeout(lp, oldpstate);
  1535. }
  1536. static void
  1537. lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
  1538. {
  1539. LACP_TRACE(lp);
  1540. if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
  1541. !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
  1542. LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
  1543. LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
  1544. lacp_sm_assert_ntt(lp);
  1545. }
  1546. }
  1547. static void
  1548. lacp_sm_rx_record_default(struct lacp_port *lp)
  1549. {
  1550. uint8_t oldpstate;
  1551. LACP_TRACE(lp);
  1552. oldpstate = lp->lp_partner.lip_state;
  1553. if (lp->lp_lsc->lsc_strict_mode)
  1554. lp->lp_partner = lacp_partner_admin_strict;
  1555. else
  1556. lp->lp_partner = lacp_partner_admin_optimistic;
  1557. lp->lp_state |= LACP_STATE_DEFAULTED;
  1558. lacp_sm_ptx_update_timeout(lp, oldpstate);
  1559. }
  1560. static void
  1561. lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
  1562. const struct lacp_peerinfo *info)
  1563. {
  1564. LACP_TRACE(lp);
  1565. if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
  1566. !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
  1567. LACP_STATE_AGGREGATION)) {
  1568. lp->lp_selected = LACP_UNSELECTED;
  1569. /* mux machine will clean up lp->lp_aggregator */
  1570. }
  1571. }
  1572. static void
  1573. lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
  1574. {
  1575. LACP_TRACE(lp);
  1576. lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
  1577. }
  1578. static void
  1579. lacp_sm_rx_update_default_selected(struct lacp_port *lp)
  1580. {
  1581. LACP_TRACE(lp);
  1582. if (lp->lp_lsc->lsc_strict_mode)
  1583. lacp_sm_rx_update_selected_from_peerinfo(lp,
  1584. &lacp_partner_admin_strict);
  1585. else
  1586. lacp_sm_rx_update_selected_from_peerinfo(lp,
  1587. &lacp_partner_admin_optimistic);
  1588. }
  1589. /* transmit machine */
  1590. static void
  1591. lacp_sm_tx(struct lacp_port *lp)
  1592. {
  1593. int error = 0;
  1594. if (!(lp->lp_state & LACP_STATE_AGGREGATION)
  1595. #if 1
  1596. || (!(lp->lp_state & LACP_STATE_ACTIVITY)
  1597. && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
  1598. #endif
  1599. ) {
  1600. lp->lp_flags &= ~LACP_PORT_NTT;
  1601. }
  1602. if (!(lp->lp_flags & LACP_PORT_NTT)) {
  1603. return;
  1604. }
  1605. /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
  1606. if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
  1607. (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
  1608. LACP_DPRINTF((lp, "rate limited pdu\n"));
  1609. return;
  1610. }
  1611. if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
  1612. error = lacp_xmit_lacpdu(lp);
  1613. } else {
  1614. LACP_TPRINTF((lp, "Dropping TX PDU\n"));
  1615. }
  1616. if (error == 0) {
  1617. lp->lp_flags &= ~LACP_PORT_NTT;
  1618. } else {
  1619. LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
  1620. error));
  1621. }
  1622. }
  1623. static void
  1624. lacp_sm_assert_ntt(struct lacp_port *lp)
  1625. {
  1626. lp->lp_flags |= LACP_PORT_NTT;
  1627. }
  1628. static void
  1629. lacp_run_timers(struct lacp_port *lp)
  1630. {
  1631. int i;
  1632. struct timeval time_diff;
  1633. for (i = 0; i < LACP_NTIMER; i++) {
  1634. KASSERT(lp->lp_timer[i] >= 0,
  1635. ("invalid timer value %d", lp->lp_timer[i]));
  1636. if (lp->lp_timer[i] == 0) {
  1637. continue;
  1638. } else {
  1639. if (i == LACP_TIMER_CURRENT_WHILE) {
  1640. microuptime(&time_diff);
  1641. timevalsub(&time_diff, &lp->lp_last_lacpdu_rx);
  1642. if (time_diff.tv_sec) {
  1643. /* At least one sec has elapsed since last LACP packet. */
  1644. --lp->lp_timer[i];
  1645. }
  1646. } else {
  1647. --lp->lp_timer[i];
  1648. }
  1649. if ((lp->lp_timer[i] <= 0) && (lacp_timer_funcs[i])) {
  1650. (*lacp_timer_funcs[i])(lp);
  1651. }
  1652. }
  1653. }
  1654. }
  1655. int
  1656. lacp_marker_input(struct lacp_port *lp, struct mbuf *m)
  1657. {
  1658. struct lacp_softc *lsc = lp->lp_lsc;
  1659. struct lagg_port *lgp = lp->lp_lagg;
  1660. struct lacp_port *lp2;
  1661. struct markerdu *mdu;
  1662. int error = 0;
  1663. int pending = 0;
  1664. if (m->m_pkthdr.len != sizeof(*mdu)) {
  1665. goto bad;
  1666. }
  1667. if ((m->m_flags & M_MCAST) == 0) {
  1668. goto bad;
  1669. }
  1670. if (m->m_len < sizeof(*mdu)) {
  1671. m = m_pullup(m, sizeof(*mdu));
  1672. if (m == NULL) {
  1673. return (ENOMEM);
  1674. }
  1675. }
  1676. mdu = mtod(m, struct markerdu *);
  1677. if (memcmp(&mdu->mdu_eh.ether_dhost,
  1678. &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
  1679. goto bad;
  1680. }
  1681. if (mdu->mdu_sph.sph_version != 1) {
  1682. goto bad;
  1683. }
  1684. switch (mdu->mdu_tlv.tlv_type) {
  1685. case MARKER_TYPE_INFO:
  1686. if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
  1687. marker_info_tlv_template, TRUE)) {
  1688. goto bad;
  1689. }
  1690. mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
  1691. memcpy(&mdu->mdu_eh.ether_dhost,
  1692. &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
  1693. memcpy(&mdu->mdu_eh.ether_shost,
  1694. lgp->lp_lladdr, ETHER_ADDR_LEN);
  1695. error = lagg_enqueue(lp->lp_ifp, m);
  1696. break;
  1697. case MARKER_TYPE_RESPONSE:
  1698. if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
  1699. marker_response_tlv_template, TRUE)) {
  1700. goto bad;
  1701. }
  1702. LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n",
  1703. ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system,
  1704. ":", ntohl(mdu->mdu_info.mi_rq_xid)));
  1705. /* Verify that it is the last marker we sent out */
  1706. if (memcmp(&mdu->mdu_info, &lp->lp_marker,
  1707. sizeof(struct lacp_markerinfo)))
  1708. goto bad;
  1709. LACP_LOCK(lsc);
  1710. lp->lp_flags &= ~LACP_PORT_MARK;
  1711. if (lsc->lsc_suppress_distributing) {
  1712. /* Check if any ports are waiting for a response */
  1713. LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) {
  1714. if (lp2->lp_flags & LACP_PORT_MARK) {
  1715. pending = 1;
  1716. break;
  1717. }
  1718. }
  1719. if (pending == 0) {
  1720. /* All interface queues are clear */
  1721. LACP_DPRINTF((NULL, "queue flush complete\n"));
  1722. lsc->lsc_suppress_distributing = FALSE;
  1723. }
  1724. }
  1725. LACP_UNLOCK(lsc);
  1726. m_freem(m);
  1727. break;
  1728. default:
  1729. goto bad;
  1730. }
  1731. return (error);
  1732. bad:
  1733. LACP_DPRINTF((lp, "bad marker frame\n"));
  1734. m_freem(m);
  1735. return (EINVAL);
  1736. }
  1737. static int
  1738. tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
  1739. const struct tlv_template *tmpl, boolean_t check_type)
  1740. {
  1741. while (/* CONSTCOND */ 1) {
  1742. if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
  1743. return (EINVAL);
  1744. }
  1745. if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
  1746. tlv->tlv_length != tmpl->tmpl_length) {
  1747. return (EINVAL);
  1748. }
  1749. if (tmpl->tmpl_type == 0) {
  1750. break;
  1751. }
  1752. tlv = (const struct tlvhdr *)
  1753. ((const char *)tlv + tlv->tlv_length);
  1754. tmpl++;
  1755. }
  1756. return (0);
  1757. }
  1758. /* Debugging */
  1759. const char *
  1760. lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
  1761. {
  1762. snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
  1763. (int)mac[0],
  1764. (int)mac[1],
  1765. (int)mac[2],
  1766. (int)mac[3],
  1767. (int)mac[4],
  1768. (int)mac[5]);
  1769. return (buf);
  1770. }
  1771. const char *
  1772. lacp_format_systemid(const struct lacp_systemid *sysid,
  1773. char *buf, size_t buflen)
  1774. {
  1775. char macbuf[LACP_MACSTR_MAX+1];
  1776. snprintf(buf, buflen, "%04X,%s",
  1777. ntohs(sysid->lsi_prio),
  1778. lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
  1779. return (buf);
  1780. }
  1781. const char *
  1782. lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
  1783. {
  1784. snprintf(buf, buflen, "%04X,%04X",
  1785. ntohs(portid->lpi_prio),
  1786. ntohs(portid->lpi_portno));
  1787. return (buf);
  1788. }
  1789. const char *
  1790. lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
  1791. {
  1792. char sysid[LACP_SYSTEMIDSTR_MAX+1];
  1793. char portid[LACP_PORTIDSTR_MAX+1];
  1794. snprintf(buf, buflen, "(%s,%04X,%s)",
  1795. lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
  1796. ntohs(peer->lip_key),
  1797. lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
  1798. return (buf);
  1799. }
  1800. const char *
  1801. lacp_format_lagid(const struct lacp_peerinfo *a,
  1802. const struct lacp_peerinfo *b, char *buf, size_t buflen)
  1803. {
  1804. char astr[LACP_PARTNERSTR_MAX+1];
  1805. char bstr[LACP_PARTNERSTR_MAX+1];
  1806. #if 0
  1807. /*
  1808. * there's a convention to display small numbered peer
  1809. * in the left.
  1810. */
  1811. if (lacp_compare_peerinfo(a, b) > 0) {
  1812. const struct lacp_peerinfo *t;
  1813. t = a;
  1814. a = b;
  1815. b = t;
  1816. }
  1817. #endif
  1818. snprintf(buf, buflen, "[%s,%s]",
  1819. lacp_format_partner(a, astr, sizeof(astr)),
  1820. lacp_format_partner(b, bstr, sizeof(bstr)));
  1821. return (buf);
  1822. }
  1823. const char *
  1824. lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
  1825. char *buf, size_t buflen)
  1826. {
  1827. if (la == NULL) {
  1828. return ("(none)");
  1829. }
  1830. return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
  1831. }
  1832. const char *
  1833. lacp_format_state(uint8_t state, char *buf, size_t buflen)
  1834. {
  1835. snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
  1836. return (buf);
  1837. }
  1838. static void
  1839. lacp_dump_lacpdu(const struct lacpdu *du)
  1840. {
  1841. char buf[LACP_PARTNERSTR_MAX+1];
  1842. char buf2[LACP_STATESTR_MAX+1];
  1843. printf("actor=%s\n",
  1844. lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
  1845. printf("actor.state=%s\n",
  1846. lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
  1847. printf("partner=%s\n",
  1848. lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
  1849. printf("partner.state=%s\n",
  1850. lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
  1851. printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
  1852. }
  1853. static void
  1854. lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
  1855. {
  1856. va_list va;
  1857. if (lp) {
  1858. printf("%s: ", lp->lp_ifp->if_xname);
  1859. }
  1860. va_start(va, fmt);
  1861. vprintf(fmt, va);
  1862. va_end(va);
  1863. }