123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685 |
- /*-
- * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #include "opt_inet.h"
- #include "opt_inet6.h"
- #include "opt_kbd.h"
- #include <sys/param.h>
- #include <sys/systm.h>
- #include <sys/devctl.h>
- #include <sys/eventhandler.h>
- #include <sys/kernel.h>
- #include <sys/mbuf.h>
- #include <sys/module.h>
- #include <sys/socket.h>
- #include <sys/sysctl.h>
- #ifdef KDB
- #include <sys/kdb.h>
- #endif
- #include <net/bpf.h>
- #include <net/ethernet.h>
- #include <net/infiniband.h>
- #include <net/if.h>
- #include <net/if_var.h>
- #include <net/if_private.h>
- #include <net/if_dl.h>
- #include <net/if_media.h>
- #include <net/if_lagg.h>
- #include <net/if_llatbl.h>
- #include <net/if_types.h>
- #include <net/netisr.h>
- #include <net/route.h>
- #include <netinet/if_ether.h>
- #include <netinet/in.h>
- #include <netinet/ip6.h>
- #include <netinet6/in6_var.h>
- #include <netinet6/nd6.h>
- #include <security/mac/mac_framework.h>
/*
 * if_lagg(4) support.
 *
 * Input hook called by infiniband_input() for frames received on an
 * interface whose if_type is IFT_INFINIBANDLAG.  The hook returns the
 * mbuf to continue processing with, or NULL if it consumed the frame.
 * infiniband_input() asserts that the pointer is non-NULL before use;
 * presumably it is installed by if_lagg(4) when that module loads.
 */
struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *ifp, struct mbuf *m);
- #ifdef INET
/*
 * Build the 20-byte IPoIB link-layer multicast address for an IPv4
 * multicast group, following the mapping in RFC 4391, section 4.
 *
 *	addr		IPv4 group address in network byte order
 *	broadcast	the interface's 20-byte broadcast address; the scope
 *			nibble (byte 5) and bytes 8-9 (the P_Key in RFC 4391
 *			terms) are taken from it
 *	buf		output, at least 20 bytes
 *
 * Only the low 28 bits of the IPv4 address form the group ID; the
 * remaining group ID bits must be zero.  The fixed class-D prefix
 * (the top nibble, 1110b) is therefore masked off, so that the MGID
 * matches the one computed by other RFC 4391 implementations.
 */
static inline void
infiniband_ipv4_multicast_map(uint32_t addr,
    const uint8_t *broadcast, uint8_t *buf)
{
	uint8_t scope;

	addr = ntohl(addr);
	scope = broadcast[5] & 0xF;

	/* Leading reserved byte followed by the all-ones QPN. */
	buf[0] = 0;
	buf[1] = 0xff;
	buf[2] = 0xff;
	buf[3] = 0xff;

	/* MGID: multicast prefix, flags | scope, IPv4 signature 0x401b. */
	buf[4] = 0xff;
	buf[5] = 0x10 | scope;
	buf[6] = 0x40;
	buf[7] = 0x1b;

	/* Bytes 8-9 are copied from the broadcast address. */
	buf[8] = broadcast[8];
	buf[9] = broadcast[9];

	/* Upper group ID bits are always zero for IPv4. */
	buf[10] = 0;
	buf[11] = 0;
	buf[12] = 0;
	buf[13] = 0;
	buf[14] = 0;
	buf[15] = 0;

	/* Low 28 bits of the IPv4 group address, most significant first. */
	buf[16] = (addr >> 24) & 0x0f;
	buf[17] = (addr >> 16) & 0xff;
	buf[18] = (addr >> 8) & 0xff;
	buf[19] = addr & 0xff;
}
- #endif
- #ifdef INET6
/*
 * Build the 20-byte IPoIB link-layer multicast address for an IPv6
 * multicast group.
 *
 *	addr		IPv6 group address
 *	broadcast	the interface's 20-byte broadcast address; the low
 *			nibble of byte 5 and bytes 8-9 are taken from it
 *	buf		output, at least 20 bytes
 *
 * The first ten bytes are a fixed prefix (with signature 0x601b)
 * combined with the values borrowed from the broadcast address; the
 * last ten bytes are the low 80 bits of the IPv6 address.
 */
static inline void
infiniband_ipv6_multicast_map(const struct in6_addr *addr,
    const uint8_t *broadcast, uint8_t *buf)
{
	const uint8_t prefix[10] = {
		0x00, 0xff, 0xff, 0xff,
		0xff,
		(uint8_t)(0x10 | (broadcast[5] & 0xF)),
		0x60, 0x1b,
		broadcast[8], broadcast[9],
	};

	memcpy(buf, prefix, sizeof(prefix));
	memcpy(buf + sizeof(prefix), addr->s6_addr + 6, 10);
}
- #endif
- /*
- * This is for clients that have an infiniband_header in the mbuf.
- */
- void
- infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
- {
- struct infiniband_header *ibh;
- struct ether_header eh;
- if (!bpf_peers_present(ifp->if_bpf))
- return;
- M_ASSERTVALID(mb);
- if (mb->m_len < sizeof(*ibh))
- return;
- ibh = mtod(mb, struct infiniband_header *);
- eh.ether_type = ibh->ib_protocol;
- memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
- memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
- mb->m_data += sizeof(*ibh);
- mb->m_len -= sizeof(*ibh);
- mb->m_pkthdr.len -= sizeof(*ibh);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
- mb->m_data -= sizeof(*ibh);
- mb->m_len += sizeof(*ibh);
- mb->m_pkthdr.len += sizeof(*ibh);
- }
- static void
- update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
- {
- int csum_flags = 0;
- if (src->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (src->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
- dst->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- dst->m_pkthdr.csum_data = 0xffff;
- }
- /*
- * Handle link-layer encapsulation requests.
- */
- static int
- infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req)
- {
- struct infiniband_header *ih;
- struct arphdr *ah;
- uint16_t etype;
- const uint8_t *lladdr;
- if (req->rtype != IFENCAP_LL)
- return (EOPNOTSUPP);
- if (req->bufsize < INFINIBAND_HDR_LEN)
- return (ENOMEM);
- ih = (struct infiniband_header *)req->buf;
- lladdr = req->lladdr;
- req->lladdr_off = 0;
- switch (req->family) {
- case AF_INET:
- etype = htons(ETHERTYPE_IP);
- break;
- case AF_INET6:
- etype = htons(ETHERTYPE_IPV6);
- break;
- case AF_ARP:
- ah = (struct arphdr *)req->hdata;
- ah->ar_hrd = htons(ARPHRD_INFINIBAND);
- switch (ntohs(ah->ar_op)) {
- case ARPOP_REVREQUEST:
- case ARPOP_REVREPLY:
- etype = htons(ETHERTYPE_REVARP);
- break;
- case ARPOP_REQUEST:
- case ARPOP_REPLY:
- default:
- etype = htons(ETHERTYPE_ARP);
- break;
- }
- if (req->flags & IFENCAP_FLAG_BROADCAST)
- lladdr = ifp->if_broadcastaddr;
- break;
- default:
- return (EAFNOSUPPORT);
- }
- ih->ib_protocol = etype;
- ih->ib_reserved = 0;
- memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN);
- req->bufsize = sizeof(struct infiniband_header);
- return (0);
- }
- static int
- infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro, uint8_t *phdr,
- uint32_t *pflags, struct llentry **plle)
- {
- #if defined(INET) || defined(INET6)
- struct infiniband_header *ih = (struct infiniband_header *)phdr;
- #endif
- uint32_t lleflags = 0;
- int error = 0;
- if (plle)
- *plle = NULL;
- switch (dst->sa_family) {
- #ifdef INET
- case AF_INET:
- if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
- error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle);
- } else {
- if (m->m_flags & M_BCAST) {
- memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr,
- INFINIBAND_ADDR_LEN);
- } else {
- infiniband_ipv4_multicast_map(
- ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
- ifp->if_broadcastaddr, ih->ib_hwaddr);
- }
- ih->ib_protocol = htons(ETHERTYPE_IP);
- ih->ib_reserved = 0;
- }
- break;
- #endif
- #ifdef INET6
- case AF_INET6:
- if ((m->m_flags & M_MCAST) == 0) {
- int af = RO_GET_FAMILY(ro, dst);
- error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
- &lleflags, plle);
- } else {
- infiniband_ipv6_multicast_map(
- &((const struct sockaddr_in6 *)dst)->sin6_addr,
- ifp->if_broadcastaddr, ih->ib_hwaddr);
- ih->ib_protocol = htons(ETHERTYPE_IPV6);
- ih->ib_reserved = 0;
- }
- break;
- #endif
- default:
- if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- if (m != NULL)
- m_freem(m);
- return (EAFNOSUPPORT);
- }
- if (error == EHOSTDOWN) {
- if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
- error = EHOSTUNREACH;
- }
- if (error != 0)
- return (error);
- *pflags = RT_MAY_LOOP;
- if (lleflags & LLE_IFADDR)
- *pflags |= RT_L2_ME;
- return (0);
- }
- /*
- * Infiniband output routine.
- */
- static int
- infiniband_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
- {
- uint8_t linkhdr[INFINIBAND_HDR_LEN];
- uint8_t *phdr;
- struct llentry *lle = NULL;
- struct infiniband_header *ih;
- int error = 0;
- int hlen; /* link layer header length */
- uint32_t pflags;
- bool addref;
- NET_EPOCH_ASSERT();
- addref = false;
- phdr = NULL;
- pflags = 0;
- if (ro != NULL) {
- /* XXX BPF uses ro_prepend */
- if (ro->ro_prepend != NULL) {
- phdr = ro->ro_prepend;
- hlen = ro->ro_plen;
- } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
- if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
- lle = ro->ro_lle;
- if (lle != NULL &&
- (lle->la_flags & LLE_VALID) == 0) {
- LLE_FREE(lle);
- lle = NULL; /* redundant */
- ro->ro_lle = NULL;
- }
- if (lle == NULL) {
- /* if we lookup, keep cache */
- addref = 1;
- } else
- /*
- * Notify LLE code that
- * the entry was used
- * by datapath.
- */
- llentry_provide_feedback(lle);
- }
- if (lle != NULL) {
- phdr = lle->r_linkdata;
- hlen = lle->r_hdrlen;
- pflags = lle->r_flags;
- }
- }
- }
- #ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- goto bad;
- #endif
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR) {
- error = ENETDOWN;
- goto bad;
- }
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
- error = ENETDOWN;
- goto bad;
- }
- if (phdr == NULL) {
- /* No prepend data supplied. Try to calculate ourselves. */
- phdr = linkhdr;
- hlen = INFINIBAND_HDR_LEN;
- error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
- addref ? &lle : NULL);
- if (addref && lle != NULL)
- ro->ro_lle = lle;
- if (error != 0)
- return (error == EWOULDBLOCK ? 0 : error);
- }
- if ((pflags & RT_L2_ME) != 0) {
- update_mbuf_csumflags(m, m);
- return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
- }
- /*
- * Add local infiniband header. If no space in first mbuf,
- * allocate another.
- */
- M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
- if (m == NULL) {
- error = ENOBUFS;
- goto bad;
- }
- if ((pflags & RT_HAS_HEADER) == 0) {
- ih = mtod(m, struct infiniband_header *);
- memcpy(ih, phdr, hlen);
- }
- /*
- * Queue message on interface, update output statistics if
- * successful, and start output if interface not yet active.
- */
- return (ifp->if_transmit(ifp, m));
- bad:
- if (m != NULL)
- m_freem(m);
- return (error);
- }
- /*
- * Process a received Infiniband packet.
- */
- static void
- infiniband_input(struct ifnet *ifp, struct mbuf *m)
- {
- struct infiniband_header *ibh;
- struct epoch_tracker et;
- int isr;
- bool needs_epoch;
- needs_epoch = (ifp->if_flags & IFF_NEEDSEPOCH);
- #ifdef INVARIANTS
- /*
- * This temporary code is here to prevent epoch unaware and unmarked
- * drivers to panic the system. Once all drivers are taken care of,
- * the whole INVARIANTS block should go away.
- */
- if (!needs_epoch && !in_epoch(net_epoch_preempt)) {
- static bool printedonce;
- needs_epoch = true;
- if (!printedonce) {
- printedonce = true;
- if_printf(ifp, "called %s w/o net epoch! "
- "PLEASE file a bug report.", __func__);
- #ifdef KDB
- kdb_backtrace();
- #endif
- }
- }
- #endif
- CURVNET_SET_QUIET(ifp->if_vnet);
- if (__predict_false(needs_epoch))
- NET_EPOCH_ENTER(et);
- if ((ifp->if_flags & IFF_UP) == 0) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- m_freem(m);
- goto done;
- }
- ibh = mtod(m, struct infiniband_header *);
- /*
- * Reset layer specific mbuf flags to avoid confusing upper
- * layers:
- */
- m->m_flags &= ~M_VLANTAG;
- m_clrprotoflags(m);
- if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
- if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
- ifp->if_addrlen) == 0)
- m->m_flags |= M_BCAST;
- else
- m->m_flags |= M_MCAST;
- if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
- }
- /* Let BPF have it before we strip the header. */
- infiniband_bpf_mtap(ifp, m);
- /* Allow monitor mode to claim this frame, after stats are updated. */
- if (ifp->if_flags & IFF_MONITOR) {
- m_freem(m);
- goto done;
- }
- /* Direct packet to correct FIB based on interface config. */
- M_SETFIB(m, ifp->if_fib);
- /* Handle input from a lagg<N> port */
- if (ifp->if_type == IFT_INFINIBANDLAG) {
- KASSERT(lagg_input_infiniband_p != NULL,
- ("%s: if_lagg not loaded!", __func__));
- m = (*lagg_input_infiniband_p)(ifp, m);
- if (__predict_false(m == NULL))
- goto done;
- ifp = m->m_pkthdr.rcvif;
- }
- /*
- * Dispatch frame to upper layer.
- */
- switch (ibh->ib_protocol) {
- #ifdef INET
- case htons(ETHERTYPE_IP):
- isr = NETISR_IP;
- break;
- case htons(ETHERTYPE_ARP):
- if (ifp->if_flags & IFF_NOARP) {
- /* Discard packet if ARP is disabled on interface */
- m_freem(m);
- goto done;
- }
- isr = NETISR_ARP;
- break;
- #endif
- #ifdef INET6
- case htons(ETHERTYPE_IPV6):
- isr = NETISR_IPV6;
- break;
- #endif
- default:
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- m_freem(m);
- goto done;
- }
- /* Strip off the Infiniband header. */
- m_adj(m, INFINIBAND_HDR_LEN);
- #ifdef MAC
- /*
- * Tag the mbuf with an appropriate MAC label before any other
- * consumers can get to it.
- */
- mac_ifnet_create_mbuf(ifp, m);
- #endif
- /* Allow monitor mode to claim this frame, after stats are updated. */
- netisr_dispatch(isr, m);
- done:
- if (__predict_false(needs_epoch))
- NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
- }
- static int
- infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
- struct sockaddr *sa)
- {
- struct sockaddr_dl *sdl;
- #ifdef INET
- struct sockaddr_in *sin;
- #endif
- #ifdef INET6
- struct sockaddr_in6 *sin6;
- #endif
- uint8_t *e_addr;
- switch (sa->sa_family) {
- case AF_LINK:
- /*
- * No mapping needed. Just check that it's a valid MC address.
- */
- sdl = (struct sockaddr_dl *)sa;
- e_addr = LLADDR(sdl);
- if (!INFINIBAND_IS_MULTICAST(e_addr))
- return (EADDRNOTAVAIL);
- *llsa = NULL;
- return 0;
- #ifdef INET
- case AF_INET:
- sin = (struct sockaddr_in *)sa;
- if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
- return (EADDRNOTAVAIL);
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ADDR_LEN;
- e_addr = LLADDR(sdl);
- infiniband_ipv4_multicast_map(
- sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr);
- *llsa = (struct sockaddr *)sdl;
- return (0);
- #endif
- #ifdef INET6
- case AF_INET6:
- sin6 = (struct sockaddr_in6 *)sa;
- /*
- * An IP6 address of 0 means listen to all of the
- * multicast address used for IP6. This has no meaning
- * in infiniband.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
- return (EADDRNOTAVAIL);
- if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
- return (EADDRNOTAVAIL);
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ADDR_LEN;
- e_addr = LLADDR(sdl);
- infiniband_ipv6_multicast_map(
- &sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
- *llsa = (struct sockaddr *)sdl;
- return (0);
- #endif
- default:
- return (EAFNOSUPPORT);
- }
- }
- void
- infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
- {
- struct sockaddr_dl *sdl;
- struct ifaddr *ifa;
- int i;
- ifp->if_addrlen = INFINIBAND_ADDR_LEN;
- ifp->if_hdrlen = INFINIBAND_HDR_LEN;
- ifp->if_mtu = INFINIBAND_MTU;
- if_attach(ifp);
- ifp->if_output = infiniband_output;
- ifp->if_input = infiniband_input;
- ifp->if_resolvemulti = infiniband_resolvemulti;
- ifp->if_requestencap = infiniband_requestencap;
- if (ifp->if_baudrate == 0)
- ifp->if_baudrate = IF_Gbps(10); /* default value */
- if (llb != NULL)
- ifp->if_broadcastaddr = llb;
- ifa = ifp->if_addr;
- KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
- sdl = (struct sockaddr_dl *)ifa->ifa_addr;
- sdl->sdl_type = IFT_INFINIBAND;
- sdl->sdl_alen = ifp->if_addrlen;
- if (lla != NULL) {
- memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
- if (ifp->if_hw_addr != NULL)
- memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
- } else {
- lla = LLADDR(sdl);
- }
- /* Attach ethernet compatible network device */
- bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
- /* Announce Infiniband MAC address if non-zero. */
- for (i = 0; i < ifp->if_addrlen; i++)
- if (lla[i] != 0)
- break;
- if (i != ifp->if_addrlen)
- if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
- /* Add necessary bits are setup; announce it now. */
- EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
- if (IS_DEFAULT_VNET(curvnet))
- devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
- }
/*
 * Perform common duties while detaching an Infiniband interface:
 * the BPF attachment made by infiniband_ifattach() is removed first,
 * then the interface itself is detached.
 */
void
infiniband_ifdetach(struct ifnet *ifp)
{

	bpfdetach(ifp);
	if_detach(ifp);
}
- static int
- infiniband_modevent(module_t mod, int type, void *data)
- {
- switch (type) {
- case MOD_LOAD:
- case MOD_UNLOAD:
- return (0);
- default:
- return (EOPNOTSUPP);
- }
- }
- static moduledata_t infiniband_mod = {
- .name = "if_infiniband",
- .evhand = &infiniband_modevent,
- };
- DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
- MODULE_VERSION(if_infiniband, 1);
|