debugnet.c 28 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126
  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause
  3. *
  4. * Copyright (c) 2019 Isilon Systems, LLC.
  5. * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
  6. * Copyright (c) 2000 Darrell Anderson
  7. * All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in the
  16. * documentation and/or other materials provided with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  22. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28. * SUCH DAMAGE.
  29. */
  30. #include <sys/cdefs.h>
  31. #include "opt_ddb.h"
  32. #include "opt_inet.h"
  33. #include <sys/param.h>
  34. #include <sys/systm.h>
  35. #include <sys/endian.h>
  36. #include <sys/errno.h>
  37. #include <sys/eventhandler.h>
  38. #include <sys/kernel.h>
  39. #include <sys/lock.h>
  40. #include <sys/mutex.h>
  41. #include <sys/socket.h>
  42. #include <sys/sysctl.h>
  43. #ifdef DDB
  44. #include <ddb/ddb.h>
  45. #include <ddb/db_lex.h>
  46. #endif
  47. #include <net/ethernet.h>
  48. #include <net/if.h>
  49. #include <net/if_arp.h>
  50. #include <net/if_dl.h>
  51. #include <net/if_types.h>
  52. #include <net/if_var.h>
  53. #include <net/if_private.h>
  54. #include <net/vnet.h>
  55. #include <net/route.h>
  56. #include <net/route/nhop.h>
  57. #include <netinet/in.h>
  58. #include <netinet/in_fib.h>
  59. #include <netinet/in_systm.h>
  60. #include <netinet/in_var.h>
  61. #include <netinet/ip.h>
  62. #include <netinet/ip_var.h>
  63. #include <netinet/ip_options.h>
  64. #include <netinet/udp.h>
  65. #include <netinet/udp_var.h>
  66. #include <machine/in_cksum.h>
  67. #include <machine/pcb.h>
  68. #include <net/debugnet.h>
  69. #define DEBUGNET_INTERNAL
  70. #include <net/debugnet_int.h>
  71. FEATURE(debugnet, "Debugnet support");
  72. SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  73. "debugnet parameters");
  74. unsigned debugnet_debug;
  75. SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
  76. &debugnet_debug, 0,
  77. "Debug message verbosity (0: off; 1: on; 2: verbose)");
  78. int debugnet_npolls = 2000;
  79. SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
  80. &debugnet_npolls, 0,
  81. "Number of times to poll before assuming packet loss (0.5ms per poll)");
  82. int debugnet_nretries = 10;
  83. SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
  84. &debugnet_nretries, 0,
  85. "Number of retransmit attempts before giving up");
  86. int debugnet_fib = RT_DEFAULT_FIB;
  87. SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN,
  88. &debugnet_fib, 0,
  89. "Fib to use when sending dump");
  90. static bool g_debugnet_pcb_inuse;
  91. static struct debugnet_pcb g_dnet_pcb;
  92. /*
  93. * Simple accessors for opaque PCB.
  94. */
  95. const unsigned char *
  96. debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
  97. {
  98. MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
  99. pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
  100. return (pcb->dp_gw_mac.octet);
  101. }
  102. const in_addr_t *
  103. debugnet_get_server_addr(const struct debugnet_pcb *pcb)
  104. {
  105. MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
  106. pcb->dp_state >= DN_STATE_GOT_HERALD_PORT);
  107. return (&pcb->dp_server);
  108. }
  109. const uint16_t
  110. debugnet_get_server_port(const struct debugnet_pcb *pcb)
  111. {
  112. MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
  113. pcb->dp_state >= DN_STATE_GOT_HERALD_PORT);
  114. return (pcb->dp_server_port);
  115. }
  116. /*
  117. * Start of network primitives, beginning with output primitives.
  118. */
  119. /*
  120. * Handles creation of the ethernet header, then places outgoing packets into
  121. * the tx buffer for the NIC
  122. *
  123. * Parameters:
  124. * m The mbuf containing the packet to be sent (will be freed by
  125. * this function or the NIC driver)
  126. * ifp The interface to send on
  127. * dst The destination ethernet address (source address will be looked
  128. * up using ifp)
  129. * etype The ETHERTYPE_* value for the protocol that is being sent
  130. *
  131. * Returns:
  132. * int see errno.h, 0 for success
  133. */
  134. int
  135. debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
  136. u_short etype)
  137. {
  138. struct ether_header *eh;
  139. if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
  140. (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
  141. if_printf(ifp, "%s: interface isn't up\n", __func__);
  142. m_freem(m);
  143. return (ENETDOWN);
  144. }
  145. /* Fill in the ethernet header. */
  146. M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
  147. if (m == NULL) {
  148. printf("%s: out of mbufs\n", __func__);
  149. return (ENOBUFS);
  150. }
  151. eh = mtod(m, struct ether_header *);
  152. memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  153. memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
  154. eh->ether_type = htons(etype);
  155. return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
  156. }
  157. /*
  158. * Unreliable transmission of an mbuf chain to the debugnet server
  159. * Note: can't handle fragmentation; fails if the packet is larger than
  160. * ifp->if_mtu after adding the UDP/IP headers
  161. *
  162. * Parameters:
  163. * pcb The debugnet context block
  164. * m mbuf chain
  165. *
  166. * Returns:
  167. * int see errno.h, 0 for success
  168. */
  169. static int
  170. debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
  171. {
  172. struct udphdr *udp;
  173. MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
  174. M_PREPEND(m, sizeof(*udp), M_NOWAIT);
  175. if (m == NULL) {
  176. printf("%s: out of mbufs\n", __func__);
  177. return (ENOBUFS);
  178. }
  179. udp = mtod(m, void *);
  180. udp->uh_ulen = htons(m->m_pkthdr.len);
  181. /* Use this src port so that the server can connect() the socket */
  182. udp->uh_sport = htons(pcb->dp_client_port);
  183. udp->uh_dport = htons(pcb->dp_server_port);
  184. /* Computed later (protocol-dependent). */
  185. udp->uh_sum = 0;
  186. return (debugnet_ip_output(pcb, m));
  187. }
  188. int
  189. debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */)
  190. {
  191. struct debugnet_ack *dn_ack;
  192. struct mbuf *m;
  193. DNETDEBUG("Acking with seqno %u\n", ntohl(seqno));
  194. m = m_gethdr(M_NOWAIT, MT_DATA);
  195. if (m == NULL) {
  196. printf("%s: Out of mbufs\n", __func__);
  197. return (ENOBUFS);
  198. }
  199. m->m_len = sizeof(*dn_ack);
  200. m->m_pkthdr.len = sizeof(*dn_ack);
  201. MH_ALIGN(m, sizeof(*dn_ack));
  202. dn_ack = mtod(m, void *);
  203. dn_ack->da_seqno = seqno;
  204. return (debugnet_udp_output(pcb, m));
  205. }
  206. /*
  207. * Dummy free function for debugnet clusters.
  208. */
  209. static void
  210. debugnet_mbuf_free(struct mbuf *m __unused)
  211. {
  212. }
  213. /*
  214. * Construct and reliably send a debugnet packet. May fail from a resource
  215. * shortage or extreme number of unacknowledged retransmissions. Wait for
  216. * an acknowledgement before returning. Splits packets into chunks small
  217. * enough to be sent without fragmentation (looks up the interface MTU)
  218. *
  219. * Parameters:
  220. * type debugnet packet type (HERALD, FINISHED, ...)
  221. * data data
  222. * datalen data size (bytes)
  223. * auxdata optional auxiliary information
  224. *
  225. * Returns:
  226. * int see errno.h, 0 for success
  227. */
  228. int
  229. debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
  230. uint32_t datalen, const struct debugnet_proto_aux *auxdata)
  231. {
  232. struct debugnet_msg_hdr *dn_msg_hdr;
  233. struct mbuf *m, *m2;
  234. uint64_t want_acks;
  235. uint32_t i, pktlen, sent_so_far;
  236. int retries, polls, error;
  237. if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
  238. return (ECONNRESET);
  239. want_acks = 0;
  240. pcb->dp_rcvd_acks = 0;
  241. retries = 0;
  242. retransmit:
  243. /* Chunks can be too big to fit in packets. */
  244. for (i = sent_so_far = 0; sent_so_far < datalen ||
  245. (i == 0 && datalen == 0); i++) {
  246. pktlen = datalen - sent_so_far;
  247. /* Bound: the interface MTU (assume no IP options). */
  248. pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
  249. sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
  250. /*
  251. * Check if it is retransmitting and this has been ACKed
  252. * already.
  253. */
  254. if ((pcb->dp_rcvd_acks & (1 << i)) != 0) {
  255. sent_so_far += pktlen;
  256. continue;
  257. }
  258. /*
  259. * Get and fill a header mbuf, then chain data as an extended
  260. * mbuf.
  261. */
  262. m = m_gethdr(M_NOWAIT, MT_DATA);
  263. if (m == NULL) {
  264. printf("%s: Out of mbufs\n", __func__);
  265. return (ENOBUFS);
  266. }
  267. m->m_len = sizeof(struct debugnet_msg_hdr);
  268. m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
  269. MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
  270. dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
  271. dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
  272. dn_msg_hdr->mh_type = htonl(type);
  273. dn_msg_hdr->mh_len = htonl(pktlen);
  274. if (auxdata != NULL) {
  275. dn_msg_hdr->mh_offset =
  276. htobe64(auxdata->dp_offset_start + sent_so_far);
  277. dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
  278. } else {
  279. dn_msg_hdr->mh_offset = htobe64(sent_so_far);
  280. dn_msg_hdr->mh_aux2 = 0;
  281. }
  282. if (pktlen != 0) {
  283. m2 = m_get(M_NOWAIT, MT_DATA);
  284. if (m2 == NULL) {
  285. m_freem(m);
  286. printf("%s: Out of mbufs\n", __func__);
  287. return (ENOBUFS);
  288. }
  289. MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
  290. pktlen, debugnet_mbuf_free, NULL, NULL, 0,
  291. EXT_DISPOSABLE);
  292. m2->m_len = pktlen;
  293. m_cat(m, m2);
  294. m->m_pkthdr.len += pktlen;
  295. }
  296. error = debugnet_udp_output(pcb, m);
  297. if (error != 0)
  298. return (error);
  299. /* Note that we're waiting for this packet in the bitfield. */
  300. want_acks |= (1 << i);
  301. sent_so_far += pktlen;
  302. }
  303. if (i >= DEBUGNET_MAX_IN_FLIGHT)
  304. printf("Warning: Sent more than %d packets (%d). "
  305. "Acknowledgements will fail unless the size of "
  306. "rcvd_acks/want_acks is increased.\n",
  307. DEBUGNET_MAX_IN_FLIGHT, i);
  308. /*
  309. * Wait for acks. A *real* window would speed things up considerably.
  310. */
  311. polls = 0;
  312. while (pcb->dp_rcvd_acks != want_acks) {
  313. if (polls++ > debugnet_npolls) {
  314. if (retries++ > debugnet_nretries)
  315. return (ETIMEDOUT);
  316. printf(". ");
  317. goto retransmit;
  318. }
  319. debugnet_network_poll(pcb);
  320. DELAY(500);
  321. if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
  322. return (ECONNRESET);
  323. }
  324. pcb->dp_seqno += i;
  325. return (0);
  326. }
  327. /*
  328. * Network input primitives.
  329. */
  330. /*
  331. * Just introspect the header enough to fire off a seqno ack and validate
  332. * length fits.
  333. */
  334. static void
  335. debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb)
  336. {
  337. const struct debugnet_msg_hdr *dnh;
  338. struct mbuf *m;
  339. uint32_t hdr_type;
  340. uint32_t seqno;
  341. int error;
  342. m = *mb;
  343. if (m->m_pkthdr.len < sizeof(*dnh)) {
  344. DNETDEBUG("ignoring small debugnet_msg packet\n");
  345. return;
  346. }
  347. /* Get ND header. */
  348. if (m->m_len < sizeof(*dnh)) {
  349. m = m_pullup(m, sizeof(*dnh));
  350. *mb = m;
  351. if (m == NULL) {
  352. DNETDEBUG("m_pullup failed\n");
  353. return;
  354. }
  355. }
  356. dnh = mtod(m, const void *);
  357. if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) {
  358. DNETDEBUG("Dropping short packet.\n");
  359. return;
  360. }
  361. hdr_type = ntohl(dnh->mh_type);
  362. if (hdr_type != DEBUGNET_DATA) {
  363. if (hdr_type == DEBUGNET_FINISHED) {
  364. printf("Remote shut down the connection on us!\n");
  365. pcb->dp_state = DN_STATE_REMOTE_CLOSED;
  366. if (pcb->dp_finish_handler != NULL) {
  367. pcb->dp_finish_handler();
  368. }
  369. } else {
  370. DNETDEBUG("Got unexpected debugnet message %u\n", hdr_type);
  371. }
  372. return;
  373. }
  374. /*
  375. * If the issue is transient (ENOBUFS), sender should resend. If
  376. * non-transient (like driver objecting to rx -> tx from the same
  377. * thread), not much else we can do.
  378. */
  379. seqno = dnh->mh_seqno; /* net endian */
  380. m_adj(m, sizeof(*dnh));
  381. dnh = NULL;
  382. error = pcb->dp_rx_handler(m);
  383. if (error != 0) {
  384. DNETDEBUG("RX handler was not able to accept message, error %d. "
  385. "Skipping ack.\n", error);
  386. return;
  387. }
  388. error = debugnet_ack_output(pcb, seqno);
  389. if (error != 0) {
  390. DNETDEBUG("Couldn't ACK rx packet %u; %d\n", ntohl(seqno), error);
  391. }
  392. }
  393. static void
  394. debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
  395. {
  396. const struct debugnet_ack *dn_ack;
  397. struct mbuf *m;
  398. uint32_t rcv_ackno;
  399. m = *mb;
  400. /* Get Ack. */
  401. if (m->m_len < sizeof(*dn_ack)) {
  402. m = m_pullup(m, sizeof(*dn_ack));
  403. *mb = m;
  404. if (m == NULL) {
  405. DNETDEBUG("m_pullup failed\n");
  406. return;
  407. }
  408. }
  409. dn_ack = mtod(m, const void *);
  410. /* Debugnet processing. */
  411. /*
  412. * Packet is meant for us. Extract the ack sequence number and the
  413. * port number if necessary.
  414. */
  415. rcv_ackno = ntohl(dn_ack->da_seqno);
  416. if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
  417. pcb->dp_server_port = sport;
  418. pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
  419. }
  420. if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
  421. printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
  422. else if (rcv_ackno >= pcb->dp_seqno) {
  423. /* We're interested in this ack. Record it. */
  424. pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno);
  425. }
  426. }
  427. void
  428. debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
  429. {
  430. const struct udphdr *udp;
  431. struct mbuf *m;
  432. uint16_t sport, ulen;
  433. /* UDP processing. */
  434. m = *mb;
  435. if (m->m_pkthdr.len < sizeof(*udp)) {
  436. DNETDEBUG("ignoring small UDP packet\n");
  437. return;
  438. }
  439. /* Get UDP headers. */
  440. if (m->m_len < sizeof(*udp)) {
  441. m = m_pullup(m, sizeof(*udp));
  442. *mb = m;
  443. if (m == NULL) {
  444. DNETDEBUG("m_pullup failed\n");
  445. return;
  446. }
  447. }
  448. udp = mtod(m, const void *);
  449. /* We expect to receive UDP packets on the configured client port. */
  450. if (ntohs(udp->uh_dport) != pcb->dp_client_port) {
  451. DNETDEBUG("not on the expected port.\n");
  452. return;
  453. }
  454. /* Check that ulen does not exceed actual size of data. */
  455. ulen = ntohs(udp->uh_ulen);
  456. if (m->m_pkthdr.len < ulen) {
  457. DNETDEBUG("ignoring runt UDP packet\n");
  458. return;
  459. }
  460. sport = ntohs(udp->uh_sport);
  461. m_adj(m, sizeof(*udp));
  462. ulen -= sizeof(*udp);
  463. if (ulen == sizeof(struct debugnet_ack)) {
  464. debugnet_handle_ack(pcb, mb, sport);
  465. return;
  466. }
  467. if (pcb->dp_rx_handler == NULL) {
  468. if (ulen < sizeof(struct debugnet_ack))
  469. DNETDEBUG("ignoring small ACK packet\n");
  470. else
  471. DNETDEBUG("ignoring unexpected non-ACK packet on "
  472. "half-duplex connection.\n");
  473. return;
  474. }
  475. debugnet_handle_rx_msg(pcb, mb);
  476. }
  477. /*
  478. * Handler for incoming packets directly from the network adapter
  479. * Identifies the packet type (IP or ARP) and passes it along to one of the
  480. * helper functions debugnet_handle_ip or debugnet_handle_arp.
  481. *
  482. * It needs to partially replicate the behaviour of ether_input() and
  483. * ether_demux().
  484. *
  485. * Parameters:
  486. * ifp the interface the packet came from
  487. * m an mbuf containing the packet received
  488. */
  489. static void
  490. debugnet_input_one(struct ifnet *ifp, struct mbuf *m)
  491. {
  492. struct ifreq ifr;
  493. struct ether_header *eh;
  494. u_short etype;
  495. /* Ethernet processing. */
  496. if ((m->m_flags & M_PKTHDR) == 0) {
  497. DNETDEBUG_IF(ifp, "discard frame without packet header\n");
  498. goto done;
  499. }
  500. if (m->m_len < ETHER_HDR_LEN) {
  501. DNETDEBUG_IF(ifp,
  502. "discard frame without leading eth header (len %d pktlen %d)\n",
  503. m->m_len, m->m_pkthdr.len);
  504. goto done;
  505. }
  506. eh = mtod(m, struct ether_header *);
  507. etype = ntohs(eh->ether_type);
  508. if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
  509. DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
  510. goto done;
  511. }
  512. if (if_gethwaddr(ifp, &ifr) != 0) {
  513. DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
  514. goto done;
  515. }
  516. if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
  517. ETHER_ADDR_LEN) != 0 &&
  518. (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
  519. DNETDEBUG_IF(ifp,
  520. "discard frame with incorrect destination addr\n");
  521. goto done;
  522. }
  523. MPASS(g_debugnet_pcb_inuse);
  524. /* Done ethernet processing. Strip off the ethernet header. */
  525. m_adj(m, ETHER_HDR_LEN);
  526. switch (etype) {
  527. case ETHERTYPE_ARP:
  528. debugnet_handle_arp(&g_dnet_pcb, &m);
  529. break;
  530. case ETHERTYPE_IP:
  531. debugnet_handle_ip(&g_dnet_pcb, &m);
  532. break;
  533. default:
  534. DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
  535. break;
  536. }
  537. done:
  538. if (m != NULL)
  539. m_freem(m);
  540. }
  541. static void
  542. debugnet_input(struct ifnet *ifp, struct mbuf *m)
  543. {
  544. struct mbuf *n;
  545. do {
  546. n = m->m_nextpkt;
  547. m->m_nextpkt = NULL;
  548. debugnet_input_one(ifp, m);
  549. m = n;
  550. } while (m != NULL);
  551. }
  552. /*
  553. * Network polling primitive.
  554. *
  555. * Instead of assuming that most of the network stack is sane, we just poll the
  556. * driver directly for packets.
  557. */
  558. void
  559. debugnet_network_poll(struct debugnet_pcb *pcb)
  560. {
  561. struct ifnet *ifp;
  562. ifp = pcb->dp_ifp;
  563. ifp->if_debugnet_methods->dn_poll(ifp, 1000);
  564. }
  565. /*
  566. * Start of consumer API surface.
  567. */
  568. void
  569. debugnet_free(struct debugnet_pcb *pcb)
  570. {
  571. struct ifnet *ifp;
  572. MPASS(pcb == &g_dnet_pcb);
  573. MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse);
  574. ifp = pcb->dp_ifp;
  575. if (ifp != NULL) {
  576. if (pcb->dp_drv_input != NULL)
  577. ifp->if_input = pcb->dp_drv_input;
  578. if (pcb->dp_event_started)
  579. ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
  580. }
  581. debugnet_mbuf_finish();
  582. g_debugnet_pcb_inuse = false;
  583. memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
  584. }
  585. int
  586. debugnet_connect(const struct debugnet_conn_params *dcp,
  587. struct debugnet_pcb **pcb_out)
  588. {
  589. struct debugnet_proto_aux herald_auxdata;
  590. struct debugnet_pcb *pcb;
  591. struct ifnet *ifp;
  592. int error;
  593. if (g_debugnet_pcb_inuse) {
  594. printf("%s: Only one connection at a time.\n", __func__);
  595. return (EBUSY);
  596. }
  597. pcb = &g_dnet_pcb;
  598. *pcb = (struct debugnet_pcb) {
  599. .dp_state = DN_STATE_INIT,
  600. .dp_client = dcp->dc_client,
  601. .dp_server = dcp->dc_server,
  602. .dp_gateway = dcp->dc_gateway,
  603. .dp_server_port = dcp->dc_herald_port, /* Initially */
  604. .dp_client_port = dcp->dc_client_port,
  605. .dp_seqno = 1,
  606. .dp_ifp = dcp->dc_ifp,
  607. .dp_rx_handler = dcp->dc_rx_handler,
  608. .dp_drv_input = NULL,
  609. };
  610. /* Switch to the debugnet mbuf zones. */
  611. debugnet_mbuf_start();
  612. /* At least one needed parameter is missing; infer it. */
  613. if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
  614. pcb->dp_ifp == NULL) {
  615. struct sockaddr_in dest_sin, *gw_sin, *local_sin;
  616. struct ifnet *rt_ifp;
  617. struct nhop_object *nh;
  618. memset(&dest_sin, 0, sizeof(dest_sin));
  619. dest_sin = (struct sockaddr_in) {
  620. .sin_len = sizeof(dest_sin),
  621. .sin_family = AF_INET,
  622. .sin_addr.s_addr = pcb->dp_server,
  623. };
  624. CURVNET_SET(vnet0);
  625. nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0,
  626. NHR_NONE);
  627. CURVNET_RESTORE();
  628. if (nh == NULL) {
  629. printf("%s: Could not get route for that server.\n",
  630. __func__);
  631. error = ENOENT;
  632. goto cleanup;
  633. }
  634. /* TODO support AF_INET6 */
  635. if (nh->gw_sa.sa_family == AF_INET)
  636. gw_sin = &nh->gw4_sa;
  637. else {
  638. if (nh->gw_sa.sa_family == AF_LINK)
  639. DNETDEBUG("Destination address is on link.\n");
  640. gw_sin = NULL;
  641. }
  642. MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET);
  643. local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr;
  644. rt_ifp = nh->nh_ifp;
  645. if (pcb->dp_client == INADDR_ANY)
  646. pcb->dp_client = local_sin->sin_addr.s_addr;
  647. if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
  648. pcb->dp_gateway = gw_sin->sin_addr.s_addr;
  649. if (pcb->dp_ifp == NULL)
  650. pcb->dp_ifp = rt_ifp;
  651. }
  652. ifp = pcb->dp_ifp;
  653. if (debugnet_debug > 0) {
  654. char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
  655. gwbuf[INET_ADDRSTRLEN];
  656. inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
  657. inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
  658. if (pcb->dp_gateway != INADDR_ANY)
  659. inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
  660. DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
  661. serbuf, pcb->dp_server_port,
  662. (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
  663. (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
  664. clibuf, pcb->dp_client_port, if_name(ifp));
  665. }
  666. /* Validate iface is online and supported. */
  667. if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
  668. printf("%s: interface '%s' does not support debugnet\n",
  669. __func__, if_name(ifp));
  670. error = ENODEV;
  671. goto cleanup;
  672. }
  673. if ((if_getflags(ifp) & IFF_UP) == 0) {
  674. printf("%s: interface '%s' link is down\n", __func__,
  675. if_name(ifp));
  676. error = ENXIO;
  677. goto cleanup;
  678. }
  679. ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
  680. pcb->dp_event_started = true;
  681. /*
  682. * We maintain the invariant that g_debugnet_pcb_inuse is always true
  683. * while the debugnet ifp's if_input is overridden with
  684. * debugnet_input().
  685. */
  686. g_debugnet_pcb_inuse = true;
  687. /* Make the card use *our* receive callback. */
  688. pcb->dp_drv_input = ifp->if_input;
  689. ifp->if_input = debugnet_input;
  690. printf("%s: searching for %s MAC...\n", __func__,
  691. (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
  692. error = debugnet_arp_gw(pcb);
  693. if (error != 0) {
  694. printf("%s: failed to locate MAC address\n", __func__);
  695. goto cleanup;
  696. }
  697. MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
  698. herald_auxdata = (struct debugnet_proto_aux) {
  699. .dp_offset_start = dcp->dc_herald_offset,
  700. .dp_aux2 = dcp->dc_herald_aux2,
  701. };
  702. error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
  703. dcp->dc_herald_datalen, &herald_auxdata);
  704. if (error != 0) {
  705. printf("%s: failed to herald debugnet server\n", __func__);
  706. goto cleanup;
  707. }
  708. *pcb_out = pcb;
  709. return (0);
  710. cleanup:
  711. debugnet_free(pcb);
  712. return (error);
  713. }
  714. /*
  715. * Pre-allocated dump-time mbuf tracking.
  716. *
  717. * We just track the high water mark we've ever seen and allocate appropriately
  718. * for that iface/mtu combo.
  719. */
  720. static struct {
  721. int nmbuf;
  722. int ncl;
  723. int clsize;
  724. } dn_hwm;
  725. static struct mtx dn_hwm_lk;
  726. MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
  727. static void
  728. dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
  729. {
  730. bool any;
  731. any = false;
  732. mtx_lock(&dn_hwm_lk);
  733. if (nmbuf > dn_hwm.nmbuf) {
  734. any = true;
  735. dn_hwm.nmbuf = nmbuf;
  736. } else
  737. nmbuf = dn_hwm.nmbuf;
  738. if (ncl > dn_hwm.ncl) {
  739. any = true;
  740. dn_hwm.ncl = ncl;
  741. } else
  742. ncl = dn_hwm.ncl;
  743. if (clsize > dn_hwm.clsize) {
  744. any = true;
  745. dn_hwm.clsize = clsize;
  746. } else
  747. clsize = dn_hwm.clsize;
  748. mtx_unlock(&dn_hwm_lk);
  749. if (any)
  750. debugnet_mbuf_reinit(nmbuf, ncl, clsize);
  751. }
  752. void
  753. debugnet_any_ifnet_update(struct ifnet *ifp)
  754. {
  755. int clsize, nmbuf, ncl, nrxr;
  756. if (!DEBUGNET_SUPPORTED_NIC(ifp))
  757. return;
  758. ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
  759. KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
  760. /*
  761. * We need two headers per message on the transmit side. Multiply by
  762. * four to give us some breathing room.
  763. */
  764. nmbuf = ncl * (4 + nrxr);
  765. ncl *= nrxr;
  766. /*
  767. * Bandaid for drivers that (incorrectly) advertise LinkUp before their
  768. * dn_init method is available.
  769. */
  770. if (nmbuf == 0 || ncl == 0 || clsize == 0) {
  771. #ifndef INVARIANTS
  772. if (bootverbose)
  773. #endif
  774. printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n",
  775. __func__, if_name(ifp), ifp);
  776. return;
  777. }
  778. dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
  779. }
  780. /*
  781. * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
  782. * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
  783. *
  784. * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
  785. * because the driver is still in attach. Since we cannot use down interfaces,
  786. * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least
  787. * with vtnet and dhcpclient that event just never occurs.
  788. *
  789. * So that's how I've landed on the lower level ifnet_link_event.
  790. */
  791. static void
  792. dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
  793. {
  794. if (link_state == LINK_STATE_UP)
  795. debugnet_any_ifnet_update(ifp);
  796. }
  797. static eventhandler_tag dn_attach_cookie;
  798. static void
  799. dn_evh_init(void *ctx __unused)
  800. {
  801. dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
  802. dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
  803. }
  804. SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
  805. /*
  806. * DDB parsing helpers for debugnet(4) consumers.
  807. */
  808. #ifdef DDB
  809. struct my_inet_opt {
  810. bool has_opt;
  811. const char *printname;
  812. in_addr_t *result;
  813. };
  814. static int
  815. dn_parse_optarg_ipv4(struct my_inet_opt *opt)
  816. {
  817. in_addr_t tmp;
  818. unsigned octet;
  819. int t;
  820. tmp = 0;
  821. for (octet = 0; octet < 4; octet++) {
  822. t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
  823. if (t != tNUMBER) {
  824. db_printf("%s:%s: octet %u expected number; found %d\n",
  825. __func__, opt->printname, octet, t);
  826. return (EINVAL);
  827. }
  828. /*
  829. * db_lex lexes '-' distinctly from the number itself, but
  830. * let's document that invariant.
  831. */
  832. MPASS(db_tok_number >= 0);
  833. if (db_tok_number > UINT8_MAX) {
  834. db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
  835. opt->printname, octet, (intmax_t)db_tok_number);
  836. return (EDOM);
  837. }
  838. /* Constructed host-endian and converted to network later. */
  839. tmp = (tmp << 8) | db_tok_number;
  840. if (octet < 3) {
  841. t = db_read_token_flags(DRT_WSPACE);
  842. if (t != tDOT) {
  843. db_printf("%s:%s: octet %u expected '.'; found"
  844. " %d\n", __func__, opt->printname, octet,
  845. t);
  846. return (EINVAL);
  847. }
  848. }
  849. }
  850. *opt->result = htonl(tmp);
  851. opt->has_opt = true;
  852. return (0);
  853. }
  854. int
  855. debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
  856. {
  857. struct ifnet *ifp;
  858. int t, error;
  859. bool want_ifp;
  860. char ch;
  861. struct my_inet_opt opt_client = {
  862. .printname = "client",
  863. .result = &result->dd_client,
  864. },
  865. opt_server = {
  866. .printname = "server",
  867. .result = &result->dd_server,
  868. },
  869. opt_gateway = {
  870. .printname = "gateway",
  871. .result = &result->dd_gateway,
  872. },
  873. *cur_inet_opt;
  874. ifp = NULL;
  875. memset(result, 0, sizeof(*result));
  876. /*
  877. * command [space] [-] [opt] [[space] [optarg]] ...
  878. *
  879. * db_command has already lexed 'command' for us.
  880. */
  881. t = db_read_token_flags(DRT_WSPACE);
  882. if (t == tWSPACE)
  883. t = db_read_token_flags(DRT_WSPACE);
  884. while (t != tEOL) {
  885. if (t != tMINUS) {
  886. db_printf("%s: Bad syntax; expected '-', got %d\n",
  887. cmd, t);
  888. goto usage;
  889. }
  890. t = db_read_token_flags(DRT_WSPACE);
  891. if (t != tIDENT) {
  892. db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
  893. cmd, t);
  894. goto usage;
  895. }
  896. if (strlen(db_tok_string) > 1) {
  897. db_printf("%s: Bad syntax; expected single option "
  898. "flag, got '%s'\n", cmd, db_tok_string);
  899. goto usage;
  900. }
  901. want_ifp = false;
  902. cur_inet_opt = NULL;
  903. switch ((ch = db_tok_string[0])) {
  904. default:
  905. DNETDEBUG("Unexpected: '%c'\n", ch);
  906. /* FALLTHROUGH */
  907. case 'h':
  908. goto usage;
  909. case 'c':
  910. cur_inet_opt = &opt_client;
  911. break;
  912. case 'g':
  913. cur_inet_opt = &opt_gateway;
  914. break;
  915. case 's':
  916. cur_inet_opt = &opt_server;
  917. break;
  918. case 'i':
  919. want_ifp = true;
  920. break;
  921. }
  922. t = db_read_token_flags(DRT_WSPACE);
  923. if (t != tWSPACE) {
  924. db_printf("%s: Bad syntax; expected space after "
  925. "flag %c, got %d\n", cmd, ch, t);
  926. goto usage;
  927. }
  928. if (want_ifp) {
  929. t = db_read_token_flags(DRT_WSPACE);
  930. if (t != tIDENT) {
  931. db_printf("%s: Expected interface but got %d\n",
  932. cmd, t);
  933. goto usage;
  934. }
  935. CURVNET_SET(vnet0);
  936. /*
  937. * We *don't* take a ref here because the only current
  938. * consumer, db_netdump_cmd, does not need it. It
  939. * (somewhat redundantly) extracts the if_name(),
  940. * re-lookups the ifp, and takes its own reference.
  941. */
  942. ifp = ifunit(db_tok_string);
  943. CURVNET_RESTORE();
  944. if (ifp == NULL) {
  945. db_printf("Could not locate interface %s\n",
  946. db_tok_string);
  947. error = ENOENT;
  948. goto cleanup;
  949. }
  950. } else {
  951. MPASS(cur_inet_opt != NULL);
  952. /* Assume IPv4 for now. */
  953. error = dn_parse_optarg_ipv4(cur_inet_opt);
  954. if (error != 0)
  955. goto cleanup;
  956. }
  957. /* Skip (mandatory) whitespace after option, if not EOL. */
  958. t = db_read_token_flags(DRT_WSPACE);
  959. if (t == tEOL)
  960. break;
  961. if (t != tWSPACE) {
  962. db_printf("%s: Bad syntax; expected space after "
  963. "flag %c option; got %d\n", cmd, ch, t);
  964. goto usage;
  965. }
  966. t = db_read_token_flags(DRT_WSPACE);
  967. }
  968. if (!opt_server.has_opt) {
  969. db_printf("%s: need a destination server address\n", cmd);
  970. goto usage;
  971. }
  972. result->dd_has_client = opt_client.has_opt;
  973. result->dd_has_gateway = opt_gateway.has_opt;
  974. result->dd_ifp = ifp;
  975. /* We parsed the full line to tEOL already, or bailed with an error. */
  976. return (0);
  977. usage:
  978. db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
  979. "-i <interface>]\n", cmd);
  980. error = EINVAL;
  981. /* FALLTHROUGH */
  982. cleanup:
  983. db_skip_to_eol();
  984. return (error);
  985. }
  986. #endif /* DDB */