if_tuntap.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013
  1. /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
  2. /*-
  3. * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  4. *
  5. * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
  6. * All rights reserved.
  7. * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. *
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  20. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  23. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29. * SUCH DAMAGE.
  30. *
  31. * BASED ON:
  32. * -------------------------------------------------------------------------
  33. *
  34. * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
  35. * Nottingham University 1987.
  36. *
  37. * This source may be freely distributed, however I would be interested
  38. * in any changes that are made.
  39. *
  40. * This driver takes packets off the IP i/f and hands them up to a
  41. * user process to have its wicked way with. This driver has its
  42. * roots in a similar driver written by Phil Cockcroft (formerly) at
  43. * UCL. This driver is based much more on read/write/poll mode of
  44. * operation though.
  45. *
  46. * $FreeBSD$
  47. */
  48. #include "opt_inet.h"
  49. #include "opt_inet6.h"
  50. #include <sys/param.h>
  51. #include <sys/lock.h>
  52. #include <sys/priv.h>
  53. #include <sys/proc.h>
  54. #include <sys/systm.h>
  55. #include <sys/jail.h>
  56. #include <sys/mbuf.h>
  57. #include <sys/module.h>
  58. #include <sys/socket.h>
  59. #include <sys/eventhandler.h>
  60. #include <sys/fcntl.h>
  61. #include <sys/filio.h>
  62. #include <sys/sockio.h>
  63. #include <sys/sx.h>
  64. #include <sys/syslog.h>
  65. #include <sys/ttycom.h>
  66. #include <sys/poll.h>
  67. #include <sys/selinfo.h>
  68. #include <sys/signalvar.h>
  69. #include <sys/filedesc.h>
  70. #include <sys/kernel.h>
  71. #include <sys/sysctl.h>
  72. #include <sys/conf.h>
  73. #include <sys/uio.h>
  74. #include <sys/malloc.h>
  75. #include <sys/random.h>
  76. #include <sys/ctype.h>
  77. #include <net/ethernet.h>
  78. #include <net/if.h>
  79. #include <net/if_var.h>
  80. #include <net/if_clone.h>
  81. #include <net/if_dl.h>
  82. #include <net/if_media.h>
  83. #include <net/if_types.h>
  84. #include <net/if_vlan_var.h>
  85. #include <net/netisr.h>
  86. #include <net/route.h>
  87. #include <net/vnet.h>
  88. #include <netinet/in.h>
  89. #ifdef INET
  90. #include <netinet/ip.h>
  91. #endif
  92. #ifdef INET6
  93. #include <netinet/ip6.h>
  94. #include <netinet6/ip6_var.h>
  95. #endif
  96. #include <netinet/udp.h>
  97. #include <netinet/tcp.h>
  98. #include <net/bpf.h>
  99. #include <net/if_tap.h>
  100. #include <net/if_tun.h>
  101. #include <dev/virtio/network/virtio_net.h>
  102. #include <sys/queue.h>
  103. #include <sys/condvar.h>
  104. #include <security/mac/mac_framework.h>
struct tuntap_driver;

/*
 * tun_list is protected by global tunmtx.  Other mutable fields are
 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
 * static for the duration of a tunnel interface.
 */
struct tuntap_softc {
	TAILQ_ENTRY(tuntap_softc)	 tun_list;	/* entry in tunhead (tunmtx) */
	struct cdev			*tun_alias;	/* alias cdev, if the ifnet was renamed */
	struct cdev			*tun_dev;	/* backing character device */
	u_short	tun_flags;		/* misc flags */
#define	TUN_OPEN	0x0001	/* device is open */
#define	TUN_INITED	0x0002	/* softc/ifnet initialized */
#define	TUN_UNUSED1	0x0008
#define	TUN_UNUSED2	0x0010
#define	TUN_LMODE	0x0020
#define	TUN_RWAIT	0x0040	/* a reader is sleeping; tunstart() wakes it */
#define	TUN_ASYNC	0x0080	/* deliver SIGIO through tun_sigio */
#define	TUN_IFHEAD	0x0100
#define	TUN_DYING	0x0200	/* set by tun_destroy(); refuses new busy refs */
#define	TUN_L2		0x0400	/* layer-2 device (tap/vmnet) */
#define	TUN_VMNET	0x0800	/* vmnet flavor of the tap device */

/* Bits that identify which tuntap_driver owns an instance. */
#define	TUN_DRIVER_IDENT_MASK	(TUN_L2 | TUN_VMNET)
#define	TUN_READY		(TUN_OPEN | TUN_INITED)

	pid_t			 tun_pid;	/* owning pid */
	struct ifnet		*tun_ifp;	/* the interface */
	struct sigio		*tun_sigio;	/* async I/O info */
	struct tuntap_driver	*tun_drv;	/* appropriate driver */
	struct selinfo		 tun_rsel;	/* read select */
	struct mtx		 tun_mtx;	/* softc field mutex */
	struct cv		 tun_cv;	/* for ref'd dev destroy */
	struct ether_addr	 tun_ether;	/* remote address */
	int			 tun_busy;	/* busy count */
	int			 tun_vhdrlen;	/* virtio-net header length */
};
  140. #define TUN2IFP(sc) ((sc)->tun_ifp)
  141. #define TUNDEBUG if (tundebug) if_printf
  142. #define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx)
  143. #define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx)
  144. #define TUN_LOCK_ASSERT(tp) mtx_assert(&(tp)->tun_mtx, MA_OWNED);
  145. #define TUN_VMIO_FLAG_MASK 0x0fff
  146. /*
  147. * Interface capabilities of a tap device that supports the virtio-net
  148. * header.
  149. */
  150. #define TAP_VNET_HDR_CAPS (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 \
  151. | IFCAP_VLAN_HWCSUM \
  152. | IFCAP_TSO | IFCAP_LRO \
  153. | IFCAP_VLAN_HWTSO)
  154. #define TAP_ALL_OFFLOAD (CSUM_TSO | CSUM_TCP | CSUM_UDP |\
  155. CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
  156. /*
  157. * All mutable global variables in if_tun are locked using tunmtx, with
  158. * the exception of tundebug, which is used unlocked, and the drivers' *clones,
  159. * which are static after setup.
  160. */
static struct mtx tunmtx;			/* protects tunhead */
static eventhandler_tag arrival_tag;		/* ifnet_arrival_event hook */
static eventhandler_tag clone_tag;		/* dev_clone hook */
static const char tunname[] = "tun";
static const char tapname[] = "tap";
static const char vmnetname[] = "vmnet";
static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;	/* debug printf toggle; used unlocked */
static int tundclone = 1;	/* enable devfs cloning for tun */
static int tap_allow_uopen = 0;	/* allow user devfs cloning */
static int tapuponopen = 0;	/* IFF_UP on open() */
static int tapdclone = 1;	/* enable devfs cloning */

/* All live softcs, in creation order; protected by tunmtx. */
static TAILQ_HEAD(,tuntap_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");

/* Serializes interface ioctls against softc teardown. */
static struct sx tun_ioctl_sx;
SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");

SYSCTL_DECL(_net_link);
/* tun */
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IP tunnel software network interface");
SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
    "Enable legacy devfs interface creation");
/* tap */
static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
    "Enable legacy devfs interface creation for all users");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
    "Bring interface up when /dev/tap is opened");
SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
    "Enable legacy devfs interface creation");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
  193. static int tun_create_device(struct tuntap_driver *drv, int unit,
  194. struct ucred *cr, struct cdev **dev, const char *name);
  195. static int tun_busy_locked(struct tuntap_softc *tp);
  196. static void tun_unbusy_locked(struct tuntap_softc *tp);
  197. static int tun_busy(struct tuntap_softc *tp);
  198. static void tun_unbusy(struct tuntap_softc *tp);
  199. static int tuntap_name2info(const char *name, int *unit, int *flags);
  200. static void tunclone(void *arg, struct ucred *cred, char *name,
  201. int namelen, struct cdev **dev);
  202. static void tuncreate(struct cdev *dev);
  203. static void tundtor(void *data);
  204. static void tunrename(void *arg, struct ifnet *ifp);
  205. static int tunifioctl(struct ifnet *, u_long, caddr_t);
  206. static void tuninit(struct ifnet *);
  207. static void tunifinit(void *xtp);
  208. static int tuntapmodevent(module_t, int, void *);
  209. static int tunoutput(struct ifnet *, struct mbuf *,
  210. const struct sockaddr *, struct route *ro);
  211. static void tunstart(struct ifnet *);
  212. static void tunstart_l2(struct ifnet *);
  213. static int tun_clone_match(struct if_clone *ifc, const char *name);
  214. static int tap_clone_match(struct if_clone *ifc, const char *name);
  215. static int vmnet_clone_match(struct if_clone *ifc, const char *name);
  216. static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
  217. static int tun_clone_destroy(struct if_clone *, struct ifnet *);
  218. static void tun_vnethdr_set(struct ifnet *ifp, int vhdrlen);
  219. static d_open_t tunopen;
  220. static d_read_t tunread;
  221. static d_write_t tunwrite;
  222. static d_ioctl_t tunioctl;
  223. static d_poll_t tunpoll;
  224. static d_kqfilter_t tunkqfilter;
  225. static int tunkqread(struct knote *, long);
  226. static int tunkqwrite(struct knote *, long);
  227. static void tunkqdetach(struct knote *);
/* kqueue read filter: no attach hook; events raised from tunstart(). */
static struct filterops tun_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqread,
};

/* kqueue write filter: shares the detach handler with the read side. */
static struct filterops tun_write_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqwrite,
};
/*
 * One entry per device flavor (tun, tap, vmnet).  The ident_flags value
 * distinguishes the flavors via TUN_DRIVER_IDENT_MASK; all three share
 * the same cdevsw entry points and clone create/destroy handlers, and
 * differ only in their match function and device name.
 */
static struct tuntap_driver {
	struct cdevsw		 cdevsw;
	int			 ident_flags;
	struct unrhdr		*unrhdr;	/* unit-number allocator */
	struct clonedevs	*clones;	/* cdev clone list */
	ifc_match_t		*clone_match_fn;
	ifc_create_t		*clone_create_fn;
	ifc_destroy_t		*clone_destroy_fn;
} tuntap_drivers[] = {
	{
		/* tun: layer-3 tunnel device */
		.ident_flags =	0,
		.cdevsw =	{
		    .d_version =	D_VERSION,
		    .d_flags =		D_NEEDMINOR,
		    .d_open =		tunopen,
		    .d_read =		tunread,
		    .d_write =		tunwrite,
		    .d_ioctl =		tunioctl,
		    .d_poll =		tunpoll,
		    .d_kqfilter =	tunkqfilter,
		    .d_name =		tunname,
		},
		.clone_match_fn =	tun_clone_match,
		.clone_create_fn =	tun_clone_create,
		.clone_destroy_fn =	tun_clone_destroy,
	},
	{
		/* tap: layer-2 (Ethernet) device */
		.ident_flags =	TUN_L2,
		.cdevsw =	{
		    .d_version =	D_VERSION,
		    .d_flags =		D_NEEDMINOR,
		    .d_open =		tunopen,
		    .d_read =		tunread,
		    .d_write =		tunwrite,
		    .d_ioctl =		tunioctl,
		    .d_poll =		tunpoll,
		    .d_kqfilter =	tunkqfilter,
		    .d_name =		tapname,
		},
		.clone_match_fn =	tap_clone_match,
		.clone_create_fn =	tun_clone_create,
		.clone_destroy_fn =	tun_clone_destroy,
	},
	{
		/* vmnet: tap variant with its own device name */
		.ident_flags =	TUN_L2 | TUN_VMNET,
		.cdevsw =	{
		    .d_version =	D_VERSION,
		    .d_flags =		D_NEEDMINOR,
		    .d_open =		tunopen,
		    .d_read =		tunread,
		    .d_write =		tunwrite,
		    .d_ioctl =		tunioctl,
		    .d_poll =		tunpoll,
		    .d_kqfilter =	tunkqfilter,
		    .d_name =		vmnetname,
		},
		.clone_match_fn =	vmnet_clone_match,
		.clone_create_fn =	tun_clone_create,
		.clone_destroy_fn =	tun_clone_destroy,
	},
};
/* Binds a tuntap driver to its per-vnet interface cloner. */
struct tuntap_driver_cloner {
	SLIST_ENTRY(tuntap_driver_cloner)	 link;
	struct tuntap_driver			*drv;
	struct if_clone				*cloner;
};

/* Per-vnet list, populated by vnet_tun_init(). */
VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
    SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);

#define	V_tuntap_driver_cloners	VNET(tuntap_driver_cloners)
  309. /*
  310. * Mechanism for marking a tunnel device as busy so that we can safely do some
  311. * orthogonal operations (such as operations on devices) without racing against
  312. * tun_destroy. tun_destroy will wait on the condvar if we're at all busy or
  313. * open, to be woken up when the condition is alleviated.
  314. */
  315. static int
  316. tun_busy_locked(struct tuntap_softc *tp)
  317. {
  318. TUN_LOCK_ASSERT(tp);
  319. if ((tp->tun_flags & TUN_DYING) != 0) {
  320. /*
  321. * Perhaps unintuitive, but the device is busy going away.
  322. * Other interpretations of EBUSY from tun_busy make little
  323. * sense, since making a busy device even more busy doesn't
  324. * sound like a problem.
  325. */
  326. return (EBUSY);
  327. }
  328. ++tp->tun_busy;
  329. return (0);
  330. }
/*
 * Drop a busy reference; caller must hold tun_mtx.  The last reference
 * wakes any tun_destroy() sleeping on tun_cv.
 */
static void
tun_unbusy_locked(struct tuntap_softc *tp)
{

	TUN_LOCK_ASSERT(tp);
	KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel"));

	--tp->tun_busy;
	/* Wake up anything that may be waiting on our busy tunnel. */
	if (tp->tun_busy == 0)
		cv_broadcast(&tp->tun_cv);
}
  341. static int
  342. tun_busy(struct tuntap_softc *tp)
  343. {
  344. int ret;
  345. TUN_LOCK(tp);
  346. ret = tun_busy_locked(tp);
  347. TUN_UNLOCK(tp);
  348. return (ret);
  349. }
  350. static void
  351. tun_unbusy(struct tuntap_softc *tp)
  352. {
  353. TUN_LOCK(tp);
  354. tun_unbusy_locked(tp);
  355. TUN_UNLOCK(tp);
  356. }
/*
 * Sets unit and/or flags given the device name.  Must be called with correct
 * vnet context.  Returns EINVAL for a NULL name and ENXIO when the name
 * matches no registered driver.  A bare driver name ("tun", "tap", ...)
 * yields unit -1; a "<driver><n>" name yields unit n.
 */
static int
tuntap_name2info(const char *name, int *outunit, int *outflags)
{
	struct tuntap_driver *drv;
	struct tuntap_driver_cloner *drvc;
	char *dname;
	int flags, unit;
	bool found;

	if (name == NULL)
		return (EINVAL);

	/*
	 * Needed for dev_stdclone, but dev_stdclone will not modify, it just
	 * wants to be able to pass back a char * through the second param. We
	 * will always set that as NULL here, so we'll fake it.
	 */
	dname = __DECONST(char *, name);
	found = false;

	KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
	    ("tuntap_driver_cloners failed to initialize"));
	SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
		KASSERT(drvc->drv != NULL,
		    ("tuntap_driver_cloners entry not properly initialized"));
		drv = drvc->drv;
		/* Exact driver-name match: no unit specified. */
		if (strcmp(name, drv->cdevsw.d_name) == 0) {
			found = true;
			unit = -1;
			flags = drv->ident_flags;
			break;
		}

		/* "<driver><unit>" form: dev_stdclone extracts the unit. */
		if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
			found = true;
			flags = drv->ident_flags;
			break;
		}
	}

	if (!found)
		return (ENXIO);

	if (outunit != NULL)
		*outunit = unit;
	if (outflags != NULL)
		*outflags = flags;
	return (0);
}
  404. /*
  405. * Get driver information from a set of flags specified. Masks the identifying
  406. * part of the flags and compares it against all of the available
  407. * tuntap_drivers. Must be called with correct vnet context.
  408. */
  409. static struct tuntap_driver *
  410. tuntap_driver_from_flags(int tun_flags)
  411. {
  412. struct tuntap_driver *drv;
  413. struct tuntap_driver_cloner *drvc;
  414. KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
  415. ("tuntap_driver_cloners failed to initialize"));
  416. SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
  417. KASSERT(drvc->drv != NULL,
  418. ("tuntap_driver_cloners entry not properly initialized"));
  419. drv = drvc->drv;
  420. if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags)
  421. return (drv);
  422. }
  423. return (NULL);
  424. }
  425. static int
  426. tun_clone_match(struct if_clone *ifc, const char *name)
  427. {
  428. int tunflags;
  429. if (tuntap_name2info(name, NULL, &tunflags) == 0) {
  430. if ((tunflags & TUN_L2) == 0)
  431. return (1);
  432. }
  433. return (0);
  434. }
  435. static int
  436. tap_clone_match(struct if_clone *ifc, const char *name)
  437. {
  438. int tunflags;
  439. if (tuntap_name2info(name, NULL, &tunflags) == 0) {
  440. if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2)
  441. return (1);
  442. }
  443. return (0);
  444. }
  445. static int
  446. vmnet_clone_match(struct if_clone *ifc, const char *name)
  447. {
  448. int tunflags;
  449. if (tuntap_name2info(name, NULL, &tunflags) == 0) {
  450. if ((tunflags & TUN_VMNET) != 0)
  451. return (1);
  452. }
  453. return (0);
  454. }
/*
 * if_clone create handler shared by tun/tap/vmnet.  "name" selects the
 * driver and, optionally, a specific unit; on success the canonical
 * "<driver><unit>" name is written back into "name".
 */
static int
tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
	struct tuntap_driver *drv;
	struct cdev *dev;
	int err, i, tunflags, unit;

	tunflags = 0;
	/* The name here tells us exactly what we're creating */
	err = tuntap_name2info(name, &unit, &tunflags);
	if (err != 0)
		return (err);

	drv = tuntap_driver_from_flags(tunflags);
	if (drv == NULL)
		return (ENXIO);

	if (unit != -1) {
		/* If this unit number is still available that's okay. */
		if (alloc_unr_specific(drv->unrhdr, unit) == -1)
			return (EEXIST);
	} else {
		/*
		 * NOTE(review): alloc_unr() can return -1 when the unit
		 * space is exhausted; that value propagates unchecked into
		 * clone_create() below — confirm downstream handling.
		 */
		unit = alloc_unr(drv->unrhdr);
	}

	snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);

	/* find any existing device, or allocate new unit number */
	dev = NULL;
	i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
	/* No preexisting struct cdev *, create one */
	if (i != 0)
		i = tun_create_device(drv, unit, NULL, &dev, name);
	if (i == 0)
		tuncreate(dev);

	return (i);
}
/*
 * devfs clone handler: invoked when an unmatched name is looked up under
 * /dev.  Creates the backing cdev (and, via if_clone_create, the ifnet)
 * on demand, subject to the devfs_cloning sysctls and privilege checks.
 */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	char devname[SPECNAMELEN + 1];
	struct tuntap_driver *drv;
	int append_unit, i, u, tunflags;
	bool mayclone;

	/* Another handler already resolved this lookup. */
	if (*dev != NULL)
		return;

	tunflags = 0;
	/* Operate in the vnet of the opening credential. */
	CURVNET_SET(CRED_TO_VNET(cred));
	if (tuntap_name2info(name, &u, &tunflags) != 0)
		goto out;	/* Not recognized */

	if (u != -1 && u > IF_MAXUNIT)
		goto out;	/* Unit number too high */

	mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
	if ((tunflags & TUN_L2) != 0) {
		/* tap/vmnet allow user open with a sysctl */
		mayclone = (mayclone || tap_allow_uopen) && tapdclone;
	} else {
		mayclone = mayclone && tundclone;
	}

	/*
	 * If tun cloning is enabled, only the superuser can create an
	 * interface.
	 */
	if (!mayclone)
		goto out;

	/* Bare driver name ("tun"): pick a unit and append it below. */
	if (u == -1)
		append_unit = 1;
	else
		append_unit = 0;

	drv = tuntap_driver_from_flags(tunflags);
	if (drv == NULL)
		goto out;

	/* find any existing device, or allocate new unit number */
	i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
	if (i) {
		if (append_unit) {
			namelen = snprintf(devname, sizeof(devname), "%s%d",
			    name, u);
			name = devname;
		}

		i = tun_create_device(drv, u, cred, dev, name);
	}
	if (i == 0)
		if_clone_create(name, namelen, NULL);
out:
	CURVNET_RESTORE();
}
/*
 * Tear down a tunnel instance: mark it dying, wait for it to go idle,
 * detach the character device and the ifnet, then release everything.
 */
static void
tun_destroy(struct tuntap_softc *tp)
{

	TUN_LOCK(tp);
	tp->tun_flags |= TUN_DYING;
	/* cv_wait_unlock drops tun_mtx; either way we leave it unlocked. */
	if (tp->tun_busy != 0)
		cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
	else
		TUN_UNLOCK(tp);

	CURVNET_SET(TUN2IFP(tp)->if_vnet);

	/* destroy_dev will take care of any alias. */
	destroy_dev(tp->tun_dev);
	seldrain(&tp->tun_rsel);
	knlist_clear(&tp->tun_rsel.si_note, 0);
	knlist_destroy(&tp->tun_rsel.si_note);
	if ((tp->tun_flags & TUN_L2) != 0) {
		ether_ifdetach(TUN2IFP(tp));
	} else {
		bpfdetach(TUN2IFP(tp));
		if_detach(TUN2IFP(tp));
	}
	/* Serialize with the ioctl path before the softc goes away. */
	sx_xlock(&tun_ioctl_sx);
	TUN2IFP(tp)->if_softc = NULL;
	sx_xunlock(&tun_ioctl_sx);
	free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
	if_free(TUN2IFP(tp));
	mtx_destroy(&tp->tun_mtx);
	cv_destroy(&tp->tun_cv);
	free(tp, M_TUN);
	CURVNET_RESTORE();
}
  569. static int
  570. tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
  571. {
  572. struct tuntap_softc *tp = ifp->if_softc;
  573. mtx_lock(&tunmtx);
  574. TAILQ_REMOVE(&tunhead, tp, tun_list);
  575. mtx_unlock(&tunmtx);
  576. tun_destroy(tp);
  577. return (0);
  578. }
  579. static void
  580. vnet_tun_init(const void *unused __unused)
  581. {
  582. struct tuntap_driver *drv;
  583. struct tuntap_driver_cloner *drvc;
  584. int i;
  585. for (i = 0; i < nitems(tuntap_drivers); ++i) {
  586. drv = &tuntap_drivers[i];
  587. drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
  588. drvc->drv = drv;
  589. drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
  590. drv->clone_match_fn, drv->clone_create_fn,
  591. drv->clone_destroy_fn);
  592. SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
  593. };
  594. }
  595. VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  596. vnet_tun_init, NULL);
  597. static void
  598. vnet_tun_uninit(const void *unused __unused)
  599. {
  600. struct tuntap_driver_cloner *drvc;
  601. while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) {
  602. drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
  603. SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
  604. if_clone_detach(drvc->cloner);
  605. free(drvc, M_TUN);
  606. }
  607. }
  608. VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  609. vnet_tun_uninit, NULL);
/*
 * Final module teardown: unhook the event handlers, destroy every
 * remaining instance, then release the per-driver allocators and the
 * global mutex.  Runs after vnet_tun_uninit() (see tuntapmodevent).
 */
static void
tun_uninit(const void *unused __unused)
{
	struct tuntap_driver *drv;
	struct tuntap_softc *tp;
	int i;

	/* Stop new clone/rename events, then drain any in flight. */
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
	EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
	drain_dev_clone_events();

	mtx_lock(&tunmtx);
	while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
		TAILQ_REMOVE(&tunhead, tp, tun_list);
		/* tun_destroy may sleep, so call it without tunmtx. */
		mtx_unlock(&tunmtx);
		tun_destroy(tp);
		mtx_lock(&tunmtx);
	}
	mtx_unlock(&tunmtx);
	for (i = 0; i < nitems(tuntap_drivers); ++i) {
		drv = &tuntap_drivers[i];
		delete_unrhdr(drv->unrhdr);
		clone_cleanup(&drv->clones);
	}
	mtx_destroy(&tunmtx);
}
SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
  635. static struct tuntap_driver *
  636. tuntap_driver_from_ifnet(const struct ifnet *ifp)
  637. {
  638. struct tuntap_driver *drv;
  639. int i;
  640. if (ifp == NULL)
  641. return (NULL);
  642. for (i = 0; i < nitems(tuntap_drivers); ++i) {
  643. drv = &tuntap_drivers[i];
  644. if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0)
  645. return (drv);
  646. }
  647. return (NULL);
  648. }
  649. static int
  650. tuntapmodevent(module_t mod, int type, void *data)
  651. {
  652. struct tuntap_driver *drv;
  653. int i;
  654. switch (type) {
  655. case MOD_LOAD:
  656. mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
  657. for (i = 0; i < nitems(tuntap_drivers); ++i) {
  658. drv = &tuntap_drivers[i];
  659. clone_setup(&drv->clones);
  660. drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
  661. }
  662. arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
  663. tunrename, 0, 1000);
  664. if (arrival_tag == NULL)
  665. return (ENOMEM);
  666. clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
  667. if (clone_tag == NULL)
  668. return (ENOMEM);
  669. break;
  670. case MOD_UNLOAD:
  671. /* See tun_uninit, so it's done after the vnet_sysuninit() */
  672. break;
  673. default:
  674. return EOPNOTSUPP;
  675. }
  676. return 0;
  677. }
static moduledata_t tuntap_mod = {
	"if_tuntap",
	tuntapmodevent,
	0
};

/* We'll only ever have these two, so no need for a macro. */
static moduledata_t tun_mod = { "if_tun", NULL, 0 };
static moduledata_t tap_mod = { "if_tap", NULL, 0 };

DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tuntap, 1);
/*
 * Empty if_tun/if_tap modules: presumably kept so that existing module
 * dependencies on the old names still resolve — TODO confirm.
 */
DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tun, 1);
DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tap, 1);
  692. static int
  693. tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr,
  694. struct cdev **dev, const char *name)
  695. {
  696. struct make_dev_args args;
  697. struct tuntap_softc *tp;
  698. int error;
  699. tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO);
  700. mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF);
  701. cv_init(&tp->tun_cv, "tun_condvar");
  702. tp->tun_flags = drv->ident_flags;
  703. tp->tun_drv = drv;
  704. make_dev_args_init(&args);
  705. if (cr != NULL)
  706. args.mda_flags = MAKEDEV_REF;
  707. args.mda_devsw = &drv->cdevsw;
  708. args.mda_cr = cr;
  709. args.mda_uid = UID_UUCP;
  710. args.mda_gid = GID_DIALER;
  711. args.mda_mode = 0600;
  712. args.mda_unit = unit;
  713. args.mda_si_drv1 = tp;
  714. error = make_dev_s(&args, dev, "%s", name);
  715. if (error != 0) {
  716. free(tp, M_TUN);
  717. return (error);
  718. }
  719. KASSERT((*dev)->si_drv1 != NULL,
  720. ("Failed to set si_drv1 at %s creation", name));
  721. tp->tun_dev = *dev;
  722. knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx);
  723. mtx_lock(&tunmtx);
  724. TAILQ_INSERT_TAIL(&tunhead, tp, tun_list);
  725. mtx_unlock(&tunmtx);
  726. return (0);
  727. }
/*
 * tunstart - if_start handler for the l3 (tun) personality.  Nothing is
 * dequeued here; readers pull packets off if_snd themselves in tunread().
 * This only wakes any consumer blocked in read/select/kevent and posts
 * SIGIO to the async owner.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tuntap_softc *tp = ifp->if_softc;
	struct mbuf *m;

	TUNDEBUG(ifp, "starting\n");
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		/*
		 * With ALTQ enabled, peek at the queue first; if it is
		 * empty there is nobody to wake and we are done.
		 */
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m == NULL) {
			IFQ_UNLOCK(&ifp->if_snd);
			return;
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}

	TUN_LOCK(tp);
	if (tp->tun_flags & TUN_RWAIT) {
		/* A reader is sleeping in tunread(); wake it. */
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup(tp);
	}
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
		/* Drop the softc lock before posting SIGIO. */
		TUN_UNLOCK(tp);
		pgsigio(&tp->tun_sigio, SIGIO, 0);
	} else
		TUN_UNLOCK(tp);
}
/*
 * tunstart_l2
 *
 * queue packets from higher level ready to put out
 *
 * if_start handler for the l2 (tap/vmnet) personality.  Like tunstart()
 * it does not dequeue; it notifies readers.  If the device is not ready
 * (and not in VMnet mode) all pending output is discarded instead.
 */
static void
tunstart_l2(struct ifnet *ifp)
{
	struct tuntap_softc *tp = ifp->if_softc;

	TUNDEBUG(ifp, "starting\n");

	/*
	 * do not junk pending output if we are in VMnet mode.
	 * XXX: can this do any harm because of queue overflow?
	 */
	TUN_LOCK(tp);
	if (((tp->tun_flags & TUN_VMNET) == 0) &&
	    ((tp->tun_flags & TUN_READY) != TUN_READY)) {
		struct mbuf *m;

		/* Unlocked read. */
		TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);

		/* Drain if_snd, counting each junked packet as an oerror. */
		for (;;) {
			IF_DEQUEUE(&ifp->if_snd, m);
			if (m != NULL) {
				m_freem(m);
				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			} else
				break;
		}
		TUN_UNLOCK(tp);

		return;
	}

	ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		/* Wake a reader blocked in tunread(). */
		if (tp->tun_flags & TUN_RWAIT) {
			tp->tun_flags &= ~TUN_RWAIT;
			wakeup(tp);
		}

		/*
		 * Post SIGIO to the async owner; the softc lock is dropped
		 * around pgsigio() and reacquired afterwards.
		 */
		if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
			TUN_UNLOCK(tp);
			pgsigio(&tp->tun_sigio, SIGIO, 0);
			TUN_LOCK(tp);
		}

		selwakeuppri(&tp->tun_rsel, PZERO+1);
		KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
	}

	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	TUN_UNLOCK(tp);
} /* tunstart_l2 */
/* XXX: should return an error code so it can fail. */
/*
 * Allocate and attach the ifnet for an already-created cdev: l2 (tap/
 * vmnet) instances become Ethernet interfaces, l3 (tun) instances become
 * point-to-point ones.  Marks the softc TUN_INITED when done.
 */
static void
tuncreate(struct cdev *dev)
{
	struct tuntap_driver *drv;
	struct tuntap_softc *tp;
	struct ifnet *ifp;
	struct ether_addr eaddr;
	int iflags;
	u_char type;

	tp = dev->si_drv1;
	KASSERT(tp != NULL,
	    ("si_drv1 should have been initialized at creation"));

	drv = tp->tun_drv;
	iflags = IFF_MULTICAST;
	if ((tp->tun_flags & TUN_L2) != 0) {
		type = IFT_ETHER;
		iflags |= IFF_BROADCAST | IFF_SIMPLEX;
	} else {
		type = IFT_PPP;
		iflags |= IFF_POINTOPOINT;
	}
	ifp = tp->tun_ifp = if_alloc(type);
	if (ifp == NULL)
		panic("%s%d: failed to if_alloc() interface.\n",
		    drv->cdevsw.d_name, dev2unit(dev));
	ifp->if_softc = tp;
	if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
	ifp->if_ioctl = tunifioctl;
	ifp->if_flags = iflags;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_capabilities |= IFCAP_LINKSTATE;
	ifp->if_capenable |= IFCAP_LINKSTATE;

	if ((tp->tun_flags & TUN_L2) != 0) {
		/* Ethernet attach with a generated MAC address. */
		ifp->if_init = tunifinit;
		ifp->if_start = tunstart_l2;
		ether_gen_addr(ifp, &eaddr);
		ether_ifattach(ifp, eaddr.octet);
	} else {
		/* Plain if_attach() plus a DLT_NULL bpf header. */
		ifp->if_mtu = TUNMTU;
		ifp->if_start = tunstart;
		ifp->if_output = tunoutput;
		ifp->if_snd.ifq_drv_maxlen = 0;
		IFQ_SET_READY(&ifp->if_snd);
		if_attach(ifp);
		bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
	}

	TUN_LOCK(tp);
	tp->tun_flags |= TUN_INITED;
	TUN_UNLOCK(tp);

	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
	    ifp->if_xname, dev2unit(dev));
}
/*
 * ifnet_arrival_event handler: when one of our interfaces is renamed,
 * keep the devfs alias in sync so a node matching the new ifnet name
 * exists alongside the original cdev.
 */
static void
tunrename(void *arg __unused, struct ifnet *ifp)
{
	struct tuntap_softc *tp;
	int error;

	/* Only interfaces that are mid-rename and that belong to us. */
	if ((ifp->if_flags & IFF_RENAMING) == 0)
		return;

	if (tuntap_driver_from_ifnet(ifp) == NULL)
		return;

	/*
	 * We need to grab the ioctl sx long enough to make sure the softc is
	 * still there.  If it is, we can safely try to busy the tun device.
	 * The busy may fail if the device is currently dying, in which case
	 * we do nothing.  If it doesn't fail, the busy count stops the device
	 * from dying until we've created the alias (that will then be
	 * subsequently destroyed).
	 */
	sx_xlock(&tun_ioctl_sx);
	tp = ifp->if_softc;
	if (tp == NULL) {
		sx_xunlock(&tun_ioctl_sx);
		return;
	}
	error = tun_busy(tp);
	sx_xunlock(&tun_ioctl_sx);
	if (error != 0)
		return;
	if (tp->tun_alias != NULL) {
		/* Drop the alias for the previous name first. */
		destroy_dev(tp->tun_alias);
		tp->tun_alias = NULL;
	}

	/* No alias needed if the ifnet name still matches the cdev name. */
	if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0)
		goto out;

	/*
	 * Failure's ok, aliases are created on a best effort basis.  If a
	 * tun user/consumer decides to rename the interface to conflict with
	 * another device (non-ifnet) on the system, we will assume they know
	 * what they are doing.  make_dev_alias_p won't touch tun_alias on
	 * failure, so we use it but ignore the return value.
	 */
	make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s",
	    ifp->if_xname);
out:
	tun_unbusy(tp);
}
/*
 * d_open handler: a tuntap device may be open by only one descriptor at a
 * time (EBUSY otherwise) and not while dying.  The opening process
 * becomes the controlling process (tun_pid) and the link is marked up.
 */
static int
tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct ifnet *ifp;
	struct tuntap_softc *tp;
	int error, tunflags;

	tunflags = 0;
	CURVNET_SET(TD_TO_VNET(td));
	error = tuntap_name2info(dev->si_name, NULL, &tunflags);
	if (error != 0) {
		CURVNET_RESTORE();
		return (error);	/* Shouldn't happen */
	}

	tp = dev->si_drv1;
	KASSERT(tp != NULL,
	    ("si_drv1 should have been initialized at creation"));

	TUN_LOCK(tp);
	if ((tp->tun_flags & TUN_INITED) == 0) {
		/* tuncreate() has not finished attaching the ifnet yet. */
		TUN_UNLOCK(tp);
		CURVNET_RESTORE();
		return (ENXIO);
	}
	if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
		TUN_UNLOCK(tp);
		CURVNET_RESTORE();
		return (EBUSY);
	}

	error = tun_busy_locked(tp);
	KASSERT(error == 0, ("Must be able to busy an unopen tunnel"));
	ifp = TUN2IFP(tp);

	if ((tp->tun_flags & TUN_L2) != 0) {
		/* Snapshot the link-level address and mark the tap running. */
		bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
		    sizeof(tp->tun_ether.octet));

		ifp->if_drv_flags |= IFF_DRV_RUNNING;
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

		if (tapuponopen)
			ifp->if_flags |= IFF_UP;
	}

	tp->tun_pid = td->td_proc->p_pid;
	tp->tun_flags |= TUN_OPEN;

	if_link_state_change(ifp, LINK_STATE_UP);
	TUNDEBUG(ifp, "open\n");
	TUN_UNLOCK(tp);

	/*
	 * This can fail with either ENOENT or EBUSY. This is in the middle of
	 * d_open, so ENOENT should not be possible. EBUSY is possible, but
	 * the only cdevpriv dtor being set will be tundtor and the softc being
	 * passed is constant for a given cdev. We ignore the possible error
	 * because of this as either "unlikely" or "not actually a problem."
	 */
	(void)devfs_set_cdevpriv(tp, tundtor);
	CURVNET_RESTORE();
	return (0);
}
/*
 * tundtor - tear down the device - mark i/f down & delete
 * routing info
 *
 * cdevpriv destructor, run on last close of the descriptor set up in
 * tunopen().  Drains pending output, takes the interface down (except in
 * vmnet/LINK0 modes), clears the controlling pid, and releases the busy
 * reference taken at open.
 */
static void
tundtor(void *data)
{
	struct proc *p;
	struct tuntap_softc *tp;
	struct ifnet *ifp;
	bool l2tun;

	tp = data;
	p = curproc;
	ifp = TUN2IFP(tp);

	TUN_LOCK(tp);

	/*
	 * Realistically, we can't be obstinate here.  This only means that the
	 * tuntap device was closed out of order, and the last closer wasn't the
	 * controller.  These are still good to know about, though, as software
	 * should avoid multiple processes with a tuntap device open and
	 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
	 * parent).
	 */
	if (p->p_pid != tp->tun_pid) {
		log(LOG_INFO,
		    "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
		    p->p_pid, p->p_comm, tp->tun_dev->si_name);
	}

	/*
	 * junk all pending output
	 */
	CURVNET_SET(ifp->if_vnet);

	l2tun = false;
	if ((tp->tun_flags & TUN_L2) != 0) {
		l2tun = true;
		IF_DRAIN(&ifp->if_snd);
	} else {
		IFQ_PURGE(&ifp->if_snd);
	}

	/* For vmnet, we won't do most of the address/route bits */
	if ((tp->tun_flags & TUN_VMNET) != 0 ||
	    (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
		goto out;

	/*
	 * The softc lock is dropped around if_down()/if_purgeaddrs() --
	 * presumably because they can sleep or take other locks; confirm
	 * against the ifnet locking rules before relying on this.
	 */
	if (ifp->if_flags & IFF_UP) {
		TUN_UNLOCK(tp);
		if_down(ifp);
		TUN_LOCK(tp);
	}

	/* Delete all addresses and routes which reference this interface. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		TUN_UNLOCK(tp);
		if_purgeaddrs(ifp);
		TUN_LOCK(tp);
	}

out:
	if_link_state_change(ifp, LINK_STATE_DOWN);
	CURVNET_RESTORE();

	funsetown(&tp->tun_sigio);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
	TUNDEBUG (ifp, "closed\n");
	tp->tun_flags &= ~TUN_OPEN;
	tp->tun_pid = 0;
	/* Reset the vnet header length and its advertised capabilities. */
	tun_vnethdr_set(ifp, 0);

	tun_unbusy_locked(tp);
	TUN_UNLOCK(tp);
}
  1025. static void
  1026. tuninit(struct ifnet *ifp)
  1027. {
  1028. struct tuntap_softc *tp = ifp->if_softc;
  1029. TUNDEBUG(ifp, "tuninit\n");
  1030. TUN_LOCK(tp);
  1031. ifp->if_drv_flags |= IFF_DRV_RUNNING;
  1032. if ((tp->tun_flags & TUN_L2) == 0) {
  1033. ifp->if_flags |= IFF_UP;
  1034. getmicrotime(&ifp->if_lastchange);
  1035. TUN_UNLOCK(tp);
  1036. } else {
  1037. ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
  1038. TUN_UNLOCK(tp);
  1039. /* attempt to start output */
  1040. tunstart_l2(ifp);
  1041. }
  1042. }
  1043. /*
  1044. * Used only for l2 tunnel.
  1045. */
  1046. static void
  1047. tunifinit(void *xtp)
  1048. {
  1049. struct tuntap_softc *tp;
  1050. tp = (struct tuntap_softc *)xtp;
  1051. tuninit(tp->tun_ifp);
  1052. }
  1053. /*
  1054. * To be called under TUN_LOCK. Update ifp->if_hwassist according to the
  1055. * current value of ifp->if_capenable.
  1056. */
  1057. static void
  1058. tun_caps_changed(struct ifnet *ifp)
  1059. {
  1060. uint64_t hwassist = 0;
  1061. TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc);
  1062. if (ifp->if_capenable & IFCAP_TXCSUM)
  1063. hwassist |= CSUM_TCP | CSUM_UDP;
  1064. if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
  1065. hwassist |= CSUM_TCP_IPV6
  1066. | CSUM_UDP_IPV6;
  1067. if (ifp->if_capenable & IFCAP_TSO4)
  1068. hwassist |= CSUM_IP_TSO;
  1069. if (ifp->if_capenable & IFCAP_TSO6)
  1070. hwassist |= CSUM_IP6_TSO;
  1071. ifp->if_hwassist = hwassist;
  1072. }
  1073. /*
  1074. * To be called under TUN_LOCK. Update tp->tun_vhdrlen and adjust
  1075. * if_capabilities and if_capenable as needed.
  1076. */
  1077. static void
  1078. tun_vnethdr_set(struct ifnet *ifp, int vhdrlen)
  1079. {
  1080. struct tuntap_softc *tp = ifp->if_softc;
  1081. TUN_LOCK_ASSERT(tp);
  1082. if (tp->tun_vhdrlen == vhdrlen)
  1083. return;
  1084. /*
  1085. * Update if_capabilities to reflect the
  1086. * functionalities offered by the virtio-net
  1087. * header.
  1088. */
  1089. if (vhdrlen != 0)
  1090. ifp->if_capabilities |=
  1091. TAP_VNET_HDR_CAPS;
  1092. else
  1093. ifp->if_capabilities &=
  1094. ~TAP_VNET_HDR_CAPS;
  1095. /*
  1096. * Disable any capabilities that we don't
  1097. * support anymore.
  1098. */
  1099. ifp->if_capenable &= ifp->if_capabilities;
  1100. tun_caps_changed(ifp);
  1101. tp->tun_vhdrlen = vhdrlen;
  1102. TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n",
  1103. vhdrlen, ifp->if_capabilities);
  1104. }
/*
 * Process an ioctl request.
 *
 * ifnet-side ioctl handler.  tun_ioctl_sx is held across the whole call
 * to keep the softc from being detached underneath us; a NULL if_softc
 * means the device is going away (ENXIO).
 */
static int
tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	struct tuntap_softc *tp;
	struct ifstat *ifs;
	struct ifmediareq *ifmr;
	int dummy, error = 0;
	bool l2tun;

	ifmr = NULL;
	sx_xlock(&tun_ioctl_sx);
	tp = ifp->if_softc;
	if (tp == NULL) {
		error = ENXIO;
		goto bad;
	}
	l2tun = (tp->tun_flags & TUN_L2) != 0;
	switch(cmd) {
	case SIOCGIFSTATUS:
		/* Report the controlling pid, if any, in the ascii status. */
		ifs = (struct ifstat *)data;
		TUN_LOCK(tp);
		if (tp->tun_pid)
			snprintf(ifs->ascii, sizeof(ifs->ascii),
			    "\tOpened by PID %d\n", tp->tun_pid);
		else
			ifs->ascii[0] = '\0';
		TUN_UNLOCK(tp);
		break;
	case SIOCSIFADDR:
		if (l2tun)
			error = ether_ioctl(ifp, cmd, data);
		else
			tuninit(ifp);
		if (error == 0)
			TUNDEBUG(ifp, "address set\n");
		break;
	case SIOCSIFMTU:
		ifp->if_mtu = ifr->ifr_mtu;
		TUNDEBUG(ifp, "mtu set\n");
		break;
	case SIOCSIFFLAGS:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	case SIOCGIFMEDIA:
		/*
		 * Synthesize media info for l2 only: Ethernet, valid, and
		 * active while the cdev is open.
		 */
		if (!l2tun) {
			error = EINVAL;
			break;
		}

		ifmr = (struct ifmediareq *)data;
		dummy = ifmr->ifm_count;
		ifmr->ifm_count = 1;
		ifmr->ifm_status = IFM_AVALID;
		ifmr->ifm_active = IFM_ETHER;
		if (tp->tun_flags & TUN_OPEN)
			ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_current = ifmr->ifm_active;
		if (dummy >= 1) {
			int media = IFM_ETHER;
			error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
		}
		break;
	case SIOCSIFCAP:
		TUN_LOCK(tp);
		ifp->if_capenable = ifr->ifr_reqcap;
		/* Recompute if_hwassist from the new capenable. */
		tun_caps_changed(ifp);
		TUN_UNLOCK(tp);
		VLAN_CAPABILITIES(ifp);
		break;
	default:
		/* l2 defers everything else to ether_ioctl(). */
		if (l2tun) {
			error = ether_ioctl(ifp, cmd, data);
		} else {
			error = EINVAL;
		}
	}
bad:
	sx_xunlock(&tun_ioctl_sx);
	return (error);
}
/*
 * tunoutput - queue packets from higher level ready to put out.
 *
 * if_output handler for the l3 flavor: optionally prepends the
 * destination sockaddr (TUN_LMODE) or the 4-byte address family
 * (TUN_IFHEAD) and hands the mbuf to if_transmit for a reader to pick
 * up.  Consumes m0 on every path.
 */
static int
tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
    struct route *ro)
{
	struct tuntap_softc *tp = ifp->if_softc;
	u_short cached_tun_flags;
	int error;
	u_int32_t af;

	TUNDEBUG (ifp, "tunoutput\n");

#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m0);
	if (error) {
		m_freem(m0);
		return (error);
	}
#endif

	/* Could be unlocked read? */
	TUN_LOCK(tp);
	cached_tun_flags = tp->tun_flags;
	TUN_UNLOCK(tp);
	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		m_freem (m0);
		return (EHOSTDOWN);
	}

	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
		m_freem (m0);
		return (EHOSTDOWN);
	}

	/* BPF writes need to be handled specially. */
	if (dst->sa_family == AF_UNSPEC)
		bcopy(dst->sa_data, &af, sizeof(af));
	else
		af = dst->sa_family;

	if (bpf_peers_present(ifp->if_bpf))
		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);

	/* prepend sockaddr? this may abort if the mbuf allocation fails */
	if (cached_tun_flags & TUN_LMODE) {
		/* allocate space for sockaddr */
		M_PREPEND(m0, dst->sa_len, M_NOWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENOBUFS);
		} else {
			bcopy(dst, m0->m_data, dst->sa_len);
		}
	}

	if (cached_tun_flags & TUN_IFHEAD) {
		/* Prepend the address family */
		M_PREPEND(m0, 4, M_NOWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENOBUFS);
		} else
			*(u_int32_t *)m0->m_data = htonl(af);
	} else {
#ifdef INET
		if (af != AF_INET)
#endif
		{
			/* Without TUN_IFHEAD only IPv4 can be delivered. */
			m_freem(m0);
			return (EAFNOSUPPORT);
		}
	}

	error = (ifp->if_transmit)(ifp, m0);
	if (error)
		return (ENOBUFS);
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	return (0);
}
/*
 * the cdevsw interface is now pretty minimal.
 *
 * Character-device ioctl handler.  tap-only commands are tried first for
 * l2 devices and tun-only commands for l3 devices; anything unhandled
 * falls through to the shared command set at the bottom.
 */
static int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ifreq ifr, *ifrp;
	struct tuntap_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	struct tuninfo *tunp;
	int error, iflags, ival;
	bool l2tun;

	l2tun = (tp->tun_flags & TUN_L2) != 0;
	if (l2tun) {
		/* tap specific ioctls */
		switch(cmd) {
		/* VMware/VMnet port ioctl's */
#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD4)
		case _IO('V', 0):
			ival = IOCPARM_IVAL(data);
			data = (caddr_t)&ival;
			/* FALLTHROUGH */
#endif
		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
			/*
			 * Apply the caller's flags, masked so that flags in
			 * IFF_CANTCHANGE are preserved, and force IFF_UP.
			 */
			iflags = *(int *)data;
			iflags &= TUN_VMIO_FLAG_MASK;
			iflags &= ~IFF_CANTCHANGE;
			iflags |= IFF_UP;
			TUN_LOCK(tp);
			ifp->if_flags = iflags |
			    (ifp->if_flags & IFF_CANTCHANGE);
			TUN_UNLOCK(tp);
			return (0);
		case SIOCGIFADDR:	/* get MAC address of the remote side */
			TUN_LOCK(tp);
			bcopy(&tp->tun_ether.octet, data,
			    sizeof(tp->tun_ether.octet));
			TUN_UNLOCK(tp);
			return (0);
		case SIOCSIFADDR:	/* set MAC address of the remote side */
			TUN_LOCK(tp);
			bcopy(data, &tp->tun_ether.octet,
			    sizeof(tp->tun_ether.octet));
			TUN_UNLOCK(tp);
			return (0);
		case TAPSVNETHDR:
			/* Only 0 or one of the two virtio header sizes. */
			ival = *(int *)data;
			if (ival != 0 &&
			    ival != sizeof(struct virtio_net_hdr) &&
			    ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) {
				return (EINVAL);
			}
			TUN_LOCK(tp);
			tun_vnethdr_set(ifp, ival);
			TUN_UNLOCK(tp);
			return (0);
		case TAPGVNETHDR:
			TUN_LOCK(tp);
			*(int *)data = tp->tun_vhdrlen;
			TUN_UNLOCK(tp);
			return (0);
		}

		/* Fall through to the common ioctls if unhandled */
	} else {
		switch (cmd) {
		case TUNSLMODE:
			/* Link-layer mode; mutually exclusive with IFHEAD. */
			TUN_LOCK(tp);
			if (*(int *)data) {
				tp->tun_flags |= TUN_LMODE;
				tp->tun_flags &= ~TUN_IFHEAD;
			} else
				tp->tun_flags &= ~TUN_LMODE;
			TUN_UNLOCK(tp);
			return (0);
		case TUNSIFHEAD:
			/* AF-header mode; mutually exclusive with LMODE. */
			TUN_LOCK(tp);
			if (*(int *)data) {
				tp->tun_flags |= TUN_IFHEAD;
				tp->tun_flags &= ~TUN_LMODE;
			} else
				tp->tun_flags &= ~TUN_IFHEAD;
			TUN_UNLOCK(tp);
			return (0);
		case TUNGIFHEAD:
			TUN_LOCK(tp);
			*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
			TUN_UNLOCK(tp);
			return (0);
		case TUNSIFMODE:
			/* deny this if UP */
			if (TUN2IFP(tp)->if_flags & IFF_UP)
				return (EBUSY);

			switch (*(int *)data & ~IFF_MULTICAST) {
			case IFF_POINTOPOINT:
			case IFF_BROADCAST:
				TUN_LOCK(tp);
				TUN2IFP(tp)->if_flags &=
				    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
				TUN2IFP(tp)->if_flags |= *(int *)data;
				TUN_UNLOCK(tp);
				break;
			default:
				return (EINVAL);
			}

			return (0);
		case TUNSIFPID:
			/* Take over as the controlling process. */
			TUN_LOCK(tp);
			tp->tun_pid = curthread->td_proc->p_pid;
			TUN_UNLOCK(tp);

			return (0);
		}

		/* Fall through to the common ioctls if unhandled */
	}

	switch (cmd) {
	case TUNGIFNAME:
		ifrp = (struct ifreq *)data;
		strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);

		return (0);
	case TUNSIFINFO:
		/* Set mtu/baudrate; the interface type must match. */
		tunp = (struct tuninfo *)data;
		if (TUN2IFP(tp)->if_type != tunp->type)
			return (EPROTOTYPE);
		TUN_LOCK(tp);
		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
			/* MTU change goes through ifhwioctl(SIOCSIFMTU). */
			strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
			ifr.ifr_mtu = tunp->mtu;
			CURVNET_SET(TUN2IFP(tp)->if_vnet);
			error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
			    (caddr_t)&ifr, td);
			CURVNET_RESTORE();
			if (error) {
				TUN_UNLOCK(tp);
				return (error);
			}
		}
		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
		TUN_UNLOCK(tp);
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		TUN_LOCK(tp);
		tunp->mtu = TUN2IFP(tp)->if_mtu;
		tunp->type = TUN2IFP(tp)->if_type;
		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
		TUN_UNLOCK(tp);
		break;
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
	case FIONBIO:
		break;
	case FIOASYNC:
		TUN_LOCK(tp);
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		TUN_UNLOCK(tp);
		break;
	case FIONREAD:
		/* Report the byte length of the next queued packet. */
		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
			struct mbuf *mb;

			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
			for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
				*(int *)data += mb->m_len;
			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
		} else
			*(int *)data = 0;
		break;
	case FIOSETOWN:
		return (fsetown(*(int *)data, &tp->tun_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&tp->tun_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &tp->tun_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&tp->tun_sigio);
		return (0);

	default:
		return (ENOTTY);
	}
	return (0);
}
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 *
 * Dequeues one packet from if_snd, blocking unless O_NONBLOCK; when a
 * virtio-net header is configured it is copied out ahead of the packet.
 * Any part of the packet that does not fit the caller's buffer is
 * dropped.
 */
static int
tunread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tuntap_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	struct mbuf *m;
	size_t len;
	int error = 0;

	TUNDEBUG (ifp, "read\n");
	TUN_LOCK(tp);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUN_UNLOCK(tp);
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	/* Wait (interruptibly) until a packet shows up on if_snd. */
	for (;;) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m != NULL)
			break;
		if (flag & O_NONBLOCK) {
			TUN_UNLOCK(tp);
			return (EWOULDBLOCK);
		}
		tp->tun_flags |= TUN_RWAIT;
		error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
		    "tunread", 0);
		if (error != 0) {
			TUN_UNLOCK(tp);
			return (error);
		}
	}
	TUN_UNLOCK(tp);

	if ((tp->tun_flags & TUN_L2) != 0)
		BPF_MTAP(ifp, m);

	/* Copy out the virtio-net header first, when one is configured. */
	len = min(tp->tun_vhdrlen, uio->uio_resid);
	if (len > 0) {
		struct virtio_net_hdr_mrg_rxbuf vhdr;

		bzero(&vhdr, sizeof(vhdr));
		if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) {
			/*
			 * NOTE(review): virtio_net_tx_offload() may replace m.
			 * If it can return NULL, the copy loop below just
			 * emits the header with no payload -- confirm that is
			 * the intended behavior.
			 */
			m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr);
		}

		TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
		    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
		    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
		    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
		    vhdr.hdr.csum_offset);
		error = uiomove(&vhdr, len, uio);
	}

	/* Copy out as much of the packet as the caller's buffer holds. */
	while (m && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m->m_len);
		if (len != 0)
			error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	if (m) {
		/* Caller's buffer ran out; the remainder is discarded. */
		TUNDEBUG(ifp, "Dropping mbuf\n");
		m_freem(m);
	}
	return (error);
}
/*
 * Inject a frame written by userland into the stack via if_input, as if
 * it had been received on the tap interface.  Consumes m; returns 0 even
 * when the frame is filtered out or rejected by checksum processing.
 */
static int
tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,
    struct virtio_net_hdr_mrg_rxbuf *vhdr)
{
	struct epoch_tracker et;
	struct ether_header *eh;
	struct ifnet *ifp;

	ifp = TUN2IFP(tp);

	/*
	 * Only pass a unicast frame to ether_input(), if it would
	 * actually have been received by non-virtual hardware.
	 */
	if (m->m_len < sizeof(struct ether_header)) {
		m_freem(m);
		return (0);
	}

	eh = mtod(m, struct ether_header *);
	if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
	    !ETHER_IS_MULTICAST(eh->ether_dhost) &&
	    bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
		m_freem(m);
		return (0);
	}

	/* Drop the frame if virtio_net_rx_csum() rejects the vnet header. */
	if (vhdr != NULL && virtio_net_rx_csum(m, &vhdr->hdr)) {
		m_freem(m);
		return (0);
	}

	/* Pass packet up to parent. */
	CURVNET_SET(ifp->if_vnet);
	NET_EPOCH_ENTER(et);
	(*ifp->if_input)(ifp, m);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	/* ibytes are counted in parent */
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);

	return (0);
}
/*
 * Inject an l3 packet written by userland into the stack via netisr.
 * With TUN_IFHEAD set, the packet starts with a 4-byte network-order
 * address family that is stripped off; otherwise AF_INET is assumed.
 * Consumes m on every path.
 */
static int
tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct ifnet *ifp;
	int family, isr;

	ifp = TUN2IFP(tp);
	/* Could be unlocked read? */
	TUN_LOCK(tp);
	if (tp->tun_flags & TUN_IFHEAD) {
		TUN_UNLOCK(tp);
		/* Ensure the 4-byte family prefix is contiguous. */
		if (m->m_len < sizeof(family) &&
		    (m = m_pullup(m, sizeof(family))) == NULL)
			return (ENOBUFS);
		family = ntohl(*mtod(m, u_int32_t *));
		m_adj(m, sizeof(family));
	} else {
		TUN_UNLOCK(tp);
		family = AF_INET;
	}

	BPF_MTAP2(ifp, &family, sizeof(family), m);

	/* Map the address family onto a netisr queue. */
	switch (family) {
#ifdef INET
	case AF_INET:
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		isr = NETISR_IPV6;
		break;
#endif
	default:
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	CURVNET_SET(ifp->if_vnet);
	M_SETFIB(m, ifp->if_fib);
	NET_EPOCH_ENTER(et);
	netisr_dispatch(isr, m);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	return (0);
}
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 *
 * Validates the write size against the flavor's MRU (plus virtio-net
 * header for l2, or AF prefix for TUN_IFHEAD), peels off the optional
 * virtio-net header, builds an mbuf chain, and hands it to the l2 or l3
 * injection path.
 */
static int
tunwrite(struct cdev *dev, struct uio *uio, int flag)
{
	struct virtio_net_hdr_mrg_rxbuf vhdr;
	struct tuntap_softc *tp;
	struct ifnet *ifp;
	struct mbuf *m;
	uint32_t mru;
	int align, vhdrlen, error;
	bool l2tun;

	tp = dev->si_drv1;
	ifp = TUN2IFP(tp);
	TUNDEBUG(ifp, "tunwrite\n");
	if ((ifp->if_flags & IFF_UP) != IFF_UP)
		/* ignore silently */
		return (0);

	if (uio->uio_resid == 0)
		return (0);

	/* Compute the maximum acceptable write size for this flavor. */
	l2tun = (tp->tun_flags & TUN_L2) != 0;
	mru = l2tun ? TAPMRU : TUNMRU;
	vhdrlen = tp->tun_vhdrlen;
	align = 0;
	if (l2tun) {
		align = ETHER_ALIGN;
		mru += vhdrlen;
	} else if ((tp->tun_flags & TUN_IFHEAD) != 0)
		mru += sizeof(uint32_t);	/* family */
	if (uio->uio_resid < 0 || uio->uio_resid > mru) {
		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
		return (EIO);
	}

	/* Peel the virtio-net header off the front of the write, if any. */
	if (vhdrlen > 0) {
		error = uiomove(&vhdr, vhdrlen, uio);
		if (error != 0)
			return (error);
		TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
		    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
		    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
		    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
		    vhdr.hdr.csum_offset);
	}

	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return (ENOBUFS);
	}

	m->m_pkthdr.rcvif = ifp;
#ifdef MAC
	mac_ifnet_create_mbuf(ifp, m);
#endif

	if (l2tun)
		return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL));

	return (tunwrite_l3(tp, m));
}
  1662. /*
  1663. * tunpoll - the poll interface, this is only useful on reads
  1664. * really. The write detect always returns true, write never blocks
  1665. * anyway, it either accepts the packet or drops it.
  1666. */
  1667. static int
  1668. tunpoll(struct cdev *dev, int events, struct thread *td)
  1669. {
  1670. struct tuntap_softc *tp = dev->si_drv1;
  1671. struct ifnet *ifp = TUN2IFP(tp);
  1672. int revents = 0;
  1673. TUNDEBUG(ifp, "tunpoll\n");
  1674. if (events & (POLLIN | POLLRDNORM)) {
  1675. IFQ_LOCK(&ifp->if_snd);
  1676. if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
  1677. TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
  1678. revents |= events & (POLLIN | POLLRDNORM);
  1679. } else {
  1680. TUNDEBUG(ifp, "tunpoll waiting\n");
  1681. selrecord(td, &tp->tun_rsel);
  1682. }
  1683. IFQ_UNLOCK(&ifp->if_snd);
  1684. }
  1685. revents |= events & (POLLOUT | POLLWRNORM);
  1686. return (revents);
  1687. }
  1688. /*
  1689. * tunkqfilter - support for the kevent() system call.
  1690. */
  1691. static int
  1692. tunkqfilter(struct cdev *dev, struct knote *kn)
  1693. {
  1694. struct tuntap_softc *tp = dev->si_drv1;
  1695. struct ifnet *ifp = TUN2IFP(tp);
  1696. switch(kn->kn_filter) {
  1697. case EVFILT_READ:
  1698. TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
  1699. ifp->if_xname, dev2unit(dev));
  1700. kn->kn_fop = &tun_read_filterops;
  1701. break;
  1702. case EVFILT_WRITE:
  1703. TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
  1704. ifp->if_xname, dev2unit(dev));
  1705. kn->kn_fop = &tun_write_filterops;
  1706. break;
  1707. default:
  1708. TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
  1709. ifp->if_xname, dev2unit(dev));
  1710. return(EINVAL);
  1711. }
  1712. kn->kn_hook = tp;
  1713. knlist_add(&tp->tun_rsel.si_note, kn, 0);
  1714. return (0);
  1715. }
  1716. /*
  1717. * Return true of there is data in the interface queue.
  1718. */
  1719. static int
  1720. tunkqread(struct knote *kn, long hint)
  1721. {
  1722. int ret;
  1723. struct tuntap_softc *tp = kn->kn_hook;
  1724. struct cdev *dev = tp->tun_dev;
  1725. struct ifnet *ifp = TUN2IFP(tp);
  1726. if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
  1727. TUNDEBUG(ifp,
  1728. "%s have data in the queue. Len = %d, minor = %#x\n",
  1729. ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
  1730. ret = 1;
  1731. } else {
  1732. TUNDEBUG(ifp,
  1733. "%s waiting for data, minor = %#x\n", ifp->if_xname,
  1734. dev2unit(dev));
  1735. ret = 0;
  1736. }
  1737. return (ret);
  1738. }
  1739. /*
  1740. * Always can write, always return MTU in kn->data.
  1741. */
  1742. static int
  1743. tunkqwrite(struct knote *kn, long hint)
  1744. {
  1745. struct tuntap_softc *tp = kn->kn_hook;
  1746. struct ifnet *ifp = TUN2IFP(tp);
  1747. kn->kn_data = ifp->if_mtu;
  1748. return (1);
  1749. }
  1750. static void
  1751. tunkqdetach(struct knote *kn)
  1752. {
  1753. struct tuntap_softc *tp = kn->kn_hook;
  1754. knlist_remove(&tp->tun_rsel.si_note, kn, 0);
  1755. }