rtsock.c 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715
  1. /*-
  2. * SPDX-License-Identifier: BSD-3-Clause
  3. *
  4. * Copyright (c) 1988, 1991, 1993
  5. * The Regents of the University of California. All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. Neither the name of the University nor the names of its contributors
  16. * may be used to endorse or promote products derived from this software
  17. * without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29. * SUCH DAMAGE.
  30. */
  31. #include "opt_ddb.h"
  32. #include "opt_route.h"
  33. #include "opt_inet.h"
  34. #include "opt_inet6.h"
  35. #include <sys/param.h>
  36. #include <sys/jail.h>
  37. #include <sys/kernel.h>
  38. #include <sys/eventhandler.h>
  39. #include <sys/domain.h>
  40. #include <sys/lock.h>
  41. #include <sys/malloc.h>
  42. #include <sys/mbuf.h>
  43. #include <sys/priv.h>
  44. #include <sys/proc.h>
  45. #include <sys/protosw.h>
  46. #include <sys/rmlock.h>
  47. #include <sys/rwlock.h>
  48. #include <sys/signalvar.h>
  49. #include <sys/socket.h>
  50. #include <sys/socketvar.h>
  51. #include <sys/sysctl.h>
  52. #include <sys/systm.h>
  53. #include <net/if.h>
  54. #include <net/if_var.h>
  55. #include <net/if_private.h>
  56. #include <net/if_dl.h>
  57. #include <net/if_llatbl.h>
  58. #include <net/if_types.h>
  59. #include <net/netisr.h>
  60. #include <net/route.h>
  61. #include <net/route/route_ctl.h>
  62. #include <net/route/route_var.h>
  63. #include <net/vnet.h>
  64. #include <netinet/in.h>
  65. #include <netinet/if_ether.h>
  66. #include <netinet/ip_carp.h>
  67. #ifdef INET6
  68. #include <netinet6/in6_var.h>
  69. #include <netinet6/ip6_var.h>
  70. #include <netinet6/scope6_var.h>
  71. #endif
  72. #include <net/route/nhop.h>
  73. #define DEBUG_MOD_NAME rtsock
  74. #define DEBUG_MAX_LEVEL LOG_DEBUG
  75. #include <net/route/route_debug.h>
  76. _DECLARE_DEBUG(LOG_INFO);
  77. #ifdef COMPAT_FREEBSD32
  78. #include <sys/mount.h>
  79. #include <compat/freebsd32/freebsd32.h>
  80. struct if_msghdr32 {
  81. uint16_t ifm_msglen;
  82. uint8_t ifm_version;
  83. uint8_t ifm_type;
  84. int32_t ifm_addrs;
  85. int32_t ifm_flags;
  86. uint16_t ifm_index;
  87. uint16_t _ifm_spare1;
  88. struct if_data ifm_data;
  89. };
  90. struct if_msghdrl32 {
  91. uint16_t ifm_msglen;
  92. uint8_t ifm_version;
  93. uint8_t ifm_type;
  94. int32_t ifm_addrs;
  95. int32_t ifm_flags;
  96. uint16_t ifm_index;
  97. uint16_t _ifm_spare1;
  98. uint16_t ifm_len;
  99. uint16_t ifm_data_off;
  100. uint32_t _ifm_spare2;
  101. struct if_data ifm_data;
  102. };
  103. struct ifa_msghdrl32 {
  104. uint16_t ifam_msglen;
  105. uint8_t ifam_version;
  106. uint8_t ifam_type;
  107. int32_t ifam_addrs;
  108. int32_t ifam_flags;
  109. uint16_t ifam_index;
  110. uint16_t _ifam_spare1;
  111. uint16_t ifam_len;
  112. uint16_t ifam_data_off;
  113. int32_t ifam_metric;
  114. struct if_data ifam_data;
  115. };
  116. #define SA_SIZE32(sa) \
  117. ( (((struct sockaddr *)(sa))->sa_len == 0) ? \
  118. sizeof(int) : \
  119. 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(int) - 1) ) )
  120. #endif /* COMPAT_FREEBSD32 */
  121. struct linear_buffer {
  122. char *base; /* Base allocated memory pointer */
  123. uint32_t offset; /* Currently used offset */
  124. uint32_t size; /* Total buffer size */
  125. };
  126. #define SCRATCH_BUFFER_SIZE 1024
  127. #define RTS_PID_LOG(_l, _fmt, ...) \
  128. RT_LOG_##_l(_l, "PID %d: " _fmt, curproc ? curproc->p_pid : 0, \
  129. ## __VA_ARGS__)
  130. MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
  131. /* NB: these are not modified */
  132. static struct sockaddr route_src = { 2, PF_ROUTE, };
  133. static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
  134. /* These are external hooks for CARP. */
  135. int (*carp_get_vhid_p)(struct ifaddr *);
  136. /*
  137. * Used by rtsock callback code to decide whether to filter the update
  138. * notification to a socket bound to a particular FIB.
  139. */
  140. #define RTS_FILTER_FIB M_PROTO8
  141. /*
  142. * Used to store address family of the notification.
  143. */
  144. #define m_rtsock_family m_pkthdr.PH_loc.eight[0]
  145. struct rcb {
  146. LIST_ENTRY(rcb) list;
  147. struct socket *rcb_socket;
  148. sa_family_t rcb_family;
  149. };
  150. typedef struct {
  151. LIST_HEAD(, rcb) cblist;
  152. int ip_count; /* attached w/ AF_INET */
  153. int ip6_count; /* attached w/ AF_INET6 */
  154. int any_count; /* total attached */
  155. } route_cb_t;
  156. VNET_DEFINE_STATIC(route_cb_t, route_cb);
  157. #define V_route_cb VNET(route_cb)
  158. struct mtx rtsock_mtx;
  159. MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
  160. #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
  161. #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
  162. #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
  163. SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
  164. struct walkarg {
  165. int family;
  166. int w_tmemsize;
  167. int w_op, w_arg;
  168. caddr_t w_tmem;
  169. struct sysctl_req *w_req;
  170. struct sockaddr *dst;
  171. struct sockaddr *mask;
  172. };
  173. static void rts_input(struct mbuf *m);
  174. static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
  175. static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
  176. struct walkarg *w, int *plen);
  177. static int rt_xaddrs(caddr_t cp, caddr_t cplim,
  178. struct rt_addrinfo *rtinfo);
  179. static int cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb);
  180. static int sysctl_dumpentry(struct rtentry *rt, void *vw);
  181. static int sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh,
  182. uint32_t weight, struct walkarg *w);
  183. static int sysctl_iflist(int af, struct walkarg *w);
  184. static int sysctl_ifmalist(int af, struct walkarg *w);
  185. static void rt_getmetrics(const struct rtentry *rt,
  186. const struct nhop_object *nh, struct rt_metrics *out);
  187. static void rt_dispatch(struct mbuf *, sa_family_t);
  188. static void rt_ifannouncemsg(struct ifnet *ifp, int what);
  189. static int handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
  190. struct rt_msghdr *rtm, struct rib_cmd_info *rc);
  191. static int update_rtm_from_rc(struct rt_addrinfo *info,
  192. struct rt_msghdr **prtm, int alloc_len,
  193. struct rib_cmd_info *rc, struct nhop_object *nh);
  194. static void send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
  195. struct mbuf *m, sa_family_t saf, u_int fibnum,
  196. int rtm_errno);
  197. static void rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc);
  198. static void rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask);
  199. static struct netisr_handler rtsock_nh = {
  200. .nh_name = "rtsock",
  201. .nh_handler = rts_input,
  202. .nh_proto = NETISR_ROUTE,
  203. .nh_policy = NETISR_POLICY_SOURCE,
  204. };
  205. static int
  206. sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
  207. {
  208. int error, qlimit;
  209. netisr_getqlimit(&rtsock_nh, &qlimit);
  210. error = sysctl_handle_int(oidp, &qlimit, 0, req);
  211. if (error || !req->newptr)
  212. return (error);
  213. if (qlimit < 1)
  214. return (EINVAL);
  215. return (netisr_setqlimit(&rtsock_nh, qlimit));
  216. }
  217. SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen,
  218. CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
  219. 0, 0, sysctl_route_netisr_maxqlen, "I",
  220. "maximum routing socket dispatch queue length");
  221. static void
  222. vnet_rts_init(void)
  223. {
  224. int tmp;
  225. if (IS_DEFAULT_VNET(curvnet)) {
  226. if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
  227. rtsock_nh.nh_qlimit = tmp;
  228. netisr_register(&rtsock_nh);
  229. }
  230. #ifdef VIMAGE
  231. else
  232. netisr_register_vnet(&rtsock_nh);
  233. #endif
  234. }
  235. VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
  236. vnet_rts_init, 0);
  237. #ifdef VIMAGE
  238. static void
  239. vnet_rts_uninit(void)
  240. {
  241. netisr_unregister_vnet(&rtsock_nh);
  242. }
  243. VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
  244. vnet_rts_uninit, 0);
  245. #endif
  246. static void
  247. report_route_event(const struct rib_cmd_info *rc, void *_cbdata)
  248. {
  249. uint32_t fibnum = (uint32_t)(uintptr_t)_cbdata;
  250. struct nhop_object *nh;
  251. nh = rc->rc_cmd == RTM_DELETE ? rc->rc_nh_old : rc->rc_nh_new;
  252. rt_routemsg(rc->rc_cmd, rc->rc_rt, nh, fibnum);
  253. }
  254. static void
  255. rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
  256. {
  257. #ifdef ROUTE_MPATH
  258. if ((rc->rc_nh_new && NH_IS_NHGRP(rc->rc_nh_new)) ||
  259. (rc->rc_nh_old && NH_IS_NHGRP(rc->rc_nh_old))) {
  260. rib_decompose_notification(rc, report_route_event,
  261. (void *)(uintptr_t)fibnum);
  262. } else
  263. #endif
  264. report_route_event(rc, (void *)(uintptr_t)fibnum);
  265. }
  266. static struct rtbridge rtsbridge = {
  267. .route_f = rts_handle_route_event,
  268. .ifmsg_f = rtsock_ifmsg,
  269. };
  270. static struct rtbridge *rtsbridge_orig_p;
  271. static void
  272. rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc)
  273. {
  274. netlink_callback_p->route_f(fibnum, rc);
  275. }
  276. static void
  277. rtsock_init(void)
  278. {
  279. rtsbridge_orig_p = rtsock_callback_p;
  280. rtsock_callback_p = &rtsbridge;
  281. }
  282. SYSINIT(rtsock_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtsock_init, NULL);
  283. static void
  284. rts_handle_ifnet_arrival(void *arg __unused, struct ifnet *ifp)
  285. {
  286. rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
  287. }
  288. EVENTHANDLER_DEFINE(ifnet_arrival_event, rts_handle_ifnet_arrival, NULL, 0);
  289. static void
  290. rts_handle_ifnet_departure(void *arg __unused, struct ifnet *ifp)
  291. {
  292. rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
  293. }
  294. EVENTHANDLER_DEFINE(ifnet_departure_event, rts_handle_ifnet_departure, NULL, 0);
  295. static void
  296. rts_append_data(struct socket *so, struct mbuf *m)
  297. {
  298. if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) {
  299. soroverflow(so);
  300. m_freem(m);
  301. } else
  302. sorwakeup(so);
  303. }
  304. static void
  305. rts_input(struct mbuf *m)
  306. {
  307. struct rcb *rcb;
  308. struct socket *last;
  309. last = NULL;
  310. RTSOCK_LOCK();
  311. LIST_FOREACH(rcb, &V_route_cb.cblist, list) {
  312. if (rcb->rcb_family != AF_UNSPEC &&
  313. rcb->rcb_family != m->m_rtsock_family)
  314. continue;
  315. if ((m->m_flags & RTS_FILTER_FIB) &&
  316. M_GETFIB(m) != rcb->rcb_socket->so_fibnum)
  317. continue;
  318. if (last != NULL) {
  319. struct mbuf *n;
  320. n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
  321. if (n != NULL)
  322. rts_append_data(last, n);
  323. }
  324. last = rcb->rcb_socket;
  325. }
  326. if (last != NULL)
  327. rts_append_data(last, m);
  328. else
  329. m_freem(m);
  330. RTSOCK_UNLOCK();
  331. }
  332. static void
  333. rts_close(struct socket *so)
  334. {
  335. soisdisconnected(so);
  336. }
  337. static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  338. "Routing socket infrastructure");
  339. static u_long rts_sendspace = 8192;
  340. SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0,
  341. "Default routing socket send space");
  342. static u_long rts_recvspace = 8192;
  343. SYSCTL_ULONG(_net_rtsock, OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0,
  344. "Default routing socket receive space");
  345. static int
  346. rts_attach(struct socket *so, int proto, struct thread *td)
  347. {
  348. struct rcb *rcb;
  349. int error;
  350. error = soreserve(so, rts_sendspace, rts_recvspace);
  351. if (error)
  352. return (error);
  353. rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK);
  354. rcb->rcb_socket = so;
  355. rcb->rcb_family = proto;
  356. so->so_pcb = rcb;
  357. so->so_fibnum = td->td_proc->p_fibnum;
  358. so->so_options |= SO_USELOOPBACK;
  359. RTSOCK_LOCK();
  360. LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list);
  361. switch (proto) {
  362. case AF_INET:
  363. V_route_cb.ip_count++;
  364. break;
  365. case AF_INET6:
  366. V_route_cb.ip6_count++;
  367. break;
  368. }
  369. V_route_cb.any_count++;
  370. RTSOCK_UNLOCK();
  371. soisconnected(so);
  372. return (0);
  373. }
  374. static void
  375. rts_detach(struct socket *so)
  376. {
  377. struct rcb *rcb = so->so_pcb;
  378. RTSOCK_LOCK();
  379. LIST_REMOVE(rcb, list);
  380. switch(rcb->rcb_family) {
  381. case AF_INET:
  382. V_route_cb.ip_count--;
  383. break;
  384. case AF_INET6:
  385. V_route_cb.ip6_count--;
  386. break;
  387. }
  388. V_route_cb.any_count--;
  389. RTSOCK_UNLOCK();
  390. free(rcb, M_PCB);
  391. so->so_pcb = NULL;
  392. }
  393. static int
  394. rts_disconnect(struct socket *so)
  395. {
  396. return (ENOTCONN);
  397. }
  398. static int
  399. rts_shutdown(struct socket *so, enum shutdown_how how)
  400. {
  401. /*
  402. * Note: route socket marks itself as connected through its lifetime.
  403. */
  404. switch (how) {
  405. case SHUT_RD:
  406. sorflush(so);
  407. break;
  408. case SHUT_RDWR:
  409. sorflush(so);
  410. /* FALLTHROUGH */
  411. case SHUT_WR:
  412. socantsendmore(so);
  413. }
  414. return (0);
  415. }
  416. #ifndef _SOCKADDR_UNION_DEFINED
  417. #define _SOCKADDR_UNION_DEFINED
  418. /*
  419. * The union of all possible address formats we handle.
  420. */
  421. union sockaddr_union {
  422. struct sockaddr sa;
  423. struct sockaddr_in sin;
  424. struct sockaddr_in6 sin6;
  425. };
  426. #endif /* _SOCKADDR_UNION_DEFINED */
  427. static int
  428. rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
  429. struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
  430. {
  431. #if defined(INET) || defined(INET6)
  432. struct epoch_tracker et;
  433. #endif
  434. /* First, see if the returned address is part of the jail. */
  435. if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
  436. info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
  437. return (0);
  438. }
  439. switch (info->rti_info[RTAX_DST]->sa_family) {
  440. #ifdef INET
  441. case AF_INET:
  442. {
  443. struct in_addr ia;
  444. struct ifaddr *ifa;
  445. int found;
  446. found = 0;
  447. /*
  448. * Try to find an address on the given outgoing interface
  449. * that belongs to the jail.
  450. */
  451. NET_EPOCH_ENTER(et);
  452. CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  453. struct sockaddr *sa;
  454. sa = ifa->ifa_addr;
  455. if (sa->sa_family != AF_INET)
  456. continue;
  457. ia = ((struct sockaddr_in *)sa)->sin_addr;
  458. if (prison_check_ip4(cred, &ia) == 0) {
  459. found = 1;
  460. break;
  461. }
  462. }
  463. NET_EPOCH_EXIT(et);
  464. if (!found) {
  465. /*
  466. * As a last resort return the 'default' jail address.
  467. */
  468. ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
  469. sin_addr;
  470. if (prison_get_ip4(cred, &ia) != 0)
  471. return (ESRCH);
  472. }
  473. bzero(&saun->sin, sizeof(struct sockaddr_in));
  474. saun->sin.sin_len = sizeof(struct sockaddr_in);
  475. saun->sin.sin_family = AF_INET;
  476. saun->sin.sin_addr.s_addr = ia.s_addr;
  477. info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
  478. break;
  479. }
  480. #endif
  481. #ifdef INET6
  482. case AF_INET6:
  483. {
  484. struct in6_addr ia6;
  485. struct ifaddr *ifa;
  486. int found;
  487. found = 0;
  488. /*
  489. * Try to find an address on the given outgoing interface
  490. * that belongs to the jail.
  491. */
  492. NET_EPOCH_ENTER(et);
  493. CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  494. struct sockaddr *sa;
  495. sa = ifa->ifa_addr;
  496. if (sa->sa_family != AF_INET6)
  497. continue;
  498. bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
  499. &ia6, sizeof(struct in6_addr));
  500. if (prison_check_ip6(cred, &ia6) == 0) {
  501. found = 1;
  502. break;
  503. }
  504. }
  505. NET_EPOCH_EXIT(et);
  506. if (!found) {
  507. /*
  508. * As a last resort return the 'default' jail address.
  509. */
  510. ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
  511. sin6_addr;
  512. if (prison_get_ip6(cred, &ia6) != 0)
  513. return (ESRCH);
  514. }
  515. bzero(&saun->sin6, sizeof(struct sockaddr_in6));
  516. saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
  517. saun->sin6.sin6_family = AF_INET6;
  518. bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
  519. if (sa6_recoverscope(&saun->sin6) != 0)
  520. return (ESRCH);
  521. info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
  522. break;
  523. }
  524. #endif
  525. default:
  526. return (ESRCH);
  527. }
  528. return (0);
  529. }
  530. static int
  531. fill_blackholeinfo(struct rt_addrinfo *info, union sockaddr_union *saun)
  532. {
  533. struct ifaddr *ifa;
  534. sa_family_t saf;
  535. if (V_loif == NULL) {
  536. RTS_PID_LOG(LOG_INFO, "Unable to add blackhole/reject nhop without loopback");
  537. return (ENOTSUP);
  538. }
  539. info->rti_ifp = V_loif;
  540. saf = info->rti_info[RTAX_DST]->sa_family;
  541. CK_STAILQ_FOREACH(ifa, &info->rti_ifp->if_addrhead, ifa_link) {
  542. if (ifa->ifa_addr->sa_family == saf) {
  543. info->rti_ifa = ifa;
  544. break;
  545. }
  546. }
  547. if (info->rti_ifa == NULL) {
  548. RTS_PID_LOG(LOG_INFO, "Unable to find ifa for blackhole/reject nhop");
  549. return (ENOTSUP);
  550. }
  551. bzero(saun, sizeof(union sockaddr_union));
  552. switch (saf) {
  553. #ifdef INET
  554. case AF_INET:
  555. saun->sin.sin_family = AF_INET;
  556. saun->sin.sin_len = sizeof(struct sockaddr_in);
  557. saun->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  558. break;
  559. #endif
  560. #ifdef INET6
  561. case AF_INET6:
  562. saun->sin6.sin6_family = AF_INET6;
  563. saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
  564. saun->sin6.sin6_addr = in6addr_loopback;
  565. break;
  566. #endif
  567. default:
  568. RTS_PID_LOG(LOG_INFO, "unsupported family: %d", saf);
  569. return (ENOTSUP);
  570. }
  571. info->rti_info[RTAX_GATEWAY] = &saun->sa;
  572. info->rti_flags |= RTF_GATEWAY;
  573. return (0);
  574. }
  575. /*
  576. * Fills in @info based on userland-provided @rtm message.
  577. *
  578. * Returns 0 on success.
  579. */
  580. static int
  581. fill_addrinfo(struct rt_msghdr *rtm, int len, struct linear_buffer *lb, u_int fibnum,
  582. struct rt_addrinfo *info)
  583. {
  584. int error;
  585. rtm->rtm_pid = curproc->p_pid;
  586. info->rti_addrs = rtm->rtm_addrs;
  587. info->rti_mflags = rtm->rtm_inits;
  588. info->rti_rmx = &rtm->rtm_rmx;
  589. /*
  590. * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
  591. * link-local address because rtrequest requires addresses with
  592. * embedded scope id.
  593. */
  594. if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, info))
  595. return (EINVAL);
  596. info->rti_flags = rtm->rtm_flags;
  597. error = cleanup_xaddrs(info, lb);
  598. if (error != 0)
  599. return (error);
  600. /*
  601. * Verify that the caller has the appropriate privilege; RTM_GET
  602. * is the only operation the non-superuser is allowed.
  603. */
  604. if (rtm->rtm_type != RTM_GET) {
  605. error = priv_check(curthread, PRIV_NET_ROUTE);
  606. if (error != 0)
  607. return (error);
  608. }
  609. /*
  610. * The given gateway address may be an interface address.
  611. * For example, issuing a "route change" command on a route
  612. * entry that was created from a tunnel, and the gateway
  613. * address given is the local end point. In this case the
  614. * RTF_GATEWAY flag must be cleared or the destination will
  615. * not be reachable even though there is no error message.
  616. */
  617. if (info->rti_info[RTAX_GATEWAY] != NULL &&
  618. info->rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
  619. struct nhop_object *nh;
  620. /*
  621. * A host route through the loopback interface is
  622. * installed for each interface address. In pre 8.0
  623. * releases the interface address of a PPP link type
  624. * is not reachable locally. This behavior is fixed as
  625. * part of the new L2/L3 redesign and rewrite work. The
  626. * signature of this interface address route is the
  627. * AF_LINK sa_family type of the gateway, and the
  628. * rt_ifp has the IFF_LOOPBACK flag set.
  629. */
  630. nh = rib_lookup(fibnum, info->rti_info[RTAX_GATEWAY], NHR_NONE, 0);
  631. if (nh != NULL && nh->gw_sa.sa_family == AF_LINK &&
  632. nh->nh_ifp->if_flags & IFF_LOOPBACK) {
  633. info->rti_flags &= ~RTF_GATEWAY;
  634. info->rti_flags |= RTF_GWFLAG_COMPAT;
  635. }
  636. }
  637. return (0);
  638. }
  /*
   * Picks the nexthop to report for a route.
   *
   * A plain nexthop is returned unchanged.  For a nexthop group (which can
   * only be expanded when ROUTE_MPATH is compiled in) the first member is
   * returned when @gw is NULL, otherwise the first member accepted by
   * match_nhop_gw().
   *
   * Returns NULL if @nh is a group and no member could be selected.
   */
  static struct nhop_object *
  select_nhop(struct nhop_object *nh, const struct sockaddr *gw)
  {
  #ifdef ROUTE_MPATH
      const struct weightened_nhop *members;
      uint32_t count;
      int idx;
  #endif

      /* Not a group: there is nothing to choose from. */
      if (!NH_IS_NHGRP(nh))
          return (nh);
  #ifdef ROUTE_MPATH
      members = nhgrp_get_nhops((struct nhgrp_object *)nh, &count);
      if (gw == NULL)
          return (members[0].nh);
      idx = 0;
      while (idx < count) {
          if (match_nhop_gw(members[idx].nh, gw))
              return (members[idx].nh);
          idx++;
      }
  #endif
      return (NULL);
  }
  657. /*
  658. * Handles RTM_GET message from routing socket, returning matching rt.
  659. *
  660. * Returns:
  661. * 0 on success, with locked and referenced matching rt in @rt_nrt
  662. * errno of failure
  663. */
  664. static int
  665. handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
  666. struct rt_msghdr *rtm, struct rib_cmd_info *rc)
  667. {
  668. RIB_RLOCK_TRACKER;
  669. struct rib_head *rnh;
  670. struct nhop_object *nh;
  671. sa_family_t saf;
  672. saf = info->rti_info[RTAX_DST]->sa_family;
  673. rnh = rt_tables_get_rnh(fibnum, saf);
  674. if (rnh == NULL)
  675. return (EAFNOSUPPORT);
  676. RIB_RLOCK(rnh);
  677. /*
  678. * By (implicit) convention host route (one without netmask)
  679. * means longest-prefix-match request and the route with netmask
  680. * means exact-match lookup.
  681. * As cleanup_xaddrs() cleans up info flags&addrs for the /32,/128
  682. * prefixes, use original data to check for the netmask presence.
  683. */
  684. if ((rtm->rtm_addrs & RTA_NETMASK) == 0) {
  685. /*
  686. * Provide longest prefix match for
  687. * address lookup (no mask).
  688. * 'route -n get addr'
  689. */
  690. rc->rc_rt = (struct rtentry *) rnh->rnh_matchaddr(
  691. info->rti_info[RTAX_DST], &rnh->head);
  692. } else
  693. rc->rc_rt = (struct rtentry *) rnh->rnh_lookup(
  694. info->rti_info[RTAX_DST],
  695. info->rti_info[RTAX_NETMASK], &rnh->head);
  696. if (rc->rc_rt == NULL) {
  697. RIB_RUNLOCK(rnh);
  698. return (ESRCH);
  699. }
  700. nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
  701. if (nh == NULL) {
  702. RIB_RUNLOCK(rnh);
  703. return (ESRCH);
  704. }
  705. /*
  706. * If performing proxied L2 entry insertion, and
  707. * the actual PPP host entry is found, perform
  708. * another search to retrieve the prefix route of
  709. * the local end point of the PPP link.
  710. * TODO: move this logic to userland.
  711. */
  712. if (rtm->rtm_flags & RTF_ANNOUNCE) {
  713. struct sockaddr_storage laddr;
  714. if (nh->nh_ifp != NULL &&
  715. nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
  716. struct ifaddr *ifa;
  717. ifa = ifa_ifwithnet(info->rti_info[RTAX_DST], 1,
  718. RT_ALL_FIBS);
  719. if (ifa != NULL)
  720. rt_maskedcopy(ifa->ifa_addr,
  721. (struct sockaddr *)&laddr,
  722. ifa->ifa_netmask);
  723. } else
  724. rt_maskedcopy(nh->nh_ifa->ifa_addr,
  725. (struct sockaddr *)&laddr,
  726. nh->nh_ifa->ifa_netmask);
  727. /*
  728. * refactor rt and no lock operation necessary
  729. */
  730. rc->rc_rt = (struct rtentry *)rnh->rnh_matchaddr(
  731. (struct sockaddr *)&laddr, &rnh->head);
  732. if (rc->rc_rt == NULL) {
  733. RIB_RUNLOCK(rnh);
  734. return (ESRCH);
  735. }
  736. nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
  737. if (nh == NULL) {
  738. RIB_RUNLOCK(rnh);
  739. return (ESRCH);
  740. }
  741. }
  742. rc->rc_nh_new = nh;
  743. rc->rc_nh_weight = rc->rc_rt->rt_weight;
  744. RIB_RUNLOCK(rnh);
  745. return (0);
  746. }
  /*
   * Resets @dst and @mask to empty sockaddrs of the given address @family:
   * both are zeroed over the family-specific sockaddr size and then get
   * their length and family fields filled in.
   *
   * Families that are not compiled in (or not INET/INET6) leave both
   * buffers untouched.
   */
  static void
  init_sockaddrs_family(int family, struct sockaddr *dst, struct sockaddr *mask)
  {
      switch (family) {
  #ifdef INET
      case AF_INET: {
          struct sockaddr_in *sin_dst = (struct sockaddr_in *)dst;
          struct sockaddr_in *sin_mask = (struct sockaddr_in *)mask;

          bzero(sin_dst, sizeof(struct sockaddr_in));
          bzero(sin_mask, sizeof(struct sockaddr_in));
          sin_dst->sin_len = sizeof(struct sockaddr_in);
          sin_dst->sin_family = AF_INET;
          sin_mask->sin_len = sizeof(struct sockaddr_in);
          sin_mask->sin_family = AF_INET;
          break;
      }
  #endif
  #ifdef INET6
      case AF_INET6: {
          struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)dst;
          struct sockaddr_in6 *sin6_mask = (struct sockaddr_in6 *)mask;

          bzero(sin6_dst, sizeof(struct sockaddr_in6));
          bzero(sin6_mask, sizeof(struct sockaddr_in6));
          sin6_dst->sin6_len = sizeof(struct sockaddr_in6);
          sin6_dst->sin6_family = AF_INET6;
          sin6_mask->sin6_len = sizeof(struct sockaddr_in6);
          sin6_mask->sin6_family = AF_INET6;
          break;
      }
  #endif
      default:
          break;
      }
  }
  /*
   * Copies the prefix and prefix mask of @rt into the address parts of
   * the pre-initialized sockaddrs @dst and @mask.  The address family is
   * taken from @dst->sa_family; for IPv6 the scope id reported for the
   * route is additionally stored in @dst.
   */
  static void
  export_rtaddrs(const struct rtentry *rt, struct sockaddr *dst,
      struct sockaddr *mask)
  {
      switch (dst->sa_family) {
  #ifdef INET
      case AF_INET: {
          struct sockaddr_in *sin_dst = (struct sockaddr_in *)dst;
          struct sockaddr_in *sin_mask = (struct sockaddr_in *)mask;
          uint32_t zone = 0;

          /* The IPv4 scope id is fetched but has nowhere to go. */
          rt_get_inet_prefix_pmask(rt, &sin_dst->sin_addr,
              &sin_mask->sin_addr, &zone);
          break;
      }
  #endif
  #ifdef INET6
      case AF_INET6: {
          struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)dst;
          struct sockaddr_in6 *sin6_mask = (struct sockaddr_in6 *)mask;
          uint32_t zone = 0;

          rt_get_inet6_prefix_pmask(rt, &sin6_dst->sin6_addr,
              &sin6_mask->sin6_addr, &zone);
          sin6_dst->sin6_scope_id = zone;
          break;
      }
  #endif
      default:
          break;
      }
  }
  801. static int
  802. update_rtm_from_info(struct rt_addrinfo *info, struct rt_msghdr **prtm,
  803. int alloc_len)
  804. {
  805. struct rt_msghdr *rtm, *orig_rtm = NULL;
  806. struct walkarg w;
  807. int len;
  808. rtm = *prtm;
  809. /* Check if we need to realloc storage */
  810. rtsock_msg_buffer(rtm->rtm_type, info, NULL, &len);
  811. if (len > alloc_len) {
  812. struct rt_msghdr *tmp_rtm;
  813. tmp_rtm = malloc(len, M_TEMP, M_NOWAIT);
  814. if (tmp_rtm == NULL)
  815. return (ENOBUFS);
  816. bcopy(rtm, tmp_rtm, rtm->rtm_msglen);
  817. orig_rtm = rtm;
  818. rtm = tmp_rtm;
  819. alloc_len = len;
  820. /*
  821. * Delay freeing original rtm as info contains
  822. * data referencing it.
  823. */
  824. }
  825. w.w_tmem = (caddr_t)rtm;
  826. w.w_tmemsize = alloc_len;
  827. rtsock_msg_buffer(rtm->rtm_type, info, &w, &len);
  828. rtm->rtm_addrs = info->rti_addrs;
  829. if (orig_rtm != NULL)
  830. free(orig_rtm, M_TEMP);
  831. *prtm = rtm;
  832. return (0);
  833. }
  834. /*
  835. * Update sockaddrs, flags, etc in @prtm based on @rc data.
  836. * rtm can be reallocated.
  837. *
  838. * Returns 0 on success, along with pointer to (potentially reallocated)
  839. * rtm.
  840. *
  841. */
  842. static int
  843. update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
  844. int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh)
  845. {
  846. union sockaddr_union saun;
  847. struct rt_msghdr *rtm;
  848. struct ifnet *ifp;
  849. int error;
  850. rtm = *prtm;
  851. union sockaddr_union sa_dst, sa_mask;
  852. int family = info->rti_info[RTAX_DST]->sa_family;
  853. init_sockaddrs_family(family, &sa_dst.sa, &sa_mask.sa);
  854. export_rtaddrs(rc->rc_rt, &sa_dst.sa, &sa_mask.sa);
  855. info->rti_info[RTAX_DST] = &sa_dst.sa;
  856. info->rti_info[RTAX_NETMASK] = rt_is_host(rc->rc_rt) ? NULL : &sa_mask.sa;
  857. info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
  858. info->rti_info[RTAX_GENMASK] = 0;
  859. ifp = nh->nh_ifp;
  860. if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
  861. if (ifp) {
  862. info->rti_info[RTAX_IFP] =
  863. ifp->if_addr->ifa_addr;
  864. error = rtm_get_jailed(info, ifp, nh,
  865. &saun, curthread->td_ucred);
  866. if (error != 0)
  867. return (error);
  868. if (ifp->if_flags & IFF_POINTOPOINT)
  869. info->rti_info[RTAX_BRD] =
  870. nh->nh_ifa->ifa_dstaddr;
  871. rtm->rtm_index = ifp->if_index;
  872. } else {
  873. info->rti_info[RTAX_IFP] = NULL;
  874. info->rti_info[RTAX_IFA] = NULL;
  875. }
  876. } else if (ifp != NULL)
  877. rtm->rtm_index = ifp->if_index;
  878. if ((error = update_rtm_from_info(info, prtm, alloc_len)) != 0)
  879. return (error);
  880. rtm = *prtm;
  881. rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh);
  882. if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
  883. rtm->rtm_flags = RTF_GATEWAY |
  884. (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
  885. rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx);
  886. rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight;
  887. return (0);
  888. }
  889. #ifdef ROUTE_MPATH
  890. static void
  891. save_del_notification(const struct rib_cmd_info *rc, void *_cbdata)
  892. {
  893. struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
  894. if (rc->rc_cmd == RTM_DELETE)
  895. *rc_new = *rc;
  896. }
  897. static void
  898. save_add_notification(const struct rib_cmd_info *rc, void *_cbdata)
  899. {
  900. struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
  901. if (rc->rc_cmd == RTM_ADD)
  902. *rc_new = *rc;
  903. }
  904. #endif
  905. #if defined(INET6) || defined(INET)
  906. static struct sockaddr *
  907. alloc_sockaddr_aligned(struct linear_buffer *lb, int len)
  908. {
  909. len = roundup2(len, sizeof(uint64_t));
  910. if (lb->offset + len > lb->size)
  911. return (NULL);
  912. struct sockaddr *sa = (struct sockaddr *)(lb->base + lb->offset);
  913. lb->offset += len;
  914. return (sa);
  915. }
  916. #endif
  917. static int
  918. rts_send(struct socket *so, int flags, struct mbuf *m,
  919. struct sockaddr *nam, struct mbuf *control, struct thread *td)
  920. {
  921. struct rt_msghdr *rtm = NULL;
  922. struct rt_addrinfo info;
  923. struct epoch_tracker et;
  924. #ifdef INET6
  925. struct sockaddr_storage ss;
  926. struct sockaddr_in6 *sin6;
  927. int i, rti_need_deembed = 0;
  928. #endif
  929. int alloc_len = 0, len, error = 0, fibnum;
  930. sa_family_t saf = AF_UNSPEC;
  931. struct rib_cmd_info rc;
  932. struct nhop_object *nh;
  933. if ((flags & PRUS_OOB) || control != NULL) {
  934. m_freem(m);
  935. if (control != NULL)
  936. m_freem(control);
  937. return (EOPNOTSUPP);
  938. }
  939. fibnum = so->so_fibnum;
  940. #define senderr(e) { error = e; goto flush;}
  941. if (m == NULL || ((m->m_len < sizeof(long)) &&
  942. (m = m_pullup(m, sizeof(long))) == NULL))
  943. return (ENOBUFS);
  944. if ((m->m_flags & M_PKTHDR) == 0)
  945. panic("route_output");
  946. NET_EPOCH_ENTER(et);
  947. len = m->m_pkthdr.len;
  948. if (len < sizeof(*rtm) ||
  949. len != mtod(m, struct rt_msghdr *)->rtm_msglen)
  950. senderr(EINVAL);
  951. /*
  952. * Most of current messages are in range 200-240 bytes,
  953. * minimize possible re-allocation on reply using larger size
  954. * buffer aligned on 1k boundaty.
  955. */
  956. alloc_len = roundup2(len, 1024);
  957. int total_len = alloc_len + SCRATCH_BUFFER_SIZE;
  958. if ((rtm = malloc(total_len, M_TEMP, M_NOWAIT)) == NULL)
  959. senderr(ENOBUFS);
  960. m_copydata(m, 0, len, (caddr_t)rtm);
  961. bzero(&info, sizeof(info));
  962. nh = NULL;
  963. struct linear_buffer lb = {
  964. .base = (char *)rtm + alloc_len,
  965. .size = SCRATCH_BUFFER_SIZE,
  966. };
  967. if (rtm->rtm_version != RTM_VERSION) {
  968. /* Do not touch message since format is unknown */
  969. free(rtm, M_TEMP);
  970. rtm = NULL;
  971. senderr(EPROTONOSUPPORT);
  972. }
  973. /*
  974. * Starting from here, it is possible
  975. * to alter original message and insert
  976. * caller PID and error value.
  977. */
  978. if ((error = fill_addrinfo(rtm, len, &lb, fibnum, &info)) != 0) {
  979. senderr(error);
  980. }
  981. /* fill_addringo() embeds scope into IPv6 addresses */
  982. #ifdef INET6
  983. rti_need_deembed = 1;
  984. #endif
  985. saf = info.rti_info[RTAX_DST]->sa_family;
  986. /* support for new ARP code */
  987. if (rtm->rtm_flags & RTF_LLDATA) {
  988. error = lla_rt_output(rtm, &info);
  989. goto flush;
  990. }
  991. union sockaddr_union gw_saun;
  992. int blackhole_flags = rtm->rtm_flags & (RTF_BLACKHOLE|RTF_REJECT);
  993. if (blackhole_flags != 0) {
  994. if (blackhole_flags != (RTF_BLACKHOLE | RTF_REJECT))
  995. error = fill_blackholeinfo(&info, &gw_saun);
  996. else {
  997. RTS_PID_LOG(LOG_DEBUG, "both BLACKHOLE and REJECT flags specifiied");
  998. error = EINVAL;
  999. }
  1000. if (error != 0)
  1001. senderr(error);
  1002. }
  1003. switch (rtm->rtm_type) {
  1004. case RTM_ADD:
  1005. case RTM_CHANGE:
  1006. if (rtm->rtm_type == RTM_ADD) {
  1007. if (info.rti_info[RTAX_GATEWAY] == NULL) {
  1008. RTS_PID_LOG(LOG_DEBUG, "RTM_ADD w/o gateway");
  1009. senderr(EINVAL);
  1010. }
  1011. }
  1012. error = rib_action(fibnum, rtm->rtm_type, &info, &rc);
  1013. if (error == 0) {
  1014. rtsock_notify_event(fibnum, &rc);
  1015. #ifdef ROUTE_MPATH
  1016. if (NH_IS_NHGRP(rc.rc_nh_new) ||
  1017. (rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
  1018. struct rib_cmd_info rc_simple = {};
  1019. rib_decompose_notification(&rc,
  1020. save_add_notification, (void *)&rc_simple);
  1021. rc = rc_simple;
  1022. }
  1023. #endif
  1024. /* nh MAY be empty if RTM_CHANGE request is no-op */
  1025. nh = rc.rc_nh_new;
  1026. if (nh != NULL) {
  1027. rtm->rtm_index = nh->nh_ifp->if_index;
  1028. rtm->rtm_flags = rc.rc_rt->rte_flags | nhop_get_rtflags(nh);
  1029. }
  1030. }
  1031. break;
  1032. case RTM_DELETE:
  1033. error = rib_action(fibnum, RTM_DELETE, &info, &rc);
  1034. if (error == 0) {
  1035. rtsock_notify_event(fibnum, &rc);
  1036. #ifdef ROUTE_MPATH
  1037. if (NH_IS_NHGRP(rc.rc_nh_old) ||
  1038. (rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
  1039. struct rib_cmd_info rc_simple = {};
  1040. rib_decompose_notification(&rc,
  1041. save_del_notification, (void *)&rc_simple);
  1042. rc = rc_simple;
  1043. }
  1044. #endif
  1045. nh = rc.rc_nh_old;
  1046. }
  1047. break;
  1048. case RTM_GET:
  1049. error = handle_rtm_get(&info, fibnum, rtm, &rc);
  1050. if (error != 0)
  1051. senderr(error);
  1052. nh = rc.rc_nh_new;
  1053. if (!rt_is_exportable(rc.rc_rt, curthread->td_ucred))
  1054. senderr(ESRCH);
  1055. break;
  1056. default:
  1057. senderr(EOPNOTSUPP);
  1058. }
  1059. if (error == 0 && nh != NULL) {
  1060. error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh);
  1061. /*
  1062. * Note that some sockaddr pointers may have changed to
  1063. * point to memory outsize @rtm. Some may be pointing
  1064. * to the on-stack variables.
  1065. * Given that, any pointer in @info CANNOT BE USED.
  1066. */
  1067. /*
  1068. * scopeid deembedding has been performed while
  1069. * writing updated rtm in rtsock_msg_buffer().
  1070. * With that in mind, skip deembedding procedure below.
  1071. */
  1072. #ifdef INET6
  1073. rti_need_deembed = 0;
  1074. #endif
  1075. }
  1076. flush:
  1077. NET_EPOCH_EXIT(et);
  1078. #ifdef INET6
  1079. if (rtm != NULL) {
  1080. if (rti_need_deembed) {
  1081. /* sin6_scope_id is recovered before sending rtm. */
  1082. sin6 = (struct sockaddr_in6 *)&ss;
  1083. for (i = 0; i < RTAX_MAX; i++) {
  1084. if (info.rti_info[i] == NULL)
  1085. continue;
  1086. if (info.rti_info[i]->sa_family != AF_INET6)
  1087. continue;
  1088. bcopy(info.rti_info[i], sin6, sizeof(*sin6));
  1089. if (sa6_recoverscope(sin6) == 0)
  1090. bcopy(sin6, info.rti_info[i],
  1091. sizeof(*sin6));
  1092. }
  1093. if (update_rtm_from_info(&info, &rtm, alloc_len) != 0) {
  1094. if (error != 0)
  1095. error = ENOBUFS;
  1096. }
  1097. }
  1098. }
  1099. #endif
  1100. send_rtm_reply(so, rtm, m, saf, fibnum, error);
  1101. return (error);
  1102. }
  1103. /*
  1104. * Sends the prepared reply message in @rtm to all rtsock clients.
  1105. * Frees @m and @rtm.
  1106. *
  1107. */
  1108. static void
  1109. send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
  1110. sa_family_t saf, u_int fibnum, int rtm_errno)
  1111. {
  1112. struct rcb *rcb = NULL;
  1113. /*
  1114. * Check to see if we don't want our own messages.
  1115. */
  1116. if ((so->so_options & SO_USELOOPBACK) == 0) {
  1117. if (V_route_cb.any_count <= 1) {
  1118. if (rtm != NULL)
  1119. free(rtm, M_TEMP);
  1120. m_freem(m);
  1121. return;
  1122. }
  1123. /* There is another listener, so construct message */
  1124. rcb = so->so_pcb;
  1125. }
  1126. if (rtm != NULL) {
  1127. if (rtm_errno!= 0)
  1128. rtm->rtm_errno = rtm_errno;
  1129. else
  1130. rtm->rtm_flags |= RTF_DONE;
  1131. m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
  1132. if (m->m_pkthdr.len < rtm->rtm_msglen) {
  1133. m_freem(m);
  1134. m = NULL;
  1135. } else if (m->m_pkthdr.len > rtm->rtm_msglen)
  1136. m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
  1137. free(rtm, M_TEMP);
  1138. }
  1139. if (m != NULL) {
  1140. M_SETFIB(m, fibnum);
  1141. m->m_flags |= RTS_FILTER_FIB;
  1142. if (rcb) {
  1143. /*
  1144. * XXX insure we don't get a copy by
  1145. * invalidating our protocol
  1146. */
  1147. sa_family_t family = rcb->rcb_family;
  1148. rcb->rcb_family = AF_UNSPEC;
  1149. rt_dispatch(m, saf);
  1150. rcb->rcb_family = family;
  1151. } else
  1152. rt_dispatch(m, saf);
  1153. }
  1154. }
  1155. static void
  1156. rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh,
  1157. struct rt_metrics *out)
  1158. {
  1159. bzero(out, sizeof(*out));
  1160. out->rmx_mtu = nh->nh_mtu;
  1161. out->rmx_weight = rt->rt_weight;
  1162. out->rmx_nhidx = nhop_get_idx(nh);
  1163. /* Kernel -> userland timebase conversion. */
  1164. out->rmx_expire = nhop_get_expire(nh) ?
  1165. nhop_get_expire(nh) - time_uptime + time_second : 0;
  1166. }
  1167. /*
  1168. * Extract the addresses of the passed sockaddrs.
  1169. * Do a little sanity checking so as to avoid bad memory references.
  1170. * This data is derived straight from userland.
  1171. */
  1172. static int
  1173. rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
  1174. {
  1175. struct sockaddr *sa;
  1176. int i;
  1177. for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
  1178. if ((rtinfo->rti_addrs & (1 << i)) == 0)
  1179. continue;
  1180. sa = (struct sockaddr *)cp;
  1181. /*
  1182. * It won't fit.
  1183. */
  1184. if (cp + sa->sa_len > cplim) {
  1185. RTS_PID_LOG(LOG_DEBUG, "sa_len too big for sa type %d", i);
  1186. return (EINVAL);
  1187. }
  1188. /*
  1189. * there are no more.. quit now
  1190. * If there are more bits, they are in error.
  1191. * I've seen this. route(1) can evidently generate these.
  1192. * This causes kernel to core dump.
  1193. * for compatibility, If we see this, point to a safe address.
  1194. */
  1195. if (sa->sa_len == 0) {
  1196. rtinfo->rti_info[i] = &sa_zero;
  1197. return (0); /* should be EINVAL but for compat */
  1198. }
  1199. /* accept it */
  1200. #ifdef INET6
  1201. if (sa->sa_family == AF_INET6)
  1202. sa6_embedscope((struct sockaddr_in6 *)sa,
  1203. V_ip6_use_defzone);
  1204. #endif
  1205. rtinfo->rti_info[i] = sa;
  1206. cp += SA_SIZE(sa);
  1207. }
  1208. return (0);
  1209. }
  1210. #ifdef INET
  1211. static inline void
  1212. fill_sockaddr_inet(struct sockaddr_in *sin, struct in_addr addr)
  1213. {
  1214. const struct sockaddr_in nsin = {
  1215. .sin_family = AF_INET,
  1216. .sin_len = sizeof(struct sockaddr_in),
  1217. .sin_addr = addr,
  1218. };
  1219. *sin = nsin;
  1220. }
  1221. #endif
  1222. #ifdef INET6
  1223. static inline void
  1224. fill_sockaddr_inet6(struct sockaddr_in6 *sin6, const struct in6_addr *addr6,
  1225. uint32_t scopeid)
  1226. {
  1227. const struct sockaddr_in6 nsin6 = {
  1228. .sin6_family = AF_INET6,
  1229. .sin6_len = sizeof(struct sockaddr_in6),
  1230. .sin6_addr = *addr6,
  1231. .sin6_scope_id = scopeid,
  1232. };
  1233. *sin6 = nsin6;
  1234. }
  1235. #endif
  1236. #if defined(INET6) || defined(INET)
  1237. /*
  1238. * Checks if gateway is suitable for lltable operations.
  1239. * Lltable code requires AF_LINK gateway with ifindex
  1240. * and mac address specified.
  1241. * Returns 0 on success.
  1242. */
  1243. static int
  1244. cleanup_xaddrs_lladdr(struct rt_addrinfo *info)
  1245. {
  1246. struct sockaddr_dl *sdl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
  1247. if (sdl->sdl_family != AF_LINK)
  1248. return (EINVAL);
  1249. if (sdl->sdl_index == 0) {
  1250. RTS_PID_LOG(LOG_DEBUG, "AF_LINK gateway w/o ifindex");
  1251. return (EINVAL);
  1252. }
  1253. if (offsetof(struct sockaddr_dl, sdl_data) + sdl->sdl_nlen + sdl->sdl_alen > sdl->sdl_len) {
  1254. RTS_PID_LOG(LOG_DEBUG, "AF_LINK gw: sdl_nlen/sdl_alen too large");
  1255. return (EINVAL);
  1256. }
  1257. return (0);
  1258. }
  1259. static int
  1260. cleanup_xaddrs_gateway(struct rt_addrinfo *info, struct linear_buffer *lb)
  1261. {
  1262. struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
  1263. struct sockaddr *sa;
  1264. if (info->rti_flags & RTF_LLDATA)
  1265. return (cleanup_xaddrs_lladdr(info));
  1266. switch (gw->sa_family) {
  1267. #ifdef INET
  1268. case AF_INET:
  1269. {
  1270. struct sockaddr_in *gw_sin = (struct sockaddr_in *)gw;
  1271. /* Ensure reads do not go beyoud SA boundary */
  1272. if (SA_SIZE(gw) < offsetof(struct sockaddr_in, sin_zero)) {
  1273. RTS_PID_LOG(LOG_DEBUG, "gateway sin_len too small: %d",
  1274. gw->sa_len);
  1275. return (EINVAL);
  1276. }
  1277. sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in));
  1278. if (sa == NULL)
  1279. return (ENOBUFS);
  1280. fill_sockaddr_inet((struct sockaddr_in *)sa, gw_sin->sin_addr);
  1281. info->rti_info[RTAX_GATEWAY] = sa;
  1282. }
  1283. break;
  1284. #endif
  1285. #ifdef INET6
  1286. case AF_INET6:
  1287. {
  1288. struct sockaddr_in6 *gw_sin6 = (struct sockaddr_in6 *)gw;
  1289. if (gw_sin6->sin6_len < sizeof(struct sockaddr_in6)) {
  1290. RTS_PID_LOG(LOG_DEBUG, "gateway sin6_len too small: %d",
  1291. gw->sa_len);
  1292. return (EINVAL);
  1293. }
  1294. fill_sockaddr_inet6(gw_sin6, &gw_sin6->sin6_addr, 0);
  1295. break;
  1296. }
  1297. #endif
  1298. case AF_LINK:
  1299. {
  1300. struct sockaddr_dl *gw_sdl;
  1301. size_t sdl_min_len = offsetof(struct sockaddr_dl, sdl_data);
  1302. gw_sdl = (struct sockaddr_dl *)gw;
  1303. if (gw_sdl->sdl_len < sdl_min_len) {
  1304. RTS_PID_LOG(LOG_DEBUG, "gateway sdl_len too small: %d",
  1305. gw_sdl->sdl_len);
  1306. return (EINVAL);
  1307. }
  1308. sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_dl_short));
  1309. if (sa == NULL)
  1310. return (ENOBUFS);
  1311. const struct sockaddr_dl_short sdl = {
  1312. .sdl_family = AF_LINK,
  1313. .sdl_len = sizeof(struct sockaddr_dl_short),
  1314. .sdl_index = gw_sdl->sdl_index,
  1315. };
  1316. *((struct sockaddr_dl_short *)sa) = sdl;
  1317. info->rti_info[RTAX_GATEWAY] = sa;
  1318. break;
  1319. }
  1320. }
  1321. return (0);
  1322. }
  1323. #endif
  1324. static void
  1325. remove_netmask(struct rt_addrinfo *info)
  1326. {
  1327. info->rti_info[RTAX_NETMASK] = NULL;
  1328. info->rti_flags |= RTF_HOST;
  1329. info->rti_addrs &= ~RTA_NETMASK;
  1330. }
  1331. #ifdef INET
  1332. static int
  1333. cleanup_xaddrs_inet(struct rt_addrinfo *info, struct linear_buffer *lb)
  1334. {
  1335. struct sockaddr_in *dst_sa, *mask_sa;
  1336. const int sa_len = sizeof(struct sockaddr_in);
  1337. struct in_addr dst, mask;
  1338. /* Check & fixup dst/netmask combination first */
  1339. dst_sa = (struct sockaddr_in *)info->rti_info[RTAX_DST];
  1340. mask_sa = (struct sockaddr_in *)info->rti_info[RTAX_NETMASK];
  1341. /* Ensure reads do not go beyound the buffer size */
  1342. if (SA_SIZE(dst_sa) < offsetof(struct sockaddr_in, sin_zero)) {
  1343. RTS_PID_LOG(LOG_DEBUG, "prefix dst sin_len too small: %d",
  1344. dst_sa->sin_len);
  1345. return (EINVAL);
  1346. }
  1347. if ((mask_sa != NULL) && mask_sa->sin_len < sizeof(struct sockaddr_in)) {
  1348. /*
  1349. * Some older routing software encode mask length into the
  1350. * sin_len, thus resulting in "truncated" sockaddr.
  1351. */
  1352. int len = mask_sa->sin_len - offsetof(struct sockaddr_in, sin_addr);
  1353. if (len >= 0) {
  1354. mask.s_addr = 0;
  1355. if (len > sizeof(struct in_addr))
  1356. len = sizeof(struct in_addr);
  1357. memcpy(&mask, &mask_sa->sin_addr, len);
  1358. } else {
  1359. RTS_PID_LOG(LOG_DEBUG, "prefix mask sin_len too small: %d",
  1360. mask_sa->sin_len);
  1361. return (EINVAL);
  1362. }
  1363. } else
  1364. mask.s_addr = mask_sa ? mask_sa->sin_addr.s_addr : INADDR_BROADCAST;
  1365. dst.s_addr = htonl(ntohl(dst_sa->sin_addr.s_addr) & ntohl(mask.s_addr));
  1366. /* Construct new "clean" dst/mask sockaddresses */
  1367. if ((dst_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
  1368. return (ENOBUFS);
  1369. fill_sockaddr_inet(dst_sa, dst);
  1370. info->rti_info[RTAX_DST] = (struct sockaddr *)dst_sa;
  1371. if (mask.s_addr != INADDR_BROADCAST) {
  1372. if ((mask_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
  1373. return (ENOBUFS);
  1374. fill_sockaddr_inet(mask_sa, mask);
  1375. info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask_sa;
  1376. info->rti_flags &= ~RTF_HOST;
  1377. } else
  1378. remove_netmask(info);
  1379. /* Check gateway */
  1380. if (info->rti_info[RTAX_GATEWAY] != NULL)
  1381. return (cleanup_xaddrs_gateway(info, lb));
  1382. return (0);
  1383. }
  1384. #endif
  1385. #ifdef INET6
  1386. static int
  1387. cleanup_xaddrs_inet6(struct rt_addrinfo *info, struct linear_buffer *lb)
  1388. {
  1389. struct sockaddr *sa;
  1390. struct sockaddr_in6 *dst_sa, *mask_sa;
  1391. struct in6_addr mask, *dst;
  1392. const int sa_len = sizeof(struct sockaddr_in6);
  1393. /* Check & fixup dst/netmask combination first */
  1394. dst_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_DST];
  1395. mask_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_NETMASK];
  1396. if (dst_sa->sin6_len < sizeof(struct sockaddr_in6)) {
  1397. RTS_PID_LOG(LOG_DEBUG, "prefix dst sin6_len too small: %d",
  1398. dst_sa->sin6_len);
  1399. return (EINVAL);
  1400. }
  1401. if (mask_sa && mask_sa->sin6_len < sizeof(struct sockaddr_in6)) {
  1402. /*
  1403. * Some older routing software encode mask length into the
  1404. * sin6_len, thus resulting in "truncated" sockaddr.
  1405. */
  1406. int len = mask_sa->sin6_len - offsetof(struct sockaddr_in6, sin6_addr);
  1407. if (len >= 0) {
  1408. bzero(&mask, sizeof(mask));
  1409. if (len > sizeof(struct in6_addr))
  1410. len = sizeof(struct in6_addr);
  1411. memcpy(&mask, &mask_sa->sin6_addr, len);
  1412. } else {
  1413. RTS_PID_LOG(LOG_DEBUG, "rtsock: prefix mask sin6_len too small: %d",
  1414. mask_sa->sin6_len);
  1415. return (EINVAL);
  1416. }
  1417. } else
  1418. mask = mask_sa ? mask_sa->sin6_addr : in6mask128;
  1419. dst = &dst_sa->sin6_addr;
  1420. IN6_MASK_ADDR(dst, &mask);
  1421. if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
  1422. return (ENOBUFS);
  1423. fill_sockaddr_inet6((struct sockaddr_in6 *)sa, dst, 0);
  1424. info->rti_info[RTAX_DST] = sa;
  1425. if (!IN6_ARE_ADDR_EQUAL(&mask, &in6mask128)) {
  1426. if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
  1427. return (ENOBUFS);
  1428. fill_sockaddr_inet6((struct sockaddr_in6 *)sa, &mask, 0);
  1429. info->rti_info[RTAX_NETMASK] = sa;
  1430. info->rti_flags &= ~RTF_HOST;
  1431. } else
  1432. remove_netmask(info);
  1433. /* Check gateway */
  1434. if (info->rti_info[RTAX_GATEWAY] != NULL)
  1435. return (cleanup_xaddrs_gateway(info, lb));
  1436. return (0);
  1437. }
  1438. #endif
  1439. static int
  1440. cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb)
  1441. {
  1442. int error = EAFNOSUPPORT;
  1443. if (info->rti_info[RTAX_DST] == NULL) {
  1444. RTS_PID_LOG(LOG_DEBUG, "prefix dst is not set");
  1445. return (EINVAL);
  1446. }
  1447. if (info->rti_flags & RTF_LLDATA) {
  1448. /*
  1449. * arp(8)/ndp(8) sends RTA_NETMASK for the associated
  1450. * prefix along with the actual address in RTA_DST.
  1451. * Remove netmask to avoid unnecessary address masking.
  1452. */
  1453. remove_netmask(info);
  1454. }
  1455. switch (info->rti_info[RTAX_DST]->sa_family) {
  1456. #ifdef INET
  1457. case AF_INET:
  1458. error = cleanup_xaddrs_inet(info, lb);
  1459. break;
  1460. #endif
  1461. #ifdef INET6
  1462. case AF_INET6:
  1463. error = cleanup_xaddrs_inet6(info, lb);
  1464. break;
  1465. #endif
  1466. }
  1467. return (error);
  1468. }
  1469. /*
  1470. * Fill in @dmask with valid netmask leaving original @smask
  1471. * intact. Mostly used with radix netmasks.
  1472. */
  1473. struct sockaddr *
  1474. rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask,
  1475. struct sockaddr_storage *dmask)
  1476. {
  1477. if (dst == NULL || smask == NULL)
  1478. return (NULL);
  1479. memset(dmask, 0, dst->sa_len);
  1480. memcpy(dmask, smask, smask->sa_len);
  1481. dmask->ss_len = dst->sa_len;
  1482. dmask->ss_family = dst->sa_family;
  1483. return ((struct sockaddr *)dmask);
  1484. }
  1485. /*
  1486. * Writes information related to @rtinfo object to newly-allocated mbuf.
  1487. * Assumes MCLBYTES is enough to construct any message.
  1488. * Used for OS notifications of vaious events (if/ifa announces,etc)
  1489. *
  1490. * Returns allocated mbuf or NULL on failure.
  1491. */
  1492. static struct mbuf *
  1493. rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
  1494. {
  1495. struct sockaddr_storage ss;
  1496. struct rt_msghdr *rtm;
  1497. struct mbuf *m;
  1498. int i;
  1499. struct sockaddr *sa;
  1500. #ifdef INET6
  1501. struct sockaddr_in6 *sin6;
  1502. #endif
  1503. int len, dlen;
  1504. switch (type) {
  1505. case RTM_DELADDR:
  1506. case RTM_NEWADDR:
  1507. len = sizeof(struct ifa_msghdr);
  1508. break;
  1509. case RTM_DELMADDR:
  1510. case RTM_NEWMADDR:
  1511. len = sizeof(struct ifma_msghdr);
  1512. break;
  1513. case RTM_IFINFO:
  1514. len = sizeof(struct if_msghdr);
  1515. break;
  1516. case RTM_IFANNOUNCE:
  1517. case RTM_IEEE80211:
  1518. len = sizeof(struct if_announcemsghdr);
  1519. break;
  1520. default:
  1521. len = sizeof(struct rt_msghdr);
  1522. }
  1523. /* XXXGL: can we use MJUMPAGESIZE cluster here? */
  1524. KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
  1525. if (len > MHLEN)
  1526. m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
  1527. else
  1528. m = m_gethdr(M_NOWAIT, MT_DATA);
  1529. if (m == NULL)
  1530. return (m);
  1531. m->m_pkthdr.len = m->m_len = len;
  1532. rtm = mtod(m, struct rt_msghdr *);
  1533. bzero((caddr_t)rtm, len);
  1534. for (i = 0; i < RTAX_MAX; i++) {
  1535. if ((sa = rtinfo->rti_info[i]) == NULL)
  1536. continue;
  1537. rtinfo->rti_addrs |= (1 << i);
  1538. dlen = SA_SIZE(sa);
  1539. KASSERT(dlen <= sizeof(ss),
  1540. ("%s: sockaddr size overflow", __func__));
  1541. bzero(&ss, sizeof(ss));
  1542. bcopy(sa, &ss, sa->sa_len);
  1543. sa = (struct sockaddr *)&ss;
  1544. #ifdef INET6
  1545. if (sa->sa_family == AF_INET6) {
  1546. sin6 = (struct sockaddr_in6 *)sa;
  1547. (void)sa6_recoverscope(sin6);
  1548. }
  1549. #endif
  1550. m_copyback(m, len, dlen, (caddr_t)sa);
  1551. len += dlen;
  1552. }
  1553. if (m->m_pkthdr.len != len) {
  1554. m_freem(m);
  1555. return (NULL);
  1556. }
  1557. rtm->rtm_msglen = len;
  1558. rtm->rtm_version = RTM_VERSION;
  1559. rtm->rtm_type = type;
  1560. return (m);
  1561. }
/*
 * Writes information related to @rtinfo object to preallocated buffer.
 * Stores needed size in @plen. If @w is NULL, calculates size without
 * writing.
 * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
 *
 * The message is laid out in w->w_tmem as a fixed header (whose size
 * depends on @type and, for sysctl dumps, on w->w_op and the 32-bit compat
 * flag) followed by every non-NULL sockaddr of rtinfo->rti_info[], then
 * zero padding up to ALIGN() of the total length.
 *
 * rtinfo->rti_addrs is rebuilt here: bit i is set for each non-NULL
 * rti_info[i].  Only rtm_version, rtm_type and rtm_msglen of the header
 * are filled in; the remaining header fields are left to the caller.
 *
 * Returns 0 on success, or ENOBUFS when @w was supplied but the buffer is
 * too small.  @plen receives the aligned required size in either case.
 */
static int
rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
{
	struct sockaddr_storage ss;
	int len, buflen = 0, dlen, i;
	caddr_t cp = NULL;
	struct rt_msghdr *rtm = NULL;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
#ifdef COMPAT_FREEBSD32
	bool compat32 = false;
#endif

	/* Select the fixed header size for this message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
#ifdef COMPAT_FREEBSD32
			if (w->w_req->flags & SCTL_MASK32) {
				len = sizeof(struct ifa_msghdrl32);
				compat32 = true;
			} else
#endif
				len = sizeof(struct ifa_msghdrl);
		} else
			len = sizeof(struct ifa_msghdr);
		break;

	case RTM_IFINFO:
#ifdef COMPAT_FREEBSD32
		if (w != NULL && w->w_req->flags & SCTL_MASK32) {
			if (w->w_op == NET_RT_IFLISTL)
				len = sizeof(struct if_msghdrl32);
			else
				len = sizeof(struct if_msghdr32);
			compat32 = true;
			break;
		}
#endif
		if (w != NULL && w->w_op == NET_RT_IFLISTL)
			len = sizeof(struct if_msghdrl);
		else
			len = sizeof(struct if_msghdr);
		break;

	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}

	/*
	 * With a walkarg, cp is the write cursor just past the header and
	 * buflen the space left behind it.  Without one, cp stays NULL and
	 * the loop below only accumulates sizes.
	 */
	if (w != NULL) {
		rtm = (struct rt_msghdr *)w->w_tmem;
		buflen = w->w_tmemsize - len;
		cp = (caddr_t)w->w_tmem + len;
	}

	rtinfo->rti_addrs = 0;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa;

		if ((sa = rtinfo->rti_info[i]) == NULL)
			continue;
		rtinfo->rti_addrs |= (1 << i);
#ifdef COMPAT_FREEBSD32
		if (compat32)
			dlen = SA_SIZE32(sa);
		else
#endif
			dlen = SA_SIZE(sa);
		if (cp != NULL && buflen >= dlen) {
			KASSERT(dlen <= sizeof(ss),
			    ("%s: sockaddr size overflow", __func__));
			/*
			 * Export a zero-filled copy: sa_len bytes come from
			 * the source, the rest of the dlen bytes written out
			 * are zero.
			 */
			bzero(&ss, sizeof(ss));
			bcopy(sa, &ss, sa->sa_len);
			sa = (struct sockaddr *)&ss;
#ifdef INET6
			if (sa->sa_family == AF_INET6) {
				/*
				 * Applied to the local copy only; the result
				 * is deliberately ignored.
				 * NOTE(review): presumably restores the scope
				 * id for export - confirm in
				 * sa6_recoverscope().
				 */
				sin6 = (struct sockaddr_in6 *)sa;
				(void)sa6_recoverscope(sin6);
			}
#endif
			bcopy((caddr_t)sa, cp, (unsigned)dlen);
			cp += dlen;
			buflen -= dlen;
		} else if (cp != NULL) {
			/*
			 * Buffer too small. Count needed size
			 * and return with error.
			 */
			cp = NULL;
		}

		/* Always counted, so @plen reports the full requirement. */
		len += dlen;
	}

	/* Zero the alignment padding if it still fits. */
	if (cp != NULL) {
		dlen = ALIGN(len) - len;
		if (buflen < dlen)
			cp = NULL;
		else {
			bzero(cp, dlen);
			cp += dlen;
			buflen -= dlen;
		}
	}
	len = ALIGN(len);

	if (cp != NULL) {
		/* fill header iff buffer is large enough */
		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		rtm->rtm_msglen = len;
	}

	*plen = len;

	/* cp was cleared above whenever something did not fit. */
	if (w != NULL && cp == NULL)
		return (ENOBUFS);

	return (0);
}
  1683. /*
  1684. * This routine is called to generate a message from the routing
  1685. * socket indicating that a redirect has occurred, a routing lookup
  1686. * has failed, or that a protocol has detected timeouts to a particular
  1687. * destination.
  1688. */
  1689. void
  1690. rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
  1691. int fibnum)
  1692. {
  1693. struct rt_msghdr *rtm;
  1694. struct mbuf *m;
  1695. struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
  1696. if (V_route_cb.any_count == 0)
  1697. return;
  1698. m = rtsock_msg_mbuf(type, rtinfo);
  1699. if (m == NULL)
  1700. return;
  1701. if (fibnum != RT_ALL_FIBS) {
  1702. KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
  1703. "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
  1704. M_SETFIB(m, fibnum);
  1705. m->m_flags |= RTS_FILTER_FIB;
  1706. }
  1707. rtm = mtod(m, struct rt_msghdr *);
  1708. rtm->rtm_flags = RTF_DONE | flags;
  1709. rtm->rtm_errno = error;
  1710. rtm->rtm_addrs = rtinfo->rti_addrs;
  1711. rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
  1712. }
/*
 * Convenience wrapper around rt_missmsg_fib() that passes RT_ALL_FIBS,
 * i.e. the message is not tagged with a specific FIB.
 */
void
rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
{

	rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
}
  1718. /*
  1719. * This routine is called to generate a message from the routing
  1720. * socket indicating that the status of a network interface has changed.
  1721. */
  1722. static void
  1723. rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused)
  1724. {
  1725. struct if_msghdr *ifm;
  1726. struct mbuf *m;
  1727. struct rt_addrinfo info;
  1728. if (V_route_cb.any_count == 0)
  1729. return;
  1730. bzero((caddr_t)&info, sizeof(info));
  1731. m = rtsock_msg_mbuf(RTM_IFINFO, &info);
  1732. if (m == NULL)
  1733. return;
  1734. ifm = mtod(m, struct if_msghdr *);
  1735. ifm->ifm_index = ifp->if_index;
  1736. ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
  1737. if_data_copy(ifp, &ifm->ifm_data);
  1738. ifm->ifm_addrs = 0;
  1739. rt_dispatch(m, AF_UNSPEC);
  1740. }
  1741. /*
  1742. * Announce interface address arrival/withdraw.
  1743. * Please do not call directly, use rt_addrmsg().
  1744. * Assume input data to be valid.
  1745. * Returns 0 on success.
  1746. */
  1747. int
  1748. rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
  1749. {
  1750. struct rt_addrinfo info;
  1751. struct sockaddr *sa;
  1752. int ncmd;
  1753. struct mbuf *m;
  1754. struct ifa_msghdr *ifam;
  1755. struct ifnet *ifp = ifa->ifa_ifp;
  1756. struct sockaddr_storage ss;
  1757. if (V_route_cb.any_count == 0)
  1758. return (0);
  1759. ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
  1760. bzero((caddr_t)&info, sizeof(info));
  1761. info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
  1762. info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
  1763. info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
  1764. info.rti_info[RTAX_IFA], ifa->ifa_netmask, &ss);
  1765. info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
  1766. if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
  1767. return (ENOBUFS);
  1768. ifam = mtod(m, struct ifa_msghdr *);
  1769. ifam->ifam_index = ifp->if_index;
  1770. ifam->ifam_metric = ifa->ifa_ifp->if_metric;
  1771. ifam->ifam_flags = ifa->ifa_flags;
  1772. ifam->ifam_addrs = info.rti_addrs;
  1773. if (fibnum != RT_ALL_FIBS) {
  1774. M_SETFIB(m, fibnum);
  1775. m->m_flags |= RTS_FILTER_FIB;
  1776. }
  1777. rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
  1778. return (0);
  1779. }
  1780. /*
  1781. * Announce route addition/removal to rtsock based on @rt data.
  1782. * Callers are advives to use rt_routemsg() instead of using this
  1783. * function directly.
  1784. * Assume @rt data is consistent.
  1785. *
  1786. * Returns 0 on success.
  1787. */
  1788. int
  1789. rtsock_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh,
  1790. int fibnum)
  1791. {
  1792. union sockaddr_union dst, mask;
  1793. struct rt_addrinfo info;
  1794. if (V_route_cb.any_count == 0)
  1795. return (0);
  1796. int family = rt_get_family(rt);
  1797. init_sockaddrs_family(family, &dst.sa, &mask.sa);
  1798. export_rtaddrs(rt, &dst.sa, &mask.sa);
  1799. bzero((caddr_t)&info, sizeof(info));
  1800. info.rti_info[RTAX_DST] = &dst.sa;
  1801. info.rti_info[RTAX_NETMASK] = &mask.sa;
  1802. info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
  1803. info.rti_flags = rt->rte_flags | nhop_get_rtflags(nh);
  1804. info.rti_ifp = nh->nh_ifp;
  1805. return (rtsock_routemsg_info(cmd, &info, fibnum));
  1806. }
  1807. int
  1808. rtsock_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
  1809. {
  1810. struct rt_msghdr *rtm;
  1811. struct sockaddr *sa;
  1812. struct mbuf *m;
  1813. if (V_route_cb.any_count == 0)
  1814. return (0);
  1815. if (info->rti_flags & RTF_HOST)
  1816. info->rti_info[RTAX_NETMASK] = NULL;
  1817. m = rtsock_msg_mbuf(cmd, info);
  1818. if (m == NULL)
  1819. return (ENOBUFS);
  1820. if (fibnum != RT_ALL_FIBS) {
  1821. KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
  1822. "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
  1823. M_SETFIB(m, fibnum);
  1824. m->m_flags |= RTS_FILTER_FIB;
  1825. }
  1826. rtm = mtod(m, struct rt_msghdr *);
  1827. rtm->rtm_addrs = info->rti_addrs;
  1828. if (info->rti_ifp != NULL)
  1829. rtm->rtm_index = info->rti_ifp->if_index;
  1830. /* Add RTF_DONE to indicate command 'completion' required by API */
  1831. info->rti_flags |= RTF_DONE;
  1832. /* Reported routes has to be up */
  1833. if (cmd == RTM_ADD || cmd == RTM_CHANGE)
  1834. info->rti_flags |= RTF_UP;
  1835. rtm->rtm_flags = info->rti_flags;
  1836. sa = info->rti_info[RTAX_DST];
  1837. rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
  1838. return (0);
  1839. }
  1840. /*
  1841. * This is the analogue to the rt_newaddrmsg which performs the same
  1842. * function but for multicast group memberhips. This is easier since
  1843. * there is no route state to worry about.
  1844. */
  1845. void
  1846. rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
  1847. {
  1848. struct rt_addrinfo info;
  1849. struct mbuf *m = NULL;
  1850. struct ifnet *ifp = ifma->ifma_ifp;
  1851. struct ifma_msghdr *ifmam;
  1852. if (V_route_cb.any_count == 0)
  1853. return;
  1854. bzero((caddr_t)&info, sizeof(info));
  1855. info.rti_info[RTAX_IFA] = ifma->ifma_addr;
  1856. if (ifp && ifp->if_addr)
  1857. info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
  1858. else
  1859. info.rti_info[RTAX_IFP] = NULL;
  1860. /*
  1861. * If a link-layer address is present, present it as a ``gateway''
  1862. * (similarly to how ARP entries, e.g., are presented).
  1863. */
  1864. info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
  1865. m = rtsock_msg_mbuf(cmd, &info);
  1866. if (m == NULL)
  1867. return;
  1868. ifmam = mtod(m, struct ifma_msghdr *);
  1869. KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
  1870. __func__));
  1871. ifmam->ifmam_index = ifp->if_index;
  1872. ifmam->ifmam_addrs = info.rti_addrs;
  1873. rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
  1874. }
  1875. static struct mbuf *
  1876. rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
  1877. struct rt_addrinfo *info)
  1878. {
  1879. struct if_announcemsghdr *ifan;
  1880. struct mbuf *m;
  1881. if (V_route_cb.any_count == 0)
  1882. return NULL;
  1883. bzero((caddr_t)info, sizeof(*info));
  1884. m = rtsock_msg_mbuf(type, info);
  1885. if (m != NULL) {
  1886. ifan = mtod(m, struct if_announcemsghdr *);
  1887. ifan->ifan_index = ifp->if_index;
  1888. strlcpy(ifan->ifan_name, ifp->if_xname,
  1889. sizeof(ifan->ifan_name));
  1890. ifan->ifan_what = what;
  1891. }
  1892. return m;
  1893. }
  1894. /*
  1895. * This is called to generate routing socket messages indicating
  1896. * IEEE80211 wireless events.
  1897. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
  1898. */
  1899. void
  1900. rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
  1901. {
  1902. struct mbuf *m;
  1903. struct rt_addrinfo info;
  1904. m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
  1905. if (m != NULL) {
  1906. /*
  1907. * Append the ieee80211 data. Try to stick it in the
  1908. * mbuf containing the ifannounce msg; otherwise allocate
  1909. * a new mbuf and append.
  1910. *
  1911. * NB: we assume m is a single mbuf.
  1912. */
  1913. if (data_len > M_TRAILINGSPACE(m)) {
  1914. struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
  1915. if (n == NULL) {
  1916. m_freem(m);
  1917. return;
  1918. }
  1919. bcopy(data, mtod(n, void *), data_len);
  1920. n->m_len = data_len;
  1921. m->m_next = n;
  1922. } else if (data_len > 0) {
  1923. bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
  1924. m->m_len += data_len;
  1925. }
  1926. if (m->m_flags & M_PKTHDR)
  1927. m->m_pkthdr.len += data_len;
  1928. mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
  1929. rt_dispatch(m, AF_UNSPEC);
  1930. }
  1931. }
  1932. /*
  1933. * This is called to generate routing socket messages indicating
  1934. * network interface arrival and departure.
  1935. */
  1936. static void
  1937. rt_ifannouncemsg(struct ifnet *ifp, int what)
  1938. {
  1939. struct mbuf *m;
  1940. struct rt_addrinfo info;
  1941. m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
  1942. if (m != NULL)
  1943. rt_dispatch(m, AF_UNSPEC);
  1944. }
  1945. static void
  1946. rt_dispatch(struct mbuf *m, sa_family_t saf)
  1947. {
  1948. M_ASSERTPKTHDR(m);
  1949. m->m_rtsock_family = saf;
  1950. if (V_loif)
  1951. m->m_pkthdr.rcvif = V_loif;
  1952. else {
  1953. m_freem(m);
  1954. return;
  1955. }
  1956. netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
  1957. }
  1958. /*
  1959. * This is used in dumping the kernel table via sysctl().
  1960. */
  1961. static int
  1962. sysctl_dumpentry(struct rtentry *rt, void *vw)
  1963. {
  1964. struct walkarg *w = vw;
  1965. struct nhop_object *nh;
  1966. NET_EPOCH_ASSERT();
  1967. if (!rt_is_exportable(rt, w->w_req->td->td_ucred))
  1968. return (0);
  1969. export_rtaddrs(rt, w->dst, w->mask);
  1970. nh = rt_get_raw_nhop(rt);
  1971. #ifdef ROUTE_MPATH
  1972. if (NH_IS_NHGRP(nh)) {
  1973. const struct weightened_nhop *wn;
  1974. uint32_t num_nhops;
  1975. int error;
  1976. wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
  1977. for (int i = 0; i < num_nhops; i++) {
  1978. error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w);
  1979. if (error != 0)
  1980. return (error);
  1981. }
  1982. } else
  1983. #endif
  1984. sysctl_dumpnhop(rt, nh, rt->rt_weight, w);
  1985. return (0);
  1986. }
  1987. static int
  1988. sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight,
  1989. struct walkarg *w)
  1990. {
  1991. struct rt_addrinfo info;
  1992. int error = 0, size;
  1993. uint32_t rtflags;
  1994. rtflags = nhop_get_rtflags(nh);
  1995. if (w->w_op == NET_RT_FLAGS && !(rtflags & w->w_arg))
  1996. return (0);
  1997. bzero((caddr_t)&info, sizeof(info));
  1998. info.rti_info[RTAX_DST] = w->dst;
  1999. info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
  2000. info.rti_info[RTAX_NETMASK] = (rtflags & RTF_HOST) ? NULL : w->mask;
  2001. info.rti_info[RTAX_GENMASK] = 0;
  2002. if (nh->nh_ifp && !(nh->nh_ifp->if_flags & IFF_DYING)) {
  2003. info.rti_info[RTAX_IFP] = nh->nh_ifp->if_addr->ifa_addr;
  2004. info.rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
  2005. if (nh->nh_ifp->if_flags & IFF_POINTOPOINT)
  2006. info.rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr;
  2007. }
  2008. if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
  2009. return (error);
  2010. if (w->w_req && w->w_tmem) {
  2011. struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
  2012. bzero(&rtm->rtm_index,
  2013. sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index));
  2014. /*
  2015. * rte flags may consist of RTF_HOST (duplicated in nhop rtflags)
  2016. * and RTF_UP (if entry is linked, which is always true here).
  2017. * Given that, use nhop rtflags & add RTF_UP.
  2018. */
  2019. rtm->rtm_flags = rtflags | RTF_UP;
  2020. if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
  2021. rtm->rtm_flags = RTF_GATEWAY |
  2022. (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
  2023. rt_getmetrics(rt, nh, &rtm->rtm_rmx);
  2024. rtm->rtm_rmx.rmx_weight = weight;
  2025. rtm->rtm_index = nh->nh_ifp->if_index;
  2026. rtm->rtm_addrs = info.rti_addrs;
  2027. error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
  2028. return (error);
  2029. }
  2030. return (error);
  2031. }
  2032. static int
  2033. sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
  2034. struct rt_addrinfo *info, struct walkarg *w, int len)
  2035. {
  2036. struct if_msghdrl *ifm;
  2037. struct if_data *ifd;
  2038. ifm = (struct if_msghdrl *)w->w_tmem;
  2039. #ifdef COMPAT_FREEBSD32
  2040. if (w->w_req->flags & SCTL_MASK32) {
  2041. struct if_msghdrl32 *ifm32;
  2042. ifm32 = (struct if_msghdrl32 *)ifm;
  2043. ifm32->ifm_addrs = info->rti_addrs;
  2044. ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
  2045. ifm32->ifm_index = ifp->if_index;
  2046. ifm32->_ifm_spare1 = 0;
  2047. ifm32->ifm_len = sizeof(*ifm32);
  2048. ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
  2049. ifm32->_ifm_spare2 = 0;
  2050. ifd = &ifm32->ifm_data;
  2051. } else
  2052. #endif
  2053. {
  2054. ifm->ifm_addrs = info->rti_addrs;
  2055. ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
  2056. ifm->ifm_index = ifp->if_index;
  2057. ifm->_ifm_spare1 = 0;
  2058. ifm->ifm_len = sizeof(*ifm);
  2059. ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
  2060. ifm->_ifm_spare2 = 0;
  2061. ifd = &ifm->ifm_data;
  2062. }
  2063. memcpy(ifd, src_ifd, sizeof(*ifd));
  2064. return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
  2065. }
  2066. static int
  2067. sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
  2068. struct rt_addrinfo *info, struct walkarg *w, int len)
  2069. {
  2070. struct if_msghdr *ifm;
  2071. struct if_data *ifd;
  2072. ifm = (struct if_msghdr *)w->w_tmem;
  2073. #ifdef COMPAT_FREEBSD32
  2074. if (w->w_req->flags & SCTL_MASK32) {
  2075. struct if_msghdr32 *ifm32;
  2076. ifm32 = (struct if_msghdr32 *)ifm;
  2077. ifm32->ifm_addrs = info->rti_addrs;
  2078. ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
  2079. ifm32->ifm_index = ifp->if_index;
  2080. ifm32->_ifm_spare1 = 0;
  2081. ifd = &ifm32->ifm_data;
  2082. } else
  2083. #endif
  2084. {
  2085. ifm->ifm_addrs = info->rti_addrs;
  2086. ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
  2087. ifm->ifm_index = ifp->if_index;
  2088. ifm->_ifm_spare1 = 0;
  2089. ifd = &ifm->ifm_data;
  2090. }
  2091. memcpy(ifd, src_ifd, sizeof(*ifd));
  2092. return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
  2093. }
  2094. static int
  2095. sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
  2096. struct walkarg *w, int len)
  2097. {
  2098. struct ifa_msghdrl *ifam;
  2099. struct if_data *ifd;
  2100. ifam = (struct ifa_msghdrl *)w->w_tmem;
  2101. #ifdef COMPAT_FREEBSD32
  2102. if (w->w_req->flags & SCTL_MASK32) {
  2103. struct ifa_msghdrl32 *ifam32;
  2104. ifam32 = (struct ifa_msghdrl32 *)ifam;
  2105. ifam32->ifam_addrs = info->rti_addrs;
  2106. ifam32->ifam_flags = ifa->ifa_flags;
  2107. ifam32->ifam_index = ifa->ifa_ifp->if_index;
  2108. ifam32->_ifam_spare1 = 0;
  2109. ifam32->ifam_len = sizeof(*ifam32);
  2110. ifam32->ifam_data_off =
  2111. offsetof(struct ifa_msghdrl32, ifam_data);
  2112. ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
  2113. ifd = &ifam32->ifam_data;
  2114. } else
  2115. #endif
  2116. {
  2117. ifam->ifam_addrs = info->rti_addrs;
  2118. ifam->ifam_flags = ifa->ifa_flags;
  2119. ifam->ifam_index = ifa->ifa_ifp->if_index;
  2120. ifam->_ifam_spare1 = 0;
  2121. ifam->ifam_len = sizeof(*ifam);
  2122. ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
  2123. ifam->ifam_metric = ifa->ifa_ifp->if_metric;
  2124. ifd = &ifam->ifam_data;
  2125. }
  2126. bzero(ifd, sizeof(*ifd));
  2127. ifd->ifi_datalen = sizeof(struct if_data);
  2128. ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
  2129. ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
  2130. ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
  2131. ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
  2132. /* Fixup if_data carp(4) vhid. */
  2133. if (carp_get_vhid_p != NULL)
  2134. ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
  2135. return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
  2136. }
  2137. static int
  2138. sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
  2139. struct walkarg *w, int len)
  2140. {
  2141. struct ifa_msghdr *ifam;
  2142. ifam = (struct ifa_msghdr *)w->w_tmem;
  2143. ifam->ifam_addrs = info->rti_addrs;
  2144. ifam->ifam_flags = ifa->ifa_flags;
  2145. ifam->ifam_index = ifa->ifa_ifp->if_index;
  2146. ifam->_ifam_spare1 = 0;
  2147. ifam->ifam_metric = ifa->ifa_ifp->if_metric;
  2148. return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
  2149. }
  2150. static int
  2151. sysctl_iflist(int af, struct walkarg *w)
  2152. {
  2153. struct ifnet *ifp;
  2154. struct ifaddr *ifa;
  2155. struct if_data ifd;
  2156. struct rt_addrinfo info;
  2157. int len, error = 0;
  2158. struct sockaddr_storage ss;
  2159. bzero((caddr_t)&info, sizeof(info));
  2160. bzero(&ifd, sizeof(ifd));
  2161. CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
  2162. if (w->w_arg && w->w_arg != ifp->if_index)
  2163. continue;
  2164. if_data_copy(ifp, &ifd);
  2165. ifa = ifp->if_addr;
  2166. info.rti_info[RTAX_IFP] = ifa->ifa_addr;
  2167. error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
  2168. if (error != 0)
  2169. goto done;
  2170. info.rti_info[RTAX_IFP] = NULL;
  2171. if (w->w_req && w->w_tmem) {
  2172. if (w->w_op == NET_RT_IFLISTL)
  2173. error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
  2174. len);
  2175. else
  2176. error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
  2177. len);
  2178. if (error)
  2179. goto done;
  2180. }
  2181. while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) {
  2182. if (af && af != ifa->ifa_addr->sa_family)
  2183. continue;
  2184. if (prison_if(w->w_req->td->td_ucred,
  2185. ifa->ifa_addr) != 0)
  2186. continue;
  2187. info.rti_info[RTAX_IFA] = ifa->ifa_addr;
  2188. info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
  2189. ifa->ifa_addr, ifa->ifa_netmask, &ss);
  2190. info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
  2191. error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
  2192. if (error != 0)
  2193. goto done;
  2194. if (w->w_req && w->w_tmem) {
  2195. if (w->w_op == NET_RT_IFLISTL)
  2196. error = sysctl_iflist_ifaml(ifa, &info,
  2197. w, len);
  2198. else
  2199. error = sysctl_iflist_ifam(ifa, &info,
  2200. w, len);
  2201. if (error)
  2202. goto done;
  2203. }
  2204. }
  2205. info.rti_info[RTAX_IFA] = NULL;
  2206. info.rti_info[RTAX_NETMASK] = NULL;
  2207. info.rti_info[RTAX_BRD] = NULL;
  2208. }
  2209. done:
  2210. return (error);
  2211. }
  2212. static int
  2213. sysctl_ifmalist(int af, struct walkarg *w)
  2214. {
  2215. struct rt_addrinfo info;
  2216. struct ifaddr *ifa;
  2217. struct ifmultiaddr *ifma;
  2218. struct ifnet *ifp;
  2219. int error, len;
  2220. NET_EPOCH_ASSERT();
  2221. error = 0;
  2222. bzero((caddr_t)&info, sizeof(info));
  2223. CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
  2224. if (w->w_arg && w->w_arg != ifp->if_index)
  2225. continue;
  2226. ifa = ifp->if_addr;
  2227. info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
  2228. CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  2229. if (af && af != ifma->ifma_addr->sa_family)
  2230. continue;
  2231. if (prison_if(w->w_req->td->td_ucred,
  2232. ifma->ifma_addr) != 0)
  2233. continue;
  2234. info.rti_info[RTAX_IFA] = ifma->ifma_addr;
  2235. info.rti_info[RTAX_GATEWAY] =
  2236. (ifma->ifma_addr->sa_family != AF_LINK) ?
  2237. ifma->ifma_lladdr : NULL;
  2238. error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
  2239. if (error != 0)
  2240. break;
  2241. if (w->w_req && w->w_tmem) {
  2242. struct ifma_msghdr *ifmam;
  2243. ifmam = (struct ifma_msghdr *)w->w_tmem;
  2244. ifmam->ifmam_index = ifma->ifma_ifp->if_index;
  2245. ifmam->ifmam_flags = 0;
  2246. ifmam->ifmam_addrs = info.rti_addrs;
  2247. ifmam->_ifmam_spare1 = 0;
  2248. error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
  2249. if (error != 0)
  2250. break;
  2251. }
  2252. }
  2253. if (error != 0)
  2254. break;
  2255. }
  2256. return (error);
  2257. }
  2258. static void
  2259. rtable_sysctl_dump(uint32_t fibnum, int family, struct walkarg *w)
  2260. {
  2261. union sockaddr_union sa_dst, sa_mask;
  2262. w->family = family;
  2263. w->dst = (struct sockaddr *)&sa_dst;
  2264. w->mask = (struct sockaddr *)&sa_mask;
  2265. init_sockaddrs_family(family, w->dst, w->mask);
  2266. rib_walk(fibnum, family, false, sysctl_dumpentry, w);
  2267. }
  2268. static int
  2269. sysctl_rtsock(SYSCTL_HANDLER_ARGS)
  2270. {
  2271. struct epoch_tracker et;
  2272. int *name = (int *)arg1;
  2273. u_int namelen = arg2;
  2274. struct rib_head *rnh = NULL; /* silence compiler. */
  2275. int i, lim, error = EINVAL;
  2276. int fib = 0;
  2277. u_char af;
  2278. struct walkarg w;
  2279. if (namelen < 3)
  2280. return (EINVAL);
  2281. name++;
  2282. namelen--;
  2283. if (req->newptr)
  2284. return (EPERM);
  2285. if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP || name[1] == NET_RT_NHGRP) {
  2286. if (namelen == 3)
  2287. fib = req->td->td_proc->p_fibnum;
  2288. else if (namelen == 4)
  2289. fib = (name[3] == RT_ALL_FIBS) ?
  2290. req->td->td_proc->p_fibnum : name[3];
  2291. else
  2292. return ((namelen < 3) ? EISDIR : ENOTDIR);
  2293. if (fib < 0 || fib >= rt_numfibs)
  2294. return (EINVAL);
  2295. } else if (namelen != 3)
  2296. return ((namelen < 3) ? EISDIR : ENOTDIR);
  2297. af = name[0];
  2298. if (af > AF_MAX)
  2299. return (EINVAL);
  2300. bzero(&w, sizeof(w));
  2301. w.w_op = name[1];
  2302. w.w_arg = name[2];
  2303. w.w_req = req;
  2304. error = sysctl_wire_old_buffer(req, 0);
  2305. if (error)
  2306. return (error);
  2307. /*
  2308. * Allocate reply buffer in advance.
  2309. * All rtsock messages has maximum length of u_short.
  2310. */
  2311. w.w_tmemsize = 65536;
  2312. w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
  2313. NET_EPOCH_ENTER(et);
  2314. switch (w.w_op) {
  2315. case NET_RT_DUMP:
  2316. case NET_RT_FLAGS:
  2317. if (af == 0) { /* dump all tables */
  2318. i = 1;
  2319. lim = AF_MAX;
  2320. } else /* dump only one table */
  2321. i = lim = af;
  2322. /*
  2323. * take care of llinfo entries, the caller must
  2324. * specify an AF
  2325. */
  2326. if (w.w_op == NET_RT_FLAGS &&
  2327. (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
  2328. if (af != 0)
  2329. error = lltable_sysctl_dumparp(af, w.w_req);
  2330. else
  2331. error = EINVAL;
  2332. break;
  2333. }
  2334. /*
  2335. * take care of routing entries
  2336. */
  2337. for (error = 0; error == 0 && i <= lim; i++) {
  2338. rnh = rt_tables_get_rnh(fib, i);
  2339. if (rnh != NULL) {
  2340. rtable_sysctl_dump(fib, i, &w);
  2341. } else if (af != 0)
  2342. error = EAFNOSUPPORT;
  2343. }
  2344. break;
  2345. case NET_RT_NHOP:
  2346. case NET_RT_NHGRP:
  2347. /* Allow dumping one specific af/fib at a time */
  2348. if (namelen < 4) {
  2349. error = EINVAL;
  2350. break;
  2351. }
  2352. fib = name[3];
  2353. if (fib < 0 || fib > rt_numfibs) {
  2354. error = EINVAL;
  2355. break;
  2356. }
  2357. rnh = rt_tables_get_rnh(fib, af);
  2358. if (rnh == NULL) {
  2359. error = EAFNOSUPPORT;
  2360. break;
  2361. }
  2362. if (w.w_op == NET_RT_NHOP)
  2363. error = nhops_dump_sysctl(rnh, w.w_req);
  2364. else
  2365. #ifdef ROUTE_MPATH
  2366. error = nhgrp_dump_sysctl(rnh, w.w_req);
  2367. #else
  2368. error = ENOTSUP;
  2369. #endif
  2370. break;
  2371. case NET_RT_IFLIST:
  2372. case NET_RT_IFLISTL:
  2373. error = sysctl_iflist(af, &w);
  2374. break;
  2375. case NET_RT_IFMALIST:
  2376. error = sysctl_ifmalist(af, &w);
  2377. break;
  2378. }
  2379. NET_EPOCH_EXIT(et);
  2380. free(w.w_tmem, M_TEMP);
  2381. return (error);
  2382. }
/*
 * Register sysctl_rtsock() as the handler of the "routetable" node,
 * numbered PF_ROUTE, under the _net parent.  The node is declared
 * read-only (CTLFLAG_RD) and CTLFLAG_MPSAFE.
 */
static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
    sysctl_rtsock, "Return route tables and interface/address lists");
/*
 * Definitions of protocols supported in the ROUTE domain.
 *
 * The domain has a single protocol switch entry: a SOCK_RAW protocol
 * flagged PR_ATOMIC|PR_ADDR whose handlers are the rts_* functions.
 * Note that both pr_abort and pr_close point at rts_close.
 */

static struct domain routedomain;		/* or at least forward */

static struct protosw routesw = {
	.pr_type =		SOCK_RAW,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_abort =		rts_close,
	.pr_attach =		rts_attach,
	.pr_detach =		rts_detach,
	.pr_send =		rts_send,
	.pr_shutdown =		rts_shutdown,
	.pr_disconnect =	rts_disconnect,
	.pr_close =		rts_close,
};

/* The PF_ROUTE domain itself, named "route", with routesw as its only entry. */
static struct domain routedomain = {
	.dom_family =		PF_ROUTE,
	.dom_name =		"route",
	.dom_nprotosw =		1,
	.dom_protosw =		{ &routesw },
};

DOMAIN_SET(route);