uipc_usrreq.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. /* $OpenBSD: uipc_usrreq.c,v 1.83 2015/07/28 14:20:10 bluhm Exp $ */
  2. /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
  3. /*
  4. * Copyright (c) 1982, 1986, 1989, 1991, 1993
  5. * The Regents of the University of California. All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. Neither the name of the University nor the names of its contributors
  16. * may be used to endorse or promote products derived from this software
  17. * without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29. * SUCH DAMAGE.
  30. *
  31. * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
  32. */
  33. #include <sys/param.h>
  34. #include <sys/systm.h>
  35. #include <sys/proc.h>
  36. #include <sys/filedesc.h>
  37. #include <sys/domain.h>
  38. #include <sys/protosw.h>
  39. #include <sys/queue.h>
  40. #include <sys/socket.h>
  41. #include <sys/socketvar.h>
  42. #include <sys/unpcb.h>
  43. #include <sys/un.h>
  44. #include <sys/namei.h>
  45. #include <sys/vnode.h>
  46. #include <sys/file.h>
  47. #include <sys/stat.h>
  48. #include <sys/mbuf.h>
  49. void uipc_setaddr(const struct unpcb *, struct mbuf *);
  50. /*
  51. * Unix communications domain.
  52. *
  53. * TODO:
  54. * RDM
  55. * rethink name space problems
  56. * need a proper out-of-band
  57. */
  58. struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
  59. ino_t unp_ino; /* prototype for fake inode numbers */
  60. void
  61. uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
  62. {
  63. if (unp != NULL && unp->unp_addr != NULL) {
  64. nam->m_len = unp->unp_addr->m_len;
  65. memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
  66. nam->m_len);
  67. } else {
  68. nam->m_len = sizeof(sun_noname);
  69. memcpy(mtod(nam, struct sockaddr *), &sun_noname,
  70. nam->m_len);
  71. }
  72. }
  73. /*ARGSUSED*/
  74. int
  75. uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
  76. struct mbuf *control, struct proc *p)
  77. {
  78. struct unpcb *unp = sotounpcb(so);
  79. struct socket *so2;
  80. int error = 0;
  81. if (req == PRU_CONTROL)
  82. return (EOPNOTSUPP);
  83. if (req != PRU_SEND && control && control->m_len) {
  84. error = EOPNOTSUPP;
  85. goto release;
  86. }
  87. if (unp == NULL && req != PRU_ATTACH) {
  88. error = EINVAL;
  89. goto release;
  90. }
  91. switch (req) {
  92. case PRU_ATTACH:
  93. if (unp) {
  94. error = EISCONN;
  95. break;
  96. }
  97. error = unp_attach(so);
  98. break;
  99. case PRU_DETACH:
  100. unp_detach(unp);
  101. break;
  102. case PRU_BIND:
  103. error = unp_bind(unp, nam, p);
  104. break;
  105. case PRU_LISTEN:
  106. if (unp->unp_vnode == NULL)
  107. error = EINVAL;
  108. break;
  109. case PRU_CONNECT:
  110. error = unp_connect(so, nam, p);
  111. break;
  112. case PRU_CONNECT2:
  113. error = unp_connect2(so, (struct socket *)nam);
  114. break;
  115. case PRU_DISCONNECT:
  116. unp_disconnect(unp);
  117. break;
  118. case PRU_ACCEPT:
  119. /*
  120. * Pass back name of connected socket,
  121. * if it was bound and we are still connected
  122. * (our peer may have closed already!).
  123. */
  124. uipc_setaddr(unp->unp_conn, nam);
  125. break;
  126. case PRU_SHUTDOWN:
  127. socantsendmore(so);
  128. unp_shutdown(unp);
  129. break;
  130. case PRU_RCVD:
  131. switch (so->so_type) {
  132. case SOCK_DGRAM:
  133. panic("uipc 1");
  134. /*NOTREACHED*/
  135. case SOCK_STREAM:
  136. case SOCK_SEQPACKET:
  137. #define rcv (&so->so_rcv)
  138. #define snd (&so2->so_snd)
  139. if (unp->unp_conn == NULL)
  140. break;
  141. so2 = unp->unp_conn->unp_socket;
  142. /*
  143. * Adjust backpressure on sender
  144. * and wakeup any waiting to write.
  145. */
  146. snd->sb_mbcnt = rcv->sb_mbcnt;
  147. snd->sb_cc = rcv->sb_cc;
  148. sowwakeup(so2);
  149. #undef snd
  150. #undef rcv
  151. break;
  152. default:
  153. panic("uipc 2");
  154. }
  155. break;
  156. case PRU_SEND:
  157. if (control && (error = unp_internalize(control, p)))
  158. break;
  159. switch (so->so_type) {
  160. case SOCK_DGRAM: {
  161. struct sockaddr *from;
  162. if (nam) {
  163. if (unp->unp_conn) {
  164. error = EISCONN;
  165. break;
  166. }
  167. error = unp_connect(so, nam, p);
  168. if (error)
  169. break;
  170. } else {
  171. if (unp->unp_conn == NULL) {
  172. error = ENOTCONN;
  173. break;
  174. }
  175. }
  176. so2 = unp->unp_conn->unp_socket;
  177. if (unp->unp_addr)
  178. from = mtod(unp->unp_addr, struct sockaddr *);
  179. else
  180. from = &sun_noname;
  181. if (sbappendaddr(&so2->so_rcv, from, m, control)) {
  182. sorwakeup(so2);
  183. m = NULL;
  184. control = NULL;
  185. } else
  186. error = ENOBUFS;
  187. if (nam)
  188. unp_disconnect(unp);
  189. break;
  190. }
  191. case SOCK_STREAM:
  192. case SOCK_SEQPACKET:
  193. #define rcv (&so2->so_rcv)
  194. #define snd (&so->so_snd)
  195. if (so->so_state & SS_CANTSENDMORE) {
  196. error = EPIPE;
  197. break;
  198. }
  199. if (unp->unp_conn == NULL) {
  200. error = ENOTCONN;
  201. break;
  202. }
  203. so2 = unp->unp_conn->unp_socket;
  204. /*
  205. * Send to paired receive port, and then raise
  206. * send buffer counts to maintain backpressure.
  207. * Wake up readers.
  208. */
  209. if (control) {
  210. if (sbappendcontrol(rcv, m, control))
  211. control = NULL;
  212. } else if (so->so_type == SOCK_SEQPACKET)
  213. sbappendrecord(rcv, m);
  214. else
  215. sbappend(rcv, m);
  216. snd->sb_mbcnt = rcv->sb_mbcnt;
  217. snd->sb_cc = rcv->sb_cc;
  218. sorwakeup(so2);
  219. m = NULL;
  220. #undef snd
  221. #undef rcv
  222. break;
  223. default:
  224. panic("uipc 4");
  225. }
  226. /* we need to undo unp_internalize in case of errors */
  227. if (control && error)
  228. unp_dispose(control);
  229. break;
  230. case PRU_ABORT:
  231. unp_drop(unp, ECONNABORTED);
  232. break;
  233. case PRU_SENSE: {
  234. struct stat *sb = (struct stat *)m;
  235. sb->st_blksize = so->so_snd.sb_hiwat;
  236. sb->st_dev = NODEV;
  237. if (unp->unp_ino == 0)
  238. unp->unp_ino = unp_ino++;
  239. sb->st_atim.tv_sec =
  240. sb->st_mtim.tv_sec =
  241. sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
  242. sb->st_atim.tv_nsec =
  243. sb->st_mtim.tv_nsec =
  244. sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
  245. sb->st_ino = unp->unp_ino;
  246. return (0);
  247. }
  248. case PRU_RCVOOB:
  249. return (EOPNOTSUPP);
  250. case PRU_SENDOOB:
  251. error = EOPNOTSUPP;
  252. break;
  253. case PRU_SOCKADDR:
  254. uipc_setaddr(unp, nam);
  255. break;
  256. case PRU_PEERADDR:
  257. uipc_setaddr(unp->unp_conn, nam);
  258. break;
  259. case PRU_SLOWTIMO:
  260. break;
  261. default:
  262. panic("piusrreq");
  263. }
  264. release:
  265. if (control)
  266. m_freem(control);
  267. if (m)
  268. m_freem(m);
  269. return (error);
  270. }
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace. Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define PIPSIZ 4096
u_long unpst_sendspace = PIPSIZ;	/* default stream send reservation */
u_long unpst_recvspace = PIPSIZ;	/* default stream receive reservation */
u_long unpdg_sendspace = 2*1024; /* really max datagram size */
u_long unpdg_recvspace = 4*1024; /* default datagram receive reservation */

int unp_rights; /* file descriptors in flight */
  285. int
  286. unp_attach(struct socket *so)
  287. {
  288. struct unpcb *unp;
  289. int error;
  290. if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  291. switch (so->so_type) {
  292. case SOCK_STREAM:
  293. case SOCK_SEQPACKET:
  294. error = soreserve(so, unpst_sendspace, unpst_recvspace);
  295. break;
  296. case SOCK_DGRAM:
  297. error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  298. break;
  299. default:
  300. panic("unp_attach");
  301. }
  302. if (error)
  303. return (error);
  304. }
  305. unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO);
  306. if (unp == NULL)
  307. return (ENOBUFS);
  308. unp->unp_socket = so;
  309. so->so_pcb = unp;
  310. getnanotime(&unp->unp_ctime);
  311. return (0);
  312. }
  313. void
  314. unp_detach(struct unpcb *unp)
  315. {
  316. struct vnode *vp;
  317. if (unp->unp_vnode) {
  318. unp->unp_vnode->v_socket = NULL;
  319. vp = unp->unp_vnode;
  320. unp->unp_vnode = NULL;
  321. vrele(vp);
  322. }
  323. if (unp->unp_conn)
  324. unp_disconnect(unp);
  325. while (!SLIST_EMPTY(&unp->unp_refs))
  326. unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
  327. soisdisconnected(unp->unp_socket);
  328. unp->unp_socket->so_pcb = NULL;
  329. m_freem(unp->unp_addr);
  330. if (unp_rights) {
  331. /*
  332. * Normally the receive buffer is flushed later,
  333. * in sofree, but if our receive buffer holds references
  334. * to descriptors that are now garbage, we will dispose
  335. * of those descriptor references after the garbage collector
  336. * gets them (resulting in a "panic: closef: count < 0").
  337. */
  338. sorflush(unp->unp_socket);
  339. free(unp, M_PCB, 0);
  340. unp_gc();
  341. } else
  342. free(unp, M_PCB, 0);
  343. }
/*
 * bind(2) for AF_UNIX: validate the sockaddr_un in `nam', take a
 * private zero-padded copy of it, then create the VSOCK filesystem
 * node named by sun_path and cross-link it with the pcb.
 *
 * Returns 0 on success; EINVAL for an already-bound pcb or a malformed
 * address, EAFNOSUPPORT for the wrong family, EADDRINUSE if the path
 * already exists, or a namei/VOP_CREATE error.
 * On success the pcb owns `nam2' (unp_addr) and a vnode reference.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	/* A socket may be bound only once. */
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if (soun->sun_len > sizeof(struct sockaddr_un) ||
	    soun->sun_len < offsetof(struct sockaddr_un, sun_path))
		return (EINVAL);
	if (soun->sun_family != AF_UNIX)
		return (EAFNOSUPPORT);

	/* Path must leave room for a terminating NUL. */
	pathlen = strnlen(soun->sun_path, soun->sun_len -
	    offsetof(struct sockaddr_un, sun_path));
	if (pathlen == sizeof(soun->sun_path))
		return (EINVAL);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0) {
		m_freem(nam2);
		return (error);
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* Path exists: abort the create and release namei state. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		return (EADDRINUSE);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	/* New node's mode is masked by the process umask. */
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error) {
		m_freem(nam2);
		return (error);
	}
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* Record binder's credentials for SO_PEERCRED-style queries. */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp, 0, p);
	return (0);
}
/*
 * connect(2) for AF_UNIX: look up the VSOCK node named in `nam',
 * check write access, and connect to the socket bound to it.  For
 * connection-oriented types, spawn a server-side socket off the
 * listener with sonewconn() and record the connecting process's
 * credentials on it.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error;
	struct nameidata nd;

	if (soun->sun_family != AF_UNIX)
		return (EAFNOSUPPORT);

	/*
	 * Short sockaddrs get a NUL appended one byte past m_len
	 * (within the mbuf's storage) so sun_path is terminated;
	 * full-size ones must already contain an embedded NUL.
	 */
	if (nam->m_len < sizeof(struct sockaddr_un))
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	else if (nam->m_len > sizeof(struct sockaddr_un))
		return (EINVAL);
	else if (memchr(soun->sun_path, '\0', sizeof(soun->sun_path)) == NULL)
		return (EINVAL);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* Connecting requires write permission on the socket node. */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* Node exists but no socket is currently bound to it. */
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Listener must accept connections and yield a new socket. */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* Stamp the server-side socket with our credentials. */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* Connect to the spawned socket, not the listener. */
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			/* Listener bound with creds: copy them to us. */
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}
  473. int
  474. unp_connect2(struct socket *so, struct socket *so2)
  475. {
  476. struct unpcb *unp = sotounpcb(so);
  477. struct unpcb *unp2;
  478. if (so2->so_type != so->so_type)
  479. return (EPROTOTYPE);
  480. unp2 = sotounpcb(so2);
  481. unp->unp_conn = unp2;
  482. switch (so->so_type) {
  483. case SOCK_DGRAM:
  484. SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
  485. soisconnected(so);
  486. break;
  487. case SOCK_STREAM:
  488. case SOCK_SEQPACKET:
  489. unp2->unp_conn = unp;
  490. soisconnected(so);
  491. soisconnected(so2);
  492. break;
  493. default:
  494. panic("unp_connect2");
  495. }
  496. return (0);
  497. }
  498. void
  499. unp_disconnect(struct unpcb *unp)
  500. {
  501. struct unpcb *unp2 = unp->unp_conn;
  502. if (unp2 == NULL)
  503. return;
  504. unp->unp_conn = NULL;
  505. switch (unp->unp_socket->so_type) {
  506. case SOCK_DGRAM:
  507. SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
  508. unp->unp_socket->so_state &= ~SS_ISCONNECTED;
  509. break;
  510. case SOCK_STREAM:
  511. case SOCK_SEQPACKET:
  512. unp->unp_socket->so_snd.sb_mbcnt = 0;
  513. unp->unp_socket->so_snd.sb_cc = 0;
  514. soisdisconnected(unp->unp_socket);
  515. unp2->unp_conn = NULL;
  516. unp2->unp_socket->so_snd.sb_mbcnt = 0;
  517. unp2->unp_socket->so_snd.sb_cc = 0;
  518. soisdisconnected(unp2->unp_socket);
  519. break;
  520. }
  521. }
  522. void
  523. unp_shutdown(struct unpcb *unp)
  524. {
  525. struct socket *so;
  526. switch (unp->unp_socket->so_type) {
  527. case SOCK_STREAM:
  528. case SOCK_SEQPACKET:
  529. if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
  530. socantrcvmore(so);
  531. break;
  532. default:
  533. break;
  534. }
  535. }
/*
 * Abort the connection of `unp' and post `errno' on its socket.
 * If the socket still sits on a listener's accept queue (so_head set),
 * it will never be accepted, so detach and free the pcb here; otherwise
 * the pcb is freed later by unp_detach() when the owner closes.
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		/* Orphaned embryonic connection: release everything now. */
		so->so_pcb = NULL;
		sofree(so);
		m_freem(unp->unp_addr);
		free(unp, M_PCB, sizeof(*unp));
	}
}
#ifdef notdef
/* Disabled stub: unix-domain sockets implement no mbuf-drain handler. */
unp_drain(void)
{
}
#endif
  554. int
  555. unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
  556. {
  557. struct proc *p = curproc; /* XXX */
  558. struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
  559. int i, *fdp = NULL;
  560. struct file **rp;
  561. struct file *fp;
  562. int nfds, error = 0;
  563. nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
  564. sizeof(struct file *);
  565. if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
  566. controllen = 0;
  567. else
  568. controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
  569. if (nfds > controllen / sizeof(int)) {
  570. error = EMSGSIZE;
  571. goto restart;
  572. }
  573. rp = (struct file **)CMSG_DATA(cm);
  574. fdp = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);
  575. /* Make sure the recipient should be able to see the descriptors.. */
  576. if (p->p_fd->fd_rdir != NULL) {
  577. rp = (struct file **)CMSG_DATA(cm);
  578. for (i = 0; i < nfds; i++) {
  579. fp = *rp++;
  580. /*
  581. * No to block devices. If passing a directory,
  582. * make sure that it is underneath the root.
  583. */
  584. if (fp->f_type == DTYPE_VNODE) {
  585. struct vnode *vp = (struct vnode *)fp->f_data;
  586. if (vp->v_type == VBLK ||
  587. (vp->v_type == VDIR &&
  588. !vn_isunder(vp, p->p_fd->fd_rdir, p))) {
  589. error = EPERM;
  590. break;
  591. }
  592. }
  593. }
  594. }
  595. restart:
  596. fdplock(p->p_fd);
  597. if (error != 0) {
  598. rp = ((struct file **)CMSG_DATA(cm));
  599. unp_discard(rp, nfds);
  600. goto out;
  601. }
  602. /*
  603. * First loop -- allocate file descriptor table slots for the
  604. * new descriptors.
  605. */
  606. rp = ((struct file **)CMSG_DATA(cm));
  607. for (i = 0; i < nfds; i++) {
  608. if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
  609. /*
  610. * Back out what we've done so far.
  611. */
  612. for (--i; i >= 0; i--)
  613. fdremove(p->p_fd, fdp[i]);
  614. if (error == ENOSPC) {
  615. fdexpand(p);
  616. error = 0;
  617. } else {
  618. /*
  619. * This is the error that has historically
  620. * been returned, and some callers may
  621. * expect it.
  622. */
  623. error = EMSGSIZE;
  624. }
  625. fdpunlock(p->p_fd);
  626. goto restart;
  627. }
  628. /*
  629. * Make the slot reference the descriptor so that
  630. * fdalloc() works properly.. We finalize it all
  631. * in the loop below.
  632. */
  633. p->p_fd->fd_ofiles[fdp[i]] = *rp++;
  634. if (flags & MSG_CMSG_CLOEXEC)
  635. p->p_fd->fd_ofileflags[fdp[i]] |= UF_EXCLOSE;
  636. }
  637. /*
  638. * Now that adding them has succeeded, update all of the
  639. * descriptor passing state.
  640. */
  641. rp = (struct file **)CMSG_DATA(cm);
  642. for (i = 0; i < nfds; i++) {
  643. fp = *rp++;
  644. fp->f_msgcount--;
  645. unp_rights--;
  646. }
  647. /*
  648. * Copy temporary array to message and adjust length, in case of
  649. * transition from large struct file pointers to ints.
  650. */
  651. memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
  652. cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
  653. rights->m_len = CMSG_LEN(nfds * sizeof(int));
  654. out:
  655. fdpunlock(p->p_fd);
  656. if (fdp)
  657. free(fdp, M_TEMP, 0);
  658. return (error);
  659. }
/*
 * Convert an SCM_RIGHTS control message from file descriptor numbers
 * into struct file pointers for transit.  The cmsg payload is expanded
 * in place (possibly growing the mbuf to a cluster) and rewritten
 * back-to-front so the int array and the wider pointer array can share
 * storage without clobbering unread entries.
 *
 * Each referenced file gains an f_count and f_msgcount reference; on
 * failure all references taken so far are backed out.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp, *fp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* Global cap on in-flight descriptors. */
	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
	    control->m_len;
	if (neededspace > M_TRAILINGSPACE(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);       /* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));

	/* Walk both arrays from the end so the rewrite never overlaps. */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
	for (i = 0; i < nfds; i++) {
		/* memcpy: the cmsg data may not be pointer-aligned. */
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		/* Refuse counts close to overflow. */
		if (fp->f_count == LONG_MAX-2 ||
		    fp->f_msgcount == LONG_MAX-2) {
			error = EDEADLK;
			goto fail;
		}
		/* kq and systrace descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE ||
		    fp->f_type == DTYPE_SYSTRACE) {
			error = EINVAL;
			goto fail;
		}
		memcpy(rp, &fp, sizeof fp);
		rp--;
		fp->f_count++;
		fp->f_msgcount++;
		unp_rights++;
	}
	return (0);
fail:
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		memcpy(&fp, rp, sizeof(fp));
		fp->f_count--;
		fp->f_msgcount--;
		unp_rights--;
	}
	return (error);
}
  744. int unp_defer, unp_gcing;
  745. extern struct domain unixdomain;
  746. void
  747. unp_gc(void)
  748. {
  749. struct file *fp, *nextfp;
  750. struct socket *so;
  751. struct file **extra_ref, **fpp;
  752. int nunref, i;
  753. if (unp_gcing)
  754. return;
  755. unp_gcing = 1;
  756. unp_defer = 0;
  757. LIST_FOREACH(fp, &filehead, f_list)
  758. fp->f_iflags &= ~(FIF_MARK|FIF_DEFER);
  759. do {
  760. LIST_FOREACH(fp, &filehead, f_list) {
  761. if (fp->f_iflags & FIF_DEFER) {
  762. fp->f_iflags &= ~FIF_DEFER;
  763. unp_defer--;
  764. } else {
  765. if (fp->f_count == 0)
  766. continue;
  767. if (fp->f_iflags & FIF_MARK)
  768. continue;
  769. if (fp->f_count == fp->f_msgcount)
  770. continue;
  771. }
  772. fp->f_iflags |= FIF_MARK;
  773. if (fp->f_type != DTYPE_SOCKET ||
  774. (so = fp->f_data) == NULL)
  775. continue;
  776. if (so->so_proto->pr_domain != &unixdomain ||
  777. (so->so_proto->pr_flags&PR_RIGHTS) == 0)
  778. continue;
  779. #ifdef notdef
  780. if (so->so_rcv.sb_flags & SB_LOCK) {
  781. /*
  782. * This is problematical; it's not clear
  783. * we need to wait for the sockbuf to be
  784. * unlocked (on a uniprocessor, at least),
  785. * and it's also not clear what to do
  786. * if sbwait returns an error due to receipt
  787. * of a signal. If sbwait does return
  788. * an error, we'll go into an infinite
  789. * loop. Delete all of this for now.
  790. */
  791. (void) sbwait(&so->so_rcv);
  792. goto restart;
  793. }
  794. #endif
  795. unp_scan(so->so_rcv.sb_mb, unp_mark);
  796. }
  797. } while (unp_defer);
  798. /*
  799. * We grab an extra reference to each of the file table entries
  800. * that are not otherwise accessible and then free the rights
  801. * that are stored in messages on them.
  802. *
  803. * The bug in the original code is a little tricky, so I'll describe
  804. * what's wrong with it here.
  805. *
  806. * It is incorrect to simply unp_discard each entry for f_msgcount
  807. * times -- consider the case of sockets A and B that contain
  808. * references to each other. On a last close of some other socket,
  809. * we trigger a gc since the number of outstanding rights (unp_rights)
  810. * is non-zero. If during the sweep phase the gc code un_discards,
  811. * we end up doing a (full) closef on the descriptor. A closef on A
  812. * results in the following chain. Closef calls soo_close, which
  813. * calls soclose. Soclose calls first (through the switch
  814. * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
  815. * returns because the previous instance had set unp_gcing, and
  816. * we return all the way back to soclose, which marks the socket
  817. * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
  818. * to free up the rights that are queued in messages on the socket A,
  819. * i.e., the reference on B. The sorflush calls via the dom_dispose
  820. * switch unp_dispose, which unp_scans with unp_discard. This second
  821. * instance of unp_discard just calls closef on B.
  822. *
  823. * Well, a similar chain occurs on B, resulting in a sorflush on B,
  824. * which results in another closef on A. Unfortunately, A is already
  825. * being closed, and the descriptor has already been marked with
  826. * SS_NOFDREF, and soclose panics at this point.
  827. *
  828. * Here, we first take an extra reference to each inaccessible
  829. * descriptor. Then, we call sorflush ourself, since we know
  830. * it is a Unix domain socket anyhow. After we destroy all the
  831. * rights carried in messages, we do a last closef to get rid
  832. * of our extra reference. This is the last close, and the
  833. * unp_detach etc will shut down the socket.
  834. *
  835. * 91/09/19, bsy@cs.cmu.edu
  836. */
  837. extra_ref = mallocarray(nfiles, sizeof(struct file *), M_FILE, M_WAITOK);
  838. for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
  839. fp != NULL; fp = nextfp) {
  840. nextfp = LIST_NEXT(fp, f_list);
  841. if (fp->f_count == 0)
  842. continue;
  843. if (fp->f_count == fp->f_msgcount &&
  844. !(fp->f_iflags & FIF_MARK)) {
  845. *fpp++ = fp;
  846. nunref++;
  847. FREF(fp);
  848. fp->f_count++;
  849. }
  850. }
  851. for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
  852. if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
  853. sorflush((*fpp)->f_data);
  854. for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
  855. (void) closef(*fpp, NULL);
  856. free(extra_ref, M_FILE, 0);
  857. unp_gcing = 0;
  858. }
  859. void
  860. unp_dispose(struct mbuf *m)
  861. {
  862. if (m)
  863. unp_scan(m, unp_discard);
  864. }
  865. void
  866. unp_scan(struct mbuf *m0, void (*op)(struct file **, int))
  867. {
  868. struct mbuf *m;
  869. struct file **rp;
  870. struct cmsghdr *cm;
  871. int qfds;
  872. while (m0) {
  873. for (m = m0; m; m = m->m_next) {
  874. if (m->m_type == MT_CONTROL &&
  875. m->m_len >= sizeof(*cm)) {
  876. cm = mtod(m, struct cmsghdr *);
  877. if (cm->cmsg_level != SOL_SOCKET ||
  878. cm->cmsg_type != SCM_RIGHTS)
  879. continue;
  880. qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
  881. / sizeof(struct file *);
  882. if (qfds > 0) {
  883. rp = (struct file **)CMSG_DATA(cm);
  884. op(rp, qfds);
  885. }
  886. break; /* XXX, but saves time */
  887. }
  888. }
  889. m0 = m0->m_nextpkt;
  890. }
  891. }
  892. void
  893. unp_mark(struct file **rp, int nfds)
  894. {
  895. int i;
  896. for (i = 0; i < nfds; i++) {
  897. if (rp[i] == NULL)
  898. continue;
  899. if (rp[i]->f_iflags & (FIF_MARK|FIF_DEFER))
  900. continue;
  901. if (rp[i]->f_type == DTYPE_SOCKET) {
  902. unp_defer++;
  903. rp[i]->f_iflags |= FIF_DEFER;
  904. } else {
  905. rp[i]->f_iflags |= FIF_MARK;
  906. }
  907. }
  908. }
  909. void
  910. unp_discard(struct file **rp, int nfds)
  911. {
  912. struct file *fp;
  913. int i;
  914. for (i = 0; i < nfds; i++) {
  915. if ((fp = rp[i]) == NULL)
  916. continue;
  917. rp[i] = NULL;
  918. FREF(fp);
  919. fp->f_msgcount--;
  920. unp_rights--;
  921. (void) closef(fp, NULL);
  922. }
  923. }