tcp_usrreq.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022
  1. /* $OpenBSD: tcp_usrreq.c,v 1.126 2015/07/15 22:16:42 deraadt Exp $ */
  2. /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
  3. /*
  4. * Copyright (c) 1982, 1986, 1988, 1993
  5. * The Regents of the University of California. All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. Neither the name of the University nor the names of its contributors
  16. * may be used to endorse or promote products derived from this software
  17. * without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29. * SUCH DAMAGE.
  30. *
  31. * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
  32. *
  33. * NRL grants permission for redistribution and use in source and binary
  34. * forms, with or without modification, of the software and documentation
  35. * created at NRL provided that the following conditions are met:
  36. *
  37. * 1. Redistributions of source code must retain the above copyright
  38. * notice, this list of conditions and the following disclaimer.
  39. * 2. Redistributions in binary form must reproduce the above copyright
  40. * notice, this list of conditions and the following disclaimer in the
  41. * documentation and/or other materials provided with the distribution.
  42. * 3. All advertising materials mentioning features or use of this software
  43. * must display the following acknowledgements:
  44. * This product includes software developed by the University of
  45. * California, Berkeley and its contributors.
  46. * This product includes software developed at the Information
  47. * Technology Division, US Naval Research Laboratory.
  48. * 4. Neither the name of the NRL nor the names of its contributors
  49. * may be used to endorse or promote products derived from this software
  50. * without specific prior written permission.
  51. *
  52. * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
  53. * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  54. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  55. * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
  56. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  57. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  58. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  59. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  60. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  61. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  62. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  63. *
  64. * The views and conclusions contained in the software and documentation
  65. * are those of the authors and should not be interpreted as representing
  66. * official policies, either expressed or implied, of the US Naval
  67. * Research Laboratory (NRL).
  68. */
  69. #include <sys/param.h>
  70. #include <sys/systm.h>
  71. #include <sys/mbuf.h>
  72. #include <sys/socket.h>
  73. #include <sys/socketvar.h>
  74. #include <sys/protosw.h>
  75. #include <sys/stat.h>
  76. #include <sys/sysctl.h>
  77. #include <sys/domain.h>
  78. #include <sys/kernel.h>
  79. #include <sys/pool.h>
  80. #include <net/if.h>
  81. #include <net/if_var.h>
  82. #include <net/route.h>
  83. #include <netinet/in.h>
  84. #include <netinet/in_var.h>
  85. #include <netinet/ip.h>
  86. #include <netinet/in_pcb.h>
  87. #include <netinet/ip_var.h>
  88. #include <netinet/tcp.h>
  89. #include <netinet/tcp_fsm.h>
  90. #include <netinet/tcp_seq.h>
  91. #include <netinet/tcp_timer.h>
  92. #include <netinet/tcp_var.h>
  93. #include <netinet/tcpip.h>
  94. #include <netinet/tcp_debug.h>
  95. #ifdef INET6
  96. #include <netinet6/in6_var.h>
  97. #endif
  98. #ifndef TCP_SENDSPACE
  99. #define TCP_SENDSPACE 1024*16
  100. #endif
/* Send buffer space that tcp_attach() reserves for a new TCP socket. */
u_int tcp_sendspace = TCP_SENDSPACE;
  102. #ifndef TCP_RECVSPACE
  103. #define TCP_RECVSPACE 1024*16
  104. #endif
  105. u_int tcp_recvspace = TCP_RECVSPACE;
  106. u_int tcp_autorcvbuf_inc = 16 * 1024;
  107. int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
  108. struct inpcbtable tcbtable;
  109. int tcp_ident(void *, size_t *, void *, size_t, int);
  110. /*
  111. * Process a TCP user request for TCP tb. If this is a send request
  112. * then m is the mbuf chain of send data. If this is a timer expiration
  113. * (called from the software clock routine), then timertype tells which timer.
  114. */
  115. /*ARGSUSED*/
  116. int
  117. tcp_usrreq(so, req, m, nam, control, p)
  118. struct socket *so;
  119. int req;
  120. struct mbuf *m, *nam, *control;
  121. struct proc *p;
  122. {
  123. struct sockaddr_in *sin;
  124. struct inpcb *inp;
  125. struct tcpcb *tp = NULL;
  126. int s;
  127. int error = 0;
  128. short ostate;
  129. if (req == PRU_CONTROL) {
  130. #ifdef INET6
  131. if (sotopf(so) == PF_INET6)
  132. return in6_control(so, (u_long)m, (caddr_t)nam,
  133. (struct ifnet *)control);
  134. else
  135. #endif /* INET6 */
  136. return (in_control(so, (u_long)m, (caddr_t)nam,
  137. (struct ifnet *)control));
  138. }
  139. if (control && control->m_len) {
  140. m_freem(control);
  141. m_freem(m);
  142. return (EINVAL);
  143. }
  144. s = splsoftnet();
  145. inp = sotoinpcb(so);
  146. /*
  147. * When a TCP is attached to a socket, then there will be
  148. * a (struct inpcb) pointed at by the socket, and this
  149. * structure will point at a subsidiary (struct tcpcb).
  150. */
  151. if (inp == NULL && req != PRU_ATTACH) {
  152. error = so->so_error;
  153. if (error == 0)
  154. error = EINVAL;
  155. splx(s);
  156. /*
  157. * The following corrects an mbuf leak under rare
  158. * circumstances
  159. */
  160. if (req == PRU_SEND || req == PRU_SENDOOB)
  161. m_freem(m);
  162. return (error);
  163. }
  164. if (inp) {
  165. tp = intotcpcb(inp);
  166. /* tp might get 0 when using socket splicing */
  167. if (tp == NULL) {
  168. splx(s);
  169. return (0);
  170. }
  171. #ifdef KPROF
  172. tcp_acounts[tp->t_state][req]++;
  173. #endif
  174. ostate = tp->t_state;
  175. } else
  176. ostate = 0;
  177. switch (req) {
  178. /*
  179. * TCP attaches to socket via PRU_ATTACH, reserving space,
  180. * and an internet control block.
  181. */
  182. case PRU_ATTACH:
  183. if (inp) {
  184. error = EISCONN;
  185. break;
  186. }
  187. error = tcp_attach(so);
  188. if (error)
  189. break;
  190. if ((so->so_options & SO_LINGER) && so->so_linger == 0)
  191. so->so_linger = TCP_LINGERTIME;
  192. tp = sototcpcb(so);
  193. break;
  194. /*
  195. * PRU_DETACH detaches the TCP protocol from the socket.
  196. * If the protocol state is non-embryonic, then can't
  197. * do this directly: have to initiate a PRU_DISCONNECT,
  198. * which may finish later; embryonic TCB's can just
  199. * be discarded here.
  200. */
  201. case PRU_DETACH:
  202. tp = tcp_disconnect(tp);
  203. break;
  204. /*
  205. * Give the socket an address.
  206. */
  207. case PRU_BIND:
  208. #ifdef INET6
  209. if (inp->inp_flags & INP_IPV6)
  210. error = in6_pcbbind(inp, nam, p);
  211. else
  212. #endif
  213. error = in_pcbbind(inp, nam, p);
  214. if (error)
  215. break;
  216. break;
  217. /*
  218. * Prepare to accept connections.
  219. */
  220. case PRU_LISTEN:
  221. if (inp->inp_lport == 0) {
  222. #ifdef INET6
  223. if (inp->inp_flags & INP_IPV6)
  224. error = in6_pcbbind(inp, NULL, p);
  225. else
  226. #endif
  227. error = in_pcbbind(inp, NULL, p);
  228. }
  229. /* If the in_pcbbind() above is called, the tp->pf
  230. should still be whatever it was before. */
  231. if (error == 0)
  232. tp->t_state = TCPS_LISTEN;
  233. break;
  234. /*
  235. * Initiate connection to peer.
  236. * Create a template for use in transmissions on this connection.
  237. * Enter SYN_SENT state, and mark socket as connecting.
  238. * Start keep-alive timer, and seed output sequence space.
  239. * Send initial segment on connection.
  240. */
  241. case PRU_CONNECT:
  242. sin = mtod(nam, struct sockaddr_in *);
  243. #ifdef INET6
  244. if (sin->sin_family == AF_INET6) {
  245. struct in6_addr *in6_addr = &mtod(nam,
  246. struct sockaddr_in6 *)->sin6_addr;
  247. if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
  248. IN6_IS_ADDR_MULTICAST(in6_addr) ||
  249. IN6_IS_ADDR_V4MAPPED(in6_addr)) {
  250. error = EINVAL;
  251. break;
  252. }
  253. error = in6_pcbconnect(inp, nam);
  254. } else if (sin->sin_family == AF_INET)
  255. #endif /* INET6 */
  256. {
  257. if ((sin->sin_addr.s_addr == INADDR_ANY) ||
  258. (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
  259. IN_MULTICAST(sin->sin_addr.s_addr) ||
  260. in_broadcast(sin->sin_addr, inp->inp_rtableid)) {
  261. error = EINVAL;
  262. break;
  263. }
  264. error = in_pcbconnect(inp, nam);
  265. }
  266. if (error)
  267. break;
  268. tp->t_template = tcp_template(tp);
  269. if (tp->t_template == 0) {
  270. in_pcbdisconnect(inp);
  271. error = ENOBUFS;
  272. break;
  273. }
  274. so->so_state |= SS_CONNECTOUT;
  275. /* Compute window scaling to request. */
  276. tcp_rscale(tp, sb_max);
  277. soisconnecting(so);
  278. tcpstat.tcps_connattempt++;
  279. tp->t_state = TCPS_SYN_SENT;
  280. TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
  281. tcp_set_iss_tsm(tp);
  282. tcp_sendseqinit(tp);
  283. #if defined(TCP_SACK)
  284. tp->snd_last = tp->snd_una;
  285. #endif
  286. #if defined(TCP_SACK) && defined(TCP_FACK)
  287. tp->snd_fack = tp->snd_una;
  288. tp->retran_data = 0;
  289. tp->snd_awnd = 0;
  290. #endif
  291. error = tcp_output(tp);
  292. break;
  293. /*
  294. * Create a TCP connection between two sockets.
  295. */
  296. case PRU_CONNECT2:
  297. error = EOPNOTSUPP;
  298. break;
  299. /*
  300. * Initiate disconnect from peer.
  301. * If connection never passed embryonic stage, just drop;
  302. * else if don't need to let data drain, then can just drop anyways,
  303. * else have to begin TCP shutdown process: mark socket disconnecting,
  304. * drain unread data, state switch to reflect user close, and
  305. * send segment (e.g. FIN) to peer. Socket will be really disconnected
  306. * when peer sends FIN and acks ours.
  307. *
  308. * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  309. */
  310. case PRU_DISCONNECT:
  311. tp = tcp_disconnect(tp);
  312. break;
  313. /*
  314. * Accept a connection. Essentially all the work is
  315. * done at higher levels; just return the address
  316. * of the peer, storing through addr.
  317. */
  318. case PRU_ACCEPT:
  319. #ifdef INET6
  320. if (inp->inp_flags & INP_IPV6)
  321. in6_setpeeraddr(inp, nam);
  322. else
  323. #endif
  324. in_setpeeraddr(inp, nam);
  325. break;
  326. /*
  327. * Mark the connection as being incapable of further output.
  328. */
  329. case PRU_SHUTDOWN:
  330. if (so->so_state & SS_CANTSENDMORE)
  331. break;
  332. socantsendmore(so);
  333. tp = tcp_usrclosed(tp);
  334. if (tp)
  335. error = tcp_output(tp);
  336. break;
  337. /*
  338. * After a receive, possibly send window update to peer.
  339. */
  340. case PRU_RCVD:
  341. /*
  342. * soreceive() calls this function when a user receives
  343. * ancillary data on a listening socket. We don't call
  344. * tcp_output in such a case, since there is no header
  345. * template for a listening socket and hence the kernel
  346. * will panic.
  347. */
  348. if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
  349. (void) tcp_output(tp);
  350. break;
  351. /*
  352. * Do a send by putting data in output queue and updating urgent
  353. * marker if URG set. Possibly send more data.
  354. */
  355. case PRU_SEND:
  356. sbappendstream(&so->so_snd, m);
  357. error = tcp_output(tp);
  358. break;
  359. /*
  360. * Abort the TCP.
  361. */
  362. case PRU_ABORT:
  363. tp = tcp_drop(tp, ECONNABORTED);
  364. break;
  365. case PRU_SENSE:
  366. ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
  367. splx(s);
  368. return (0);
  369. case PRU_RCVOOB:
  370. if ((so->so_oobmark == 0 &&
  371. (so->so_state & SS_RCVATMARK) == 0) ||
  372. so->so_options & SO_OOBINLINE ||
  373. tp->t_oobflags & TCPOOB_HADDATA) {
  374. error = EINVAL;
  375. break;
  376. }
  377. if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
  378. error = EWOULDBLOCK;
  379. break;
  380. }
  381. m->m_len = 1;
  382. *mtod(m, caddr_t) = tp->t_iobc;
  383. if (((long)nam & MSG_PEEK) == 0)
  384. tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
  385. break;
  386. case PRU_SENDOOB:
  387. if (sbspace(&so->so_snd) < -512) {
  388. m_freem(m);
  389. error = ENOBUFS;
  390. break;
  391. }
  392. /*
  393. * According to RFC961 (Assigned Protocols),
  394. * the urgent pointer points to the last octet
  395. * of urgent data. We continue, however,
  396. * to consider it to indicate the first octet
  397. * of data past the urgent section.
  398. * Otherwise, snd_up should be one lower.
  399. */
  400. sbappendstream(&so->so_snd, m);
  401. tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  402. tp->t_force = 1;
  403. error = tcp_output(tp);
  404. tp->t_force = 0;
  405. break;
  406. case PRU_SOCKADDR:
  407. #ifdef INET6
  408. if (inp->inp_flags & INP_IPV6)
  409. in6_setsockaddr(inp, nam);
  410. else
  411. #endif
  412. in_setsockaddr(inp, nam);
  413. break;
  414. case PRU_PEERADDR:
  415. #ifdef INET6
  416. if (inp->inp_flags & INP_IPV6)
  417. in6_setpeeraddr(inp, nam);
  418. else
  419. #endif
  420. in_setpeeraddr(inp, nam);
  421. break;
  422. default:
  423. panic("tcp_usrreq");
  424. }
  425. if (tp && (so->so_options & SO_DEBUG))
  426. tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
  427. splx(s);
  428. return (error);
  429. }
  430. int
  431. tcp_ctloutput(op, so, level, optname, mp)
  432. int op;
  433. struct socket *so;
  434. int level, optname;
  435. struct mbuf **mp;
  436. {
  437. int error = 0, s;
  438. struct inpcb *inp;
  439. struct tcpcb *tp;
  440. struct mbuf *m;
  441. int i;
  442. s = splsoftnet();
  443. inp = sotoinpcb(so);
  444. if (inp == NULL) {
  445. splx(s);
  446. if (op == PRCO_SETOPT)
  447. (void) m_free(*mp);
  448. return (ECONNRESET);
  449. }
  450. if (level != IPPROTO_TCP) {
  451. switch (so->so_proto->pr_domain->dom_family) {
  452. #ifdef INET6
  453. case PF_INET6:
  454. error = ip6_ctloutput(op, so, level, optname, mp);
  455. break;
  456. #endif /* INET6 */
  457. case PF_INET:
  458. error = ip_ctloutput(op, so, level, optname, mp);
  459. break;
  460. default:
  461. error = EAFNOSUPPORT; /*?*/
  462. break;
  463. }
  464. splx(s);
  465. return (error);
  466. }
  467. tp = intotcpcb(inp);
  468. switch (op) {
  469. case PRCO_SETOPT:
  470. m = *mp;
  471. switch (optname) {
  472. case TCP_NODELAY:
  473. if (m == NULL || m->m_len < sizeof (int))
  474. error = EINVAL;
  475. else if (*mtod(m, int *))
  476. tp->t_flags |= TF_NODELAY;
  477. else
  478. tp->t_flags &= ~TF_NODELAY;
  479. break;
  480. case TCP_NOPUSH:
  481. if (m == NULL || m->m_len < sizeof (int))
  482. error = EINVAL;
  483. else if (*mtod(m, int *))
  484. tp->t_flags |= TF_NOPUSH;
  485. else if (tp->t_flags & TF_NOPUSH) {
  486. tp->t_flags &= ~TF_NOPUSH;
  487. if (TCPS_HAVEESTABLISHED(tp->t_state))
  488. error = tcp_output(tp);
  489. }
  490. break;
  491. case TCP_MAXSEG:
  492. if (m == NULL || m->m_len < sizeof (int)) {
  493. error = EINVAL;
  494. break;
  495. }
  496. i = *mtod(m, int *);
  497. if (i > 0 && i <= tp->t_maxseg)
  498. tp->t_maxseg = i;
  499. else
  500. error = EINVAL;
  501. break;
  502. #ifdef TCP_SACK
  503. case TCP_SACK_ENABLE:
  504. if (m == NULL || m->m_len < sizeof (int)) {
  505. error = EINVAL;
  506. break;
  507. }
  508. if (TCPS_HAVEESTABLISHED(tp->t_state)) {
  509. error = EPERM;
  510. break;
  511. }
  512. if (tp->t_flags & TF_SIGNATURE) {
  513. error = EPERM;
  514. break;
  515. }
  516. if (*mtod(m, int *))
  517. tp->sack_enable = 1;
  518. else
  519. tp->sack_enable = 0;
  520. break;
  521. #endif
  522. #ifdef TCP_SIGNATURE
  523. case TCP_MD5SIG:
  524. if (m == NULL || m->m_len < sizeof (int)) {
  525. error = EINVAL;
  526. break;
  527. }
  528. if (TCPS_HAVEESTABLISHED(tp->t_state)) {
  529. error = EPERM;
  530. break;
  531. }
  532. if (*mtod(m, int *)) {
  533. tp->t_flags |= TF_SIGNATURE;
  534. #ifdef TCP_SACK
  535. tp->sack_enable = 0;
  536. #endif /* TCP_SACK */
  537. } else
  538. tp->t_flags &= ~TF_SIGNATURE;
  539. break;
  540. #endif /* TCP_SIGNATURE */
  541. default:
  542. error = ENOPROTOOPT;
  543. break;
  544. }
  545. if (m)
  546. (void) m_free(m);
  547. break;
  548. case PRCO_GETOPT:
  549. *mp = m = m_get(M_WAIT, MT_SOOPTS);
  550. m->m_len = sizeof(int);
  551. switch (optname) {
  552. case TCP_NODELAY:
  553. *mtod(m, int *) = tp->t_flags & TF_NODELAY;
  554. break;
  555. case TCP_NOPUSH:
  556. *mtod(m, int *) = tp->t_flags & TF_NOPUSH;
  557. break;
  558. case TCP_MAXSEG:
  559. *mtod(m, int *) = tp->t_maxseg;
  560. break;
  561. #ifdef TCP_SACK
  562. case TCP_SACK_ENABLE:
  563. *mtod(m, int *) = tp->sack_enable;
  564. break;
  565. #endif
  566. #ifdef TCP_SIGNATURE
  567. case TCP_MD5SIG:
  568. *mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
  569. break;
  570. #endif
  571. default:
  572. error = ENOPROTOOPT;
  573. break;
  574. }
  575. break;
  576. }
  577. splx(s);
  578. return (error);
  579. }
  580. /*
  581. * Attach TCP protocol to socket, allocating
  582. * internet protocol control block, tcp control block,
  583. * bufer space, and entering LISTEN state if to accept connections.
  584. */
  585. int
  586. tcp_attach(so)
  587. struct socket *so;
  588. {
  589. struct tcpcb *tp;
  590. struct inpcb *inp;
  591. int error;
  592. if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 ||
  593. sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) ||
  594. sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) {
  595. error = soreserve(so, tcp_sendspace, tcp_recvspace);
  596. if (error)
  597. return (error);
  598. }
  599. error = in_pcballoc(so, &tcbtable);
  600. if (error)
  601. return (error);
  602. inp = sotoinpcb(so);
  603. tp = tcp_newtcpcb(inp);
  604. if (tp == NULL) {
  605. int nofd = so->so_state & SS_NOFDREF; /* XXX */
  606. so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
  607. in_pcbdetach(inp);
  608. so->so_state |= nofd;
  609. return (ENOBUFS);
  610. }
  611. tp->t_state = TCPS_CLOSED;
  612. #ifdef INET6
  613. /* we disallow IPv4 mapped address completely. */
  614. if (inp->inp_flags & INP_IPV6)
  615. tp->pf = PF_INET6;
  616. else
  617. tp->pf = PF_INET;
  618. #else
  619. tp->pf = PF_INET;
  620. #endif
  621. return (0);
  622. }
  623. /*
  624. * Initiate (or continue) disconnect.
  625. * If embryonic state, just send reset (once).
  626. * If in ``let data drain'' option and linger null, just drop.
  627. * Otherwise (hard), mark socket disconnecting and drop
  628. * current input data; switch states based on user close, and
  629. * send segment to peer (with FIN).
  630. */
  631. struct tcpcb *
  632. tcp_disconnect(tp)
  633. struct tcpcb *tp;
  634. {
  635. struct socket *so = tp->t_inpcb->inp_socket;
  636. if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
  637. tp = tcp_close(tp);
  638. else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
  639. tp = tcp_drop(tp, 0);
  640. else {
  641. soisdisconnecting(so);
  642. sbflush(&so->so_rcv);
  643. tp = tcp_usrclosed(tp);
  644. if (tp)
  645. (void) tcp_output(tp);
  646. }
  647. return (tp);
  648. }
  649. /*
  650. * User issued close, and wish to trail through shutdown states:
  651. * if never received SYN, just forget it. If got a SYN from peer,
  652. * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
  653. * If already got a FIN from peer, then almost done; go to LAST_ACK
  654. * state. In all other cases, have already sent FIN to peer (e.g.
  655. * after PRU_SHUTDOWN), and just have to play tedious game waiting
  656. * for peer to send FIN or not respond to keep-alives, etc.
  657. * We can let the user exit from the close as soon as the FIN is acked.
  658. */
  659. struct tcpcb *
  660. tcp_usrclosed(tp)
  661. struct tcpcb *tp;
  662. {
  663. switch (tp->t_state) {
  664. case TCPS_CLOSED:
  665. case TCPS_LISTEN:
  666. case TCPS_SYN_SENT:
  667. tp->t_state = TCPS_CLOSED;
  668. tp = tcp_close(tp);
  669. break;
  670. case TCPS_SYN_RECEIVED:
  671. case TCPS_ESTABLISHED:
  672. tp->t_state = TCPS_FIN_WAIT_1;
  673. break;
  674. case TCPS_CLOSE_WAIT:
  675. tp->t_state = TCPS_LAST_ACK;
  676. break;
  677. }
  678. if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
  679. soisdisconnected(tp->t_inpcb->inp_socket);
  680. /*
  681. * If we are in FIN_WAIT_2, we arrived here because the
  682. * application did a shutdown of the send side. Like the
  683. * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
  684. * a full close, we start a timer to make sure sockets are
  685. * not left in FIN_WAIT_2 forever.
  686. */
  687. if (tp->t_state == TCPS_FIN_WAIT_2)
  688. TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
  689. }
  690. return (tp);
  691. }
  692. /*
  693. * Look up a socket for ident or tcpdrop, ...
  694. */
  695. int
  696. tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop)
  697. {
  698. int error = 0, s;
  699. struct tcp_ident_mapping tir;
  700. struct inpcb *inp;
  701. struct tcpcb *tp = NULL;
  702. struct sockaddr_in *fin, *lin;
  703. #ifdef INET6
  704. struct sockaddr_in6 *fin6, *lin6;
  705. struct in6_addr f6, l6;
  706. #endif
  707. if (dodrop) {
  708. if (oldp != NULL || *oldlenp != 0)
  709. return (EINVAL);
  710. if (newp == NULL)
  711. return (EPERM);
  712. if (newlen < sizeof(tir))
  713. return (ENOMEM);
  714. if ((error = copyin(newp, &tir, sizeof (tir))) != 0 )
  715. return (error);
  716. } else {
  717. if (oldp == NULL)
  718. return (EINVAL);
  719. if (*oldlenp < sizeof(tir))
  720. return (ENOMEM);
  721. if (newp != NULL || newlen != 0)
  722. return (EINVAL);
  723. if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
  724. return (error);
  725. }
  726. switch (tir.faddr.ss_family) {
  727. #ifdef INET6
  728. case AF_INET6:
  729. fin6 = (struct sockaddr_in6 *)&tir.faddr;
  730. error = in6_embedscope(&f6, fin6, NULL, NULL);
  731. if (error)
  732. return EINVAL; /*?*/
  733. lin6 = (struct sockaddr_in6 *)&tir.laddr;
  734. error = in6_embedscope(&l6, lin6, NULL, NULL);
  735. if (error)
  736. return EINVAL; /*?*/
  737. break;
  738. #endif
  739. case AF_INET:
  740. fin = (struct sockaddr_in *)&tir.faddr;
  741. lin = (struct sockaddr_in *)&tir.laddr;
  742. break;
  743. default:
  744. return (EINVAL);
  745. }
  746. s = splsoftnet();
  747. switch (tir.faddr.ss_family) {
  748. #ifdef INET6
  749. case AF_INET6:
  750. inp = in6_pcbhashlookup(&tcbtable, &f6,
  751. fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain);
  752. break;
  753. #endif
  754. case AF_INET:
  755. inp = in_pcbhashlookup(&tcbtable, fin->sin_addr,
  756. fin->sin_port, lin->sin_addr, lin->sin_port, tir.rdomain);
  757. break;
  758. default:
  759. unhandled_af(tir.faddr.ss_family);
  760. }
  761. if (dodrop) {
  762. if (inp && (tp = intotcpcb(inp)) &&
  763. ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0))
  764. tp = tcp_drop(tp, ECONNABORTED);
  765. else
  766. error = ESRCH;
  767. splx(s);
  768. return (error);
  769. }
  770. if (inp == NULL) {
  771. ++tcpstat.tcps_pcbhashmiss;
  772. switch (tir.faddr.ss_family) {
  773. #ifdef INET6
  774. case AF_INET6:
  775. inp = in6_pcblookup_listen(&tcbtable,
  776. &l6, lin6->sin6_port, 0, NULL, tir.rdomain);
  777. break;
  778. #endif
  779. case AF_INET:
  780. inp = in_pcblookup_listen(&tcbtable,
  781. lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain);
  782. break;
  783. }
  784. }
  785. if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
  786. tir.ruid = inp->inp_socket->so_ruid;
  787. tir.euid = inp->inp_socket->so_euid;
  788. } else {
  789. tir.ruid = -1;
  790. tir.euid = -1;
  791. }
  792. splx(s);
  793. *oldlenp = sizeof (tir);
  794. error = copyout((void *)&tir, oldp, sizeof (tir));
  795. return (error);
  796. }
  797. /*
  798. * Sysctl for tcp variables.
  799. */
  800. int
  801. tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
  802. int *name;
  803. u_int namelen;
  804. void *oldp;
  805. size_t *oldlenp;
  806. void *newp;
  807. size_t newlen;
  808. {
  809. int error, nval;
  810. /* All sysctl names at this level are terminal. */
  811. if (namelen != 1)
  812. return (ENOTDIR);
  813. switch (name[0]) {
  814. #ifdef TCP_SACK
  815. case TCPCTL_SACK:
  816. return (sysctl_int(oldp, oldlenp, newp, newlen,
  817. &tcp_do_sack));
  818. #endif
  819. case TCPCTL_SLOWHZ:
  820. return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
  821. case TCPCTL_BADDYNAMIC:
  822. return (sysctl_struct(oldp, oldlenp, newp, newlen,
  823. baddynamicports.tcp, sizeof(baddynamicports.tcp)));
  824. case TCPCTL_IDENT:
  825. return (tcp_ident(oldp, oldlenp, newp, newlen, 0));
  826. case TCPCTL_DROP:
  827. return (tcp_ident(oldp, oldlenp, newp, newlen, 1));
  828. case TCPCTL_ALWAYS_KEEPALIVE:
  829. return (sysctl_int(oldp, oldlenp, newp, newlen,
  830. &tcp_always_keepalive));
  831. #ifdef TCP_ECN
  832. case TCPCTL_ECN:
  833. return (sysctl_int(oldp, oldlenp, newp, newlen,
  834. &tcp_do_ecn));
  835. #endif
  836. case TCPCTL_REASS_LIMIT:
  837. nval = tcp_reass_limit;
  838. error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
  839. if (error)
  840. return (error);
  841. if (nval != tcp_reass_limit) {
  842. error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
  843. if (error)
  844. return (error);
  845. tcp_reass_limit = nval;
  846. }
  847. return (0);
  848. #ifdef TCP_SACK
  849. case TCPCTL_SACKHOLE_LIMIT:
  850. nval = tcp_sackhole_limit;
  851. error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
  852. if (error)
  853. return (error);
  854. if (nval != tcp_sackhole_limit) {
  855. error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
  856. if (error)
  857. return (error);
  858. tcp_sackhole_limit = nval;
  859. }
  860. return (0);
  861. #endif
  862. case TCPCTL_STATS:
  863. if (newp != NULL)
  864. return (EPERM);
  865. return (sysctl_struct(oldp, oldlenp, newp, newlen,
  866. &tcpstat, sizeof(tcpstat)));
  867. default:
  868. if (name[0] < TCPCTL_MAXID)
  869. return (sysctl_int_arr(tcpctl_vars, name, namelen,
  870. oldp, oldlenp, newp, newlen));
  871. return (ENOPROTOOPT);
  872. }
  873. /* NOTREACHED */
  874. }
  875. /*
  876. * Scale the send buffer so that inflight data is not accounted against
  877. * the limit. The buffer will scale with the congestion window, if the
  878. * the receiver stops acking data the window will shrink and therefor
  879. * the buffer size will shrink as well.
  880. * In low memory situation try to shrink the buffer to the initial size
  881. * disabling the send buffer scaling as long as the situation persists.
  882. */
  883. void
  884. tcp_update_sndspace(struct tcpcb *tp)
  885. {
  886. struct socket *so = tp->t_inpcb->inp_socket;
  887. u_long nmax;
  888. if (sbchecklowmem())
  889. /* low on memory try to get rid of some */
  890. nmax = tcp_sendspace;
  891. else if (so->so_snd.sb_wat != tcp_sendspace)
  892. /* user requested buffer size, auto-scaling disabled */
  893. nmax = so->so_snd.sb_wat;
  894. else
  895. /* automatic buffer scaling */
  896. nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max -
  897. tp->snd_una);
  898. /* round to MSS boundary */
  899. nmax = roundup(nmax, tp->t_maxseg);
  900. if (nmax != so->so_snd.sb_hiwat)
  901. sbreserve(&so->so_snd, nmax);
  902. }
  903. /*
  904. * Scale the recv buffer by looking at how much data was transferred in
  905. * on approximated RTT. If more then a big part of the recv buffer was
  906. * transferred during that time we increase the buffer by a constant.
  907. * In low memory situation try to shrink the buffer to the initial size.
  908. */
  909. void
  910. tcp_update_rcvspace(struct tcpcb *tp)
  911. {
  912. struct socket *so = tp->t_inpcb->inp_socket;
  913. u_long nmax = so->so_rcv.sb_hiwat;
  914. if (sbchecklowmem())
  915. /* low on memory try to get rid of some */
  916. nmax = tcp_recvspace;
  917. else if (so->so_rcv.sb_wat != tcp_recvspace)
  918. /* user requested buffer size, auto-scaling disabled */
  919. nmax = so->so_rcv.sb_wat;
  920. else {
  921. /* automatic buffer scaling */
  922. if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7)
  923. nmax = MIN(sb_max, so->so_rcv.sb_hiwat +
  924. tcp_autorcvbuf_inc);
  925. }
  926. if (nmax == so->so_rcv.sb_hiwat)
  927. return;
  928. /* round to MSS boundary */
  929. nmax = roundup(nmax, tp->t_maxseg);
  930. sbreserve(&so->so_rcv, nmax);
  931. }