tcp_stats.c 8.5 KB


  1. /*-
  2. * Copyright (c) 2016-2018 Netflix, Inc.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24. * SUCH DAMAGE.
  25. */
  26. /*
  27. * Author: Lawrence Stewart <lstewart@netflix.com>
  28. */
  29. #include <sys/cdefs.h>
  30. __FBSDID("$FreeBSD$");
  31. #include <sys/param.h>
  32. #include <sys/arb.h>
  33. #include <sys/errno.h>
  34. #include <sys/malloc.h>
  35. #include <sys/qmath.h>
  36. #include <sys/queue.h>
  37. #include <sys/socket.h>
  38. #include <sys/socketvar.h>
  39. #include <sys/sysctl.h>
  40. #ifdef _KERNEL
  41. #include <sys/kernel.h>
  42. #include <sys/lock.h>
  43. #include <sys/rmlock.h>
  44. #include <sys/systm.h>
  45. #endif
  46. #include <sys/stats.h>
  47. #include <net/vnet.h>
  48. #include <netinet/in.h>
  49. #include <netinet/in_pcb.h>
  50. #include <netinet/tcp.h>
  51. #include <netinet/tcp_var.h>
  52. #include <netinet/cc/cc.h>
  53. VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;
  54. #ifndef _KERNEL
  55. #define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
  56. #define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
  57. #else /* _KERNEL */
  58. VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
  59. VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
  60. VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
  61. #define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
  62. #define V_tcp_stats_nrates VNET(tcp_stats_nrates)
  63. static struct rmlock tcp_stats_tpl_sampling_lock;
  64. static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
  65. struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
  66. SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
  67. CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
  68. "Enable per-connection TCP stats gathering; 1 enables for all connections, "
  69. "2 enables random sampling across log id connection groups");
  70. SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
  71. CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT, tcp_stats_tpl_sr_cb,
  72. sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
  73. "TCP stats per template random sampling rates, in CSV tpl_spec=percent "
  74. "key-value pairs (see stats(9) for template spec details)");
  75. #endif /* _KERNEL */
  76. #ifdef _KERNEL
  77. int
  78. #else
  79. static int
  80. /* Ensure all templates are also added to the userland template list. */
  81. __attribute__ ((constructor))
  82. #endif
  83. tcp_stats_init()
  84. {
  85. int err, lasterr;
  86. err = lasterr = 0;
  87. V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
  88. if (V_tcp_perconn_stats_dflt_tpl < 0)
  89. return (-V_tcp_perconn_stats_dflt_tpl);
  90. struct voistatspec vss_sum[] = {
  91. STATS_VSS_SUM(),
  92. };
  93. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  94. VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
  95. NVSS(vss_sum), vss_sum, 0);
  96. lasterr = err ? err : lasterr;
  97. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  98. VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
  99. NVSS(vss_sum), vss_sum, 0);
  100. lasterr = err ? err : lasterr;
  101. struct voistatspec vss_max[] = {
  102. STATS_VSS_MAX(),
  103. };
  104. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  105. VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
  106. NVSS(vss_max), vss_max, 0);
  107. lasterr = err ? err : lasterr;
  108. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  109. VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
  110. NVSS(vss_max), vss_max, 0);
  111. lasterr = err ? err : lasterr;
  112. struct voistatspec vss_rtt[] = {
  113. STATS_VSS_MAX(),
  114. STATS_VSS_MIN(),
  115. STATS_VSS_TDGSTCLUST32(20, 4),
  116. };
  117. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  118. VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
  119. NVSS(vss_rtt), vss_rtt, 0);
  120. lasterr = err ? err : lasterr;
  121. struct voistatspec vss_congsig[] = {
  122. STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
  123. DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
  124. };
  125. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  126. VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
  127. NVSS(vss_congsig), vss_congsig, 0);
  128. lasterr = err ? err : lasterr;
  129. struct voistatspec vss_gput[] = {
  130. STATS_VSS_MAX(),
  131. STATS_VSS_TDGSTCLUST32(20, 4),
  132. };
  133. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  134. VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
  135. NVSS(vss_gput), vss_gput, 0);
  136. lasterr = err ? err : lasterr;
  137. struct voistatspec vss_gput_nd[] = {
  138. STATS_VSS_TDGSTCLUST32(10, 4),
  139. };
  140. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  141. VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
  142. NVSS(vss_gput_nd), vss_gput_nd, 0);
  143. lasterr = err ? err : lasterr;
  144. struct voistatspec vss_windiff[] = {
  145. STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
  146. };
  147. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  148. VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
  149. NVSS(vss_windiff), vss_windiff, 0);
  150. lasterr = err ? err : lasterr;
  151. struct voistatspec vss_acklen[] = {
  152. STATS_VSS_MAX(),
  153. STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
  154. };
  155. err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
  156. VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
  157. NVSS(vss_acklen), vss_acklen, 0);
  158. lasterr = err ? err : lasterr;
  159. return (lasterr);
  160. }
  161. #ifdef _KERNEL
  162. int
  163. tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
  164. size_t seed_len)
  165. {
  166. struct rm_priotracker tracker;
  167. int tpl;
  168. tpl = -1;
  169. if (V_tcp_stats_nrates > 0) {
  170. rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker);
  171. tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates,
  172. V_tcp_stats_nrates, seed_bytes, seed_len);
  173. rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker);
  174. if (tpl >= 0) {
  175. INP_WLOCK_ASSERT(tp->t_inpcb);
  176. if (tp->t_stats != NULL)
  177. stats_blob_destroy(tp->t_stats);
  178. tp->t_stats = stats_blob_alloc(tpl, 0);
  179. if (tp->t_stats == NULL)
  180. tpl = -ENOMEM;
  181. }
  182. }
  183. return (tpl);
  184. }
  185. /*
  186. * Callback function for stats_tpl_sample_rates() to interact with the TCP
  187. * subsystem's stats template sample rates list.
  188. */
  189. int
  190. tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
  191. struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
  192. {
  193. struct stats_tpl_sample_rate *old_rates;
  194. int old_nrates;
  195. if (ctx == NULL)
  196. return (ENOMEM);
  197. switch (action) {
  198. case TPL_SR_RLOCKED_GET:
  199. /*
  200. * Return with rlock held i.e. this call must be paired with a
  201. * "action == TPL_SR_RUNLOCK" call.
  202. */
  203. rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
  204. rm_rlock(&tcp_stats_tpl_sampling_lock,
  205. (struct rm_priotracker *)ctx);
  206. /* FALLTHROUGH */
  207. case TPL_SR_UNLOCKED_GET:
  208. if (rates != NULL)
  209. *rates = V_tcp_perconn_stats_sample_rates;
  210. if (nrates != NULL)
  211. *nrates = V_tcp_stats_nrates;
  212. break;
  213. case TPL_SR_RUNLOCK:
  214. rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
  215. rm_runlock(&tcp_stats_tpl_sampling_lock,
  216. (struct rm_priotracker *)ctx);
  217. break;
  218. case TPL_SR_PUT:
  219. KASSERT(rates != NULL && nrates != NULL,
  220. ("%s: PUT without new rates", __func__));
  221. rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
  222. if (rates == NULL || nrates == NULL)
  223. return (EINVAL);
  224. rm_wlock(&tcp_stats_tpl_sampling_lock);
  225. old_rates = V_tcp_perconn_stats_sample_rates;
  226. old_nrates = V_tcp_stats_nrates;
  227. V_tcp_perconn_stats_sample_rates = *rates;
  228. V_tcp_stats_nrates = *nrates;
  229. rm_wunlock(&tcp_stats_tpl_sampling_lock);
  230. *rates = old_rates;
  231. *nrates = old_nrates;
  232. break;
  233. default:
  234. return (EINVAL);
  235. break;
  236. }
  237. return (0);
  238. }
/*
 * Register tcp_stats_tpl_sampling_lock for initialisation via RM_SYSINIT
 * under the name "tcp_stats_tpl_sampling_lock" (see rmlock(9)).
 */
RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
    "tcp_stats_tpl_sampling_lock");
  241. #endif /* _KERNEL */